0% found this document useful (0 votes)
2 views

Code

Uploaded by

Debanjan Dey
Copyright
© All Rights Reserved
Available Formats
Download as TXT or PDF, or read online on Scribd
0% found this document useful (0 votes)
2 views

Code

Uploaded by

Debanjan Dey
Copyright
© All Rights Reserved
Available Formats
Download as TXT or PDF, or read online on Scribd
You are on page 1/ 2

import xml.sax
import xml.sax.handler

class MyHandler(xml.sax.ContentHandler):
    """Collect ``(identifier, name)`` pairs from matching ``REC`` elements.

    While parsing, the handler rebuilds a simplified copy of the markup it
    has seen in ``buffer``: ``<tag>`` / ``</tag>`` for every element
    (attributes are omitted) plus the character data in between.  Character
    data is stored as delivered by the parser, i.e. already unescaped.

    Each time a ``REC`` element ends, ``buffer`` is searched for every entry
    of ``REQUIRED_SUBSTRINGS``.  If all are present, the values returned by
    ``extract_identifier`` and ``extract_name`` are appended to
    ``ids_and_names`` as a tuple.  ``buffer`` is emptied after every ``REC``
    whether or not it matched.

    Attributes:
        buffer: Markup/text accumulated since the last ``REC`` ended.
        ids_and_names: List of ``(identifier, name)`` tuples collected so far.
    """

    # Both substrings must occur in a record for it to be collected.  The
    # second one contains element tags, which is why the handler rebuilds
    # tags in ``buffer`` instead of storing character data only.
    REQUIRED_SUBSTRINGS = (
        ":70: MUHALLA, Mohamed",
        "<NAM>MUHALLA,MOHAMED</NAM>",
    )

    def __init__(self):
        super().__init__()
        self.buffer = ""
        self.ids_and_names = []

    def startElement(self, name, attrs):
        """Record the opening tag of *name* in the buffer (attributes dropped)."""
        self.buffer += f"<{name}>"

    def characters(self, content):
        """Append a piece of character data to the buffer.

        The parser may deliver the text of one element in several calls, so
        the pieces are concatenated rather than inspected individually.
        """
        self.buffer += content

    def endElement(self, name):
        """Record the closing tag and, at the end of a ``REC``, evaluate it."""
        if name != "REC":
            self.buffer += f"</{name}>"
            return

        record = self.buffer
        if all(needle in record for needle in self.REQUIRED_SUBSTRINGS):
            identifier = self.extract_identifier(record)
            extracted_name = self.extract_name(record)
            self.ids_and_names.append((identifier, extracted_name))

        # Start the next record with an empty buffer.
        self.buffer = ""

    def extract_identifier(self, content):
        """Return the identifier found in *content*.

        Placeholder: the extraction logic has not been written yet, so this
        currently returns ``None``.  Fill it in or override it in a subclass.
        """
        return None

    def extract_name(self, content):
        """Return the name found in *content*.

        Placeholder: the extraction logic has not been written yet, so this
        currently returns ``None``.  Fill it in or override it in a subclass.
        """
        return None

def _iter_messages(stream, read_size, open_tag="<H>", close_tag="</H>"):
    """Yield every complete ``open_tag … close_tag`` span read from *stream*.

    The stream is read ``read_size`` characters at a time.  Unconsumed text
    is kept in a local buffer so a message that straddles a read boundary is
    still returned in one piece.  Text outside a message is discarded, and a
    closing tag with no opening tag before it is skipped rather than
    re-examined forever.
    """
    pending = ""
    while True:
        chunk = stream.read(read_size)
        if not chunk:
            break
        pending += chunk

        while True:
            end = pending.find(close_tag)
            if end == -1:
                break  # no complete message yet; read more input
            end += len(close_tag)
            start = pending.find(open_tag, 0, end)
            if start != -1:
                yield pending[start:end]
            # Always drop the consumed span so the loop makes progress.
            pending = pending[end:]


# Create an instance of the XML handler
handler = MyHandler()

# Create a SAX parser
parser = xml.sax.make_parser()

# Disable namespace handling if not required
parser.setFeature(xml.sax.handler.feature_namespaces, 0)

# Set the content handler for the parser
parser.setContentHandler(handler)

# Chunk size for processing
chunk_size = 1024  # Adjust the size as per your needs

# Parse the XML file in chunks.  Raw file text is buffered inside
# _iter_messages, not in handler.buffer, because the handler's SAX callbacks
# append to and clear handler.buffer while parsing.
# NOTE(review): UTF-8 is assumed here (the XML default); change the encoding
# if the file declares a different one.
with open("your_file.xml", "r", encoding="utf-8") as xml_file:
    for message in _iter_messages(xml_file, chunk_size):
        # Each message has its own root element, so it is parsed as a
        # separate document: finish it with close() and reset() the parser
        # before feeding the next one.
        parser.feed(message)
        parser.close()
        parser.reset()

# Access the extracted IDs and names
ids_and_names = handler.ids_and_names

# Process the extracted data as needed
for identifier, name in ids_and_names:
    print("Identifier:", identifier)
    print("Name:", name)
    print()

You might also like