# Download as TXT or PDF, or read online on Scribd
# You are on page 1/2
import xml.sax
class MyHandler(xml.sax.ContentHandler):
    """SAX handler that records an (identifier, name) pair per matching ``REC``.

    While parsing, the handler accumulates character data together with
    re-created ``<tag>`` / ``</tag>`` markers in ``buffer``.  When a ``REC``
    element closes, the buffer is tested for two fixed substrings; on a match
    the extraction hooks are called and their results are appended to
    ``ids_and_names``.  The buffer is then reset for the next record.

    Attributes:
        buffer: Text gathered since the last ``REC`` element ended.
        ids_and_names: List of ``(identifier, name)`` tuples found so far.
    """

    def __init__(self):
        super().__init__()
        self.buffer = ""  # Buffer to hold partial chunks
        self.ids_and_names = []

    def startElement(self, name, attrs):
        """Append a bare ``<name>`` marker to the buffer.

        SAX delivers markup and text through separate callbacks, so the tag is
        rebuilt here; otherwise the ``<NAM>...</NAM>`` substring test in
        ``endElement`` could never succeed.  Attributes are not reproduced.
        """
        self.buffer += "<" + name + ">"

    def characters(self, content):
        """Append a run of character data to the buffer."""
        self.buffer += content

    def endElement(self, name):
        """Close a tag in the buffer, or evaluate and reset it on ``REC``."""
        if name != "REC":
            self.buffer += "</" + name + ">"
            return

        # Check if the required substrings are present in the buffered content
        if (
            ":70: MUHALLA, Mohamed" in self.buffer
            and "<NAM> MUHALLA,MOHAMED</NAM>" in self.buffer
        ):
            # Extract the identifier and name from the buffered content.
            # A separate local is used so the ``name`` parameter is not shadowed.
            identifier = self.extract_identifier(self.buffer)
            extracted_name = self.extract_name(self.buffer)
            # Append the identifier and name to the list
            self.ids_and_names.append((identifier, extracted_name))

        # Clear the buffer after processing the message
        self.buffer = ""

    def extract_identifier(self, content):
        """Return the identifier found in *content*.

        Placeholder: the extraction rule is not defined yet, so this returns
        ``None``.
        """
        # TODO: implement the identifier extraction logic here.
        return None

    def extract_name(self, content):
        """Return the name found in *content*.

        Placeholder: the extraction rule is not defined yet, so this returns
        ``None``.
        """
        # TODO: implement the name extraction logic here.
        return None
chunk_size = 1024  # Adjust the size as per your needs

# Create the handler and the incremental parser that drives it.
handler = MyHandler()
parser = xml.sax.make_parser()
parser.setContentHandler(handler)

# Raw file text that has not been handed to the parser yet.  It is kept apart
# from handler.buffer, which the handler's own callbacks append to and clear
# while a message is being parsed.
pending = ""

# Parse the XML file in chunks
with open("your_file.xml", "r") as xml_file:
    for chunk in iter(lambda: xml_file.read(chunk_size), ""):
        # Add the chunk to the buffer
        pending += chunk

        # Process complete messages in the buffer
        while True:
            end_index = pending.find("</H>")
            if end_index == -1:
                break  # No complete message yet; read more data.
            end_index += len("</H>")

            start_index = pending.find("<H>", 0, end_index)
            if start_index != -1:
                # Each <H>...</H> span is a complete document of its own, so
                # the parser is closed and reset between messages.
                parser.feed(pending[start_index:end_index])
                parser.close()
                parser.reset()

            # Always consume through the closing tag, even when no opening
            # tag preceded it, so the loop is guaranteed to make progress.
            pending = pending[end_index:]

# Access the extracted IDs and names
ids_and_names = handler.ids_and_names

# Process the extracted data as needed
for identifier, name in ids_and_names:
    print("Identifier:", identifier)
    print("Name:", name)
    print()