I am parsing a list of XML files. I am using a class that overrides the XMLParser class of ElementTree. This is the code:
import sys

# Block the C accelerator so that the pure-Python ElementTree implementation
# is used; the XMLParser hooks overridden further down (_start/_end) only
# exist on the pure-Python class.
sys.modules['_elementtree'] = None
# Drop any already-imported copy so the import below re-executes the module
# with the accelerator blocked. A missing entry is fine, hence the default.
sys.modules.pop('xml.etree.ElementTree', None)
import xml.etree.ElementTree as ET
class Parse():
    """Parse a list of XML files and print each top-level child's line span."""

    def __init__(self):
        # NOTE(review): list_of_xmlFile_paths is a placeholder that is not
        # defined in the visible code; it must be supplied before use.
        self.xmlFiles = [list_of_xmlFile_paths]

    def parse_xml_files(self):
        """Parse every file in self.xmlFiles and print start/end line numbers
        of the direct children of each document root."""
        for filepath in self.xmlFiles:
            # A new parser is created for each file on purpose: ET.parse()
            # closes the parser when the document ends, and a closed expat
            # parser cannot be fed a second document.
            root = ET.parse(filepath, LineNumberingParser()).getroot()
            for elem in root:
                print(elem.start_line_number, elem.end_line_number)
class LineNumberingParser(ET.XMLParser):
    """XMLParser that annotates each element with its source position.

    Every element receives ``start_line_number`` / ``start_column_number``
    when its opening tag is seen and ``end_line_number`` /
    ``end_column_number`` when its closing tag is seen. The values are read
    from the underlying parser object (``self.parser``).
    """

    def _start(self, *args, **kwargs):
        # Here we assume the default XML parser which is expat
        # and copy its element position attributes into output Elements
        # The class is named explicitly: super(self.__class__, self) would
        # recurse forever if this class were ever subclassed.
        self.element = super(LineNumberingParser, self)._start(*args, **kwargs)
        self.element.start_line_number = self.parser.CurrentLineNumber
        self.element.start_column_number = self.parser.CurrentColumnNumber
        return self.element

    def _end(self, *args, **kwargs):
        # Same as _start, but records the position of the closing tag.
        self.element = super(LineNumberingParser, self)._end(*args, **kwargs)
        self.element.end_line_number = self.parser.CurrentLineNumber
        self.element.end_column_number = self.parser.CurrentColumnNumber
        return self.element
The class LineNumberingParser gives me the start line and end line of each XML node. My issue is that the class is instantiated again for every XML file, and I am concerned that this repeated initialisation is inefficient. How can I initialise the class only once and reuse it for all files? Can anyone please suggest an approach?