I will split a large XML to small branches and than parse only this parts. I search modified timestamp "mod_time" tag which is avaliable in "contacts" tag, but my object function call, doesn't find the value. In some contacts is also some tags missing completly.
I tried iterfind('tag_name')
, iter()
, findall('tag_name')
, but my program shows no result and I can't figure out for hours, where my failure is.
Here is my XML reduced to two elements:
<?xml version="1.0" encoding = "utf-8"?>
<phonebooks>
<phonebook name="Telefonbuch">
<contact>
<category>0</category>
<person>
<realName>Dummy, Name, Street</realName>
</person>
<telephony nid="1">
<number type="work" prio="1" id="0">012345678</number>
</telephony>
<services />
<setup />
<features doorphone="0" />
<mod_time>1587477163</mod_time>
<uniqueid>358</uniqueid>
</contact>
<contact>
<category>0</category>
<person>
<realName>Foto Name</realName>
</person>
<telephony nid="1">
<number type="home" prio="1" id="0">067856743</number>
</telephony>
<services />
<setup />
<features doorphone="0" />
<mod_time>1547749691</mod_time>
<uniqueid>68</uniqueid>
</contact>
</phonebook>
</phonebooks>
and her what I have done so fare:
import timeit
import xml.etree.ElementTree as ET
class Phonebook:
def __init__(self, xml_file, tag_node):
"""Split tree in contact branches """
self.xml_file = xml_file
self.tag_node = tag_node
# For furter parsing
contacts = []
i = 0
events =('start','end','start-ns','end-ns')
for event, elem in ET.iterparse(self.xml_file, events=events):
if event == 'end' and elem.tag == self.tag_node[0]:
#print(elem.tag)
contacts.append(elem)
par = Contact(elem, i)
par.parse_node(elem, i)
i += 1
elem.clear()
print("Amount of contacts:", len(contacts))
class Contact:
def __init__(self, branch, i):
self.tree = branch
#print(i, self.tree)
def parse_node(self, branch, i):
for node in branch.iterfind('.//mod_time'):
print(node.text)
def main():
elem = Phonebook('new _dummy1.xml',['contact'])
if __name__ == '__main__':
""" Input XML file definition """
starttime=timeit.default_timer()
main()
print('Finished')
print("Runtime:", timeit.default_timer()-starttime)
Output:
Amount of contacts: 2 Finished Runtime: 0.0006361000050674193
Expected output:
1587477163 1547749691