import xml.etree.ElementTree as ET
import logging
# Compares two ElementTree elements recursively (tag, attributes, text, tail,
# children) and logs the first mismatch found. This is the version posted in
# the question; the NOTE comments mark what is missing from it.
class XmlTree():
def __init__(self):
# NOTE: the handler and formatter are created but never connected to each
# other or to a logger, and self.logger (used by xml_compare) is never set.
self.hdlr = logging.FileHandler('xml-comparison.log')
self.formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
# Parses an XML string and returns its root element.
@staticmethod
def convert_string_to_tree( xmlString):
return ET.fromstring(xmlString)
def xml_compare(self, x1, x2, excludes=[]):
"""
Compares two xml etrees
:param x1: the first tree
:param x2: the second tree
:param excludes: list of string of attributes to exclude from comparison
:return:
True if both files match
"""
# NOTE: the trailing '** Error occuring Line**' on the next line is the
# poster's marker for where the failure is reported, not Python syntax.
if x1.tag != x2.tag:** Error occuring Line**
self.logger.debug('Tags do not match: %s and %s' % (x1.tag, x2.tag))
return False
# Every non-excluded attribute of x1 must have the same value on x2.
for name, value in x1.attrib.items():
if not name in excludes:
if x2.attrib.get(name) != value:
self.logger.debug('Attributes do not match: %s=%r, %s=%r'
% (name, value, name, x2.attrib.get(name)))
return False
# ...and x2 must not carry non-excluded attributes that x1 lacks.
for name in x2.attrib.keys():
if not name in excludes:
if name not in x1.attrib:
self.logger.debug('x2 has an attribute x1 is missing: %s'
% name)
return False
# NOTE: text_compare is not defined anywhere in this version of the class.
if not self.text_compare(x1.text, x2.text):
self.logger.debug('text: %r != %r' % (x1.text, x2.text))
return False
if not self.text_compare(x1.tail, x2.tail):
self.logger.debug('tail: %r != %r' % (x1.tail, x2.tail))
return False
# NOTE: Element.getchildren() was deprecated and then removed in Python 3.9.
cl1 = x1.getchildren()
cl2 = x2.getchildren()
if len(cl1) != len(cl2):
self.logger.debug('children length differs, %i != %i'
% (len(cl1), len(cl2)))
return False
# Children are compared pairwise by position; i is the 1-based position
# used in the log message.
i = 0
for c1, c2 in zip(cl1, cl2):
i += 1
# excludes is also matched against the child tag of x1, so a pair whose
# first element has an excluded tag is skipped without being compared.
if not c1.tag in excludes:
if not self.xml_compare(c1, c2, excludes):
self.logger.debug('children %i do not match: %s'
% (i, c1.tag))
return False
return True
# Driver: parse two XML files and report whether their root elements compare
# equal, ignoring the name "from".
xml1 = "File1.xml"
xml2 = "File2.xml"
# NOTE: the XmlTree class above defines no parse() method, so these two calls
# cannot succeed as written.
tree1 = XmlTree.parse(xml1).getroot()
tree2 = XmlTree.parse(xml2).getroot()
comparator = XmlTree()
# NOTE: the print statements below use Python 2 statement syntax; Python 3
# rejects them with a SyntaxError.
if comparator.xml_compare(tree1, tree2, ["from"]):
print "XMLs match"
else:
print "XMLs don't match"
Asked
Active
Viewed 90 times
-1

McGrady
- 10,869
- 13
- 47
- 69

Dinesh Kumar
- 1
- 3
1 Answer
0
First, the XmlTree class has no parse() method, so I suppose what you want is:
# parse() is called on the ElementTree module (imported as ET), not on XmlTree.
tree1 = ET.parse(xml1).getroot()
tree2 = ET.parse(xml2).getroot()
By the way, you should probably also define self.logger in the __init__ method, like this:
def __init__(self):
    """Set up the "Sample" logger so mismatch reports reach the log file.

    Sets ``self.logger``, ``self.hdlr`` and ``self.formatter``.
    """
    self.logger = logging.getLogger("Sample")
    # The comparison code reports every mismatch through debug(); at INFO
    # those records would all be discarded and the log file would stay empty.
    self.logger.setLevel(logging.DEBUG)
    # getLogger() returns the same logger object for the same name, so adding
    # a fresh handler per instance would write each record once per instance.
    # Reuse the file handler a previous instance attached (assumes the
    # "Sample" logger is only configured by this class).
    for handler in self.logger.handlers:
        if isinstance(handler, logging.FileHandler):
            self.hdlr = handler
            self.formatter = handler.formatter
            break
    else:
        self.hdlr = logging.FileHandler('xml-comparison.log')
        self.formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
        self.hdlr.setFormatter(self.formatter)
        self.logger.addHandler(self.hdlr)
Here is the full code; it works in both Python 2.x and Python 3.x:
import xml.etree.ElementTree as ET
import logging
class XmlTree():
    """Recursive comparison of two ElementTree elements with mismatch logging.

    Two elements match when their tags, non-excluded attributes, text, tail
    and (position by position) their children all match. The first mismatch
    found is reported through the "Sample" logger at DEBUG level.
    """

    def __init__(self):
        """Set up the "Sample" logger so mismatch reports reach ``test.log``.

        Sets ``self.logger``, ``self.hdlr`` and ``self.formatter``.
        """
        self.logger = logging.getLogger("Sample")
        # xml_compare reports every mismatch through debug(); at INFO those
        # records would all be discarded and the log file would stay empty.
        self.logger.setLevel(logging.DEBUG)
        # getLogger() returns the same logger object for the same name, so
        # adding a fresh handler per instance would write each record once
        # per instance. Reuse the file handler a previous instance attached
        # (assumes the "Sample" logger is only configured by this class).
        for handler in self.logger.handlers:
            if isinstance(handler, logging.FileHandler):
                self.hdlr = handler
                self.formatter = handler.formatter
                break
        else:
            self.hdlr = logging.FileHandler('test.log')
            self.formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
            self.hdlr.setFormatter(self.formatter)
            self.logger.addHandler(self.hdlr)

    @staticmethod
    def convert_string_to_tree(xmlString):
        """Parse an XML string and return its root element."""
        return ET.fromstring(xmlString)

    def xml_compare(self, x1, x2, excludes=()):
        """
        Compare two xml etree elements recursively.

        :param x1: the first element
        :param x2: the second element
        :param excludes: names to ignore. A name is skipped when it is an
            attribute name on either element, and a pair of children is
            skipped when the child taken from x1 has that tag. Skipped
            children still count towards the number-of-children check.
        :return:
            True if both elements match, False at the first mismatch
        """
        if excludes is None:
            excludes = ()

        if x1.tag != x2.tag:
            self.logger.debug('Tags do not match: %s and %s', x1.tag, x2.tag)
            return False

        # Every non-excluded attribute of x1 must have the same value on x2...
        for name, value in x1.attrib.items():
            if name in excludes:
                continue
            other = x2.attrib.get(name)
            if other != value:
                self.logger.debug('Attributes do not match: %s=%r, %s=%r',
                                  name, value, name, other)
                return False

        # ...and x2 must not carry non-excluded attributes that x1 lacks.
        for name in x2.attrib:
            if name not in excludes and name not in x1.attrib:
                self.logger.debug('x2 has an attribute x1 is missing: %s', name)
                return False

        if not self.text_compare(x1.text, x2.text):
            self.logger.debug('text: %r != %r', x1.text, x2.text)
            return False
        if not self.text_compare(x1.tail, x2.tail):
            self.logger.debug('tail: %r != %r', x1.tail, x2.tail)
            return False

        # Element.getchildren() was removed in Python 3.9; iterating an
        # element yields its direct children on every version.
        cl1 = list(x1)
        cl2 = list(x2)
        if len(cl1) != len(cl2):
            self.logger.debug('children length differs, %i != %i',
                              len(cl1), len(cl2))
            return False

        # Children are compared pairwise by position (1-based in the log).
        for i, (c1, c2) in enumerate(zip(cl1, cl2), 1):
            if c1.tag in excludes:
                continue
            if not self.xml_compare(c1, c2, excludes):
                self.logger.debug('children %i do not match: %s', i, c1.tag)
                return False
        return True

    def text_compare(self, t1, t2):
        """
        Compare two text strings.

        :param t1: text one (may be None)
        :param t2: text two (may be None)
        :return:
            True if both are empty/None, if either is the wildcard '*',
            or if they are equal after stripping surrounding whitespace
        """
        if not t1 and not t2:
            return True
        if t1 == '*' or t2 == '*':
            return True
        return (t1 or '').strip() == (t2 or '').strip()
# Driver: parse both sample files, compare their root elements while
# ignoring the name "from", and print the verdict.
xml1, xml2 = "1.xml", "2.xml"
tree1, tree2 = (ET.parse(path).getroot() for path in (xml1, xml2))
comparator = XmlTree()
print("XMLs match" if comparator.xml_compare(tree1, tree2, ["from"]) else "XMLs don't match")
1.xml
<note>
<to>Tove</to>
<from>Jani</from>
</note>
2.xml
<note>
<to>Tove</to>
</note>

McGrady
- 10,869
- 13
- 47
- 69
-
I modified the code as per your suggestion, but I'm still getting the same problem. I'm running this code in Anaconda Python 3.6; could that be causing the problem? I took this code from this Stack Overflow post: http://stackoverflow.com/questions/24492895/comparing-two-xml-files-in-python. Could you please help me with this? I'm a beginner in Python and don't have much programming background. – Dinesh Kumar Apr 27 '17 at 11:30
-
@DineshKumar `print "XMLs match"` raises a SyntaxError in Python 3.x. – McGrady Apr 27 '17 at 12:21
-
Can the above code iterate over all the tags in the XML, or should I rewrite it? I ask because I have doubts about the line (if x1.tag != x2.tag:** Error occuring Line**), as it keeps raising an error there. – Dinesh Kumar Apr 28 '17 at 08:17
-
@DineshKumar I edited my answer; have a look. The code works well, so maybe there's something wrong elsewhere in your project. – McGrady Apr 28 '17 at 08:24