I am trying to convert an XML file in CVAT format to YOLO format.
but I am encountering a type error while trying to obtain yolo_x, y, w, and h
tried using 'int' to resolve the issue, but I am still encountering a 'value error'.
I would appreciate it if you could provide a good solution. Also, could you please evaluate the code I am currently working on for converting from CVAT format to YOLO format? If there is a better way, please let me know. Thank you.
xml
<image id="0" name="20230226_145238.jpg" width="510" height="680">
<box label="meter" source="manual" occluded="0" xtl="202.10" ytl="259.28" xbr="314.74" ybr="305.89" z_order="0">
</box>
<box label="zero" source="manual" occluded="0" xtl="219.68" ytl="269.57" xbr="234.84" ybr="293.43" z_order="0">
</box>
<box label="eight" source="manual" occluded="0" xtl="234.84" ytl="270.35" xbr="249.11" ybr="293.99" z_order="0">
</box>
<box label="one" source="manual" occluded="0" xtl="249.78" ytl="271.69" xbr="263.38" ybr="293.88" z_order="0">
</box>
<box label="one" source="manual" occluded="0" xtl="263.38" ytl="272.69" xbr="278.99" ybr="295.10" z_order="0">
</box>
<box label="eight" source="manual" occluded="0" xtl="279.55" ytl="272.80" xbr="293.71" ybr="296.33" z_order="0">
</box>
<box label="three" source="manual" occluded="0" xtl="295.38" ytl="272.58" xbr="308.68" ybr="297.33" z_order="0">
</box>
<box label="dot" source="manual" occluded="0" xtl="289.47" ytl="292.43" xbr="293.93" ybr="296.55" z_order="0">
</box>
</image>
code
import os
import glob
from xml.etree.ElementTree import parse
xml_dir = "./data/"
class Voc_to_yolo_convter():
def __init__(self, xml_path):
self.xml_path_list = glob.glob(os.path.join(xml_path, "*.xml"))
# print(self.xml_path_list)
def get_voc_to_yolo(self):
for xml_path in self.xml_path_list:
tree = parse(xml_path)
root = tree.getroot()
# image_meta
meta = root.findall("image")
for image in meta:
image_name = image.attrib["name"]
image_width = image.attrib["width"]
image_height = image.attrib["height"]
# print(image_width, image_height)
# object_meta
object_metas = image.findall("box")
for bbox in object_metas:
label = bbox.attrib["label"]
xtl = int(bbox.attrib["xtl"])
ytl = bbox.attrib["ytl"]
xbr = bbox.attrib["xbr"]
ybr = bbox.attrib["ybr"]
# CVAT to yolo
yolo_x = round(((xtl + xbr)/2)/image_width, 6)
yolo_y = round(((ytl + ybr)/2)/image_height, 6)
yolo_w = round((xbr - xtl)/image_width, 6)
yolo_h = round((ybr - ytl)/image_height, 6)
print(yolo_x, yolo_y, yolo_w, yolo_h)
test= Voc_to_yolo_convter(xml_dir)
test.get_voc_to_yolo()