I have this code in BeautifulSoup4
:
import glob
from bs4 import BeautifulSoup
# get all ccl_tags
# rearrange
# replace current ccl_tag with data replaced
# print to output file
def rearrange_xml(input_file, output_file, tag_ordering):
# Reading the data inside the xml
# file to a variable under the name
# data
with open(input_file, "r") as f:
data = f.read()
# Passing the stored data inside
# the beautifulsoup parser, storing
# the returned object
Bs_data = BeautifulSoup(data, "xml")
# Finding all instances of tag
# `ccl_tag` asign to "ccls"
ccls = Bs_data.find_all("ccl_tag")
# rearrange the ccl based on the given ordering
for ccl in ccls:
# declare new ccl
new_ccl = BeautifulSoup()
tags_to_rearrange = []
# find all tags in tag ordering which are also inside ccl
for tag_name in tag_ordering:
tags = ccl.find_all(tag_name)
tags_to_rearrange.extend(tags)
# find only the found tags to rearrage and apply to new ccl
for tag in tags_to_rearrange:
tag.extract()
# only add the tags specified
new_ccl.append(tag)
print(new_ccl)
# replace with doesn"t work?
ccl.replace_with(new_ccl)
print(Bs_data.find("ccl_tag"))
with open(output_file, "w") as file:
file.write(Bs_data.prettify())
# Get a list of XML files in the same directory
xml_files = glob.glob("*.xml")
tag_ordering = [
"date_tag",
"req_tag",
"req2_tag",
"req3_tag"
]
# Rearrange each XML file
for input_file in xml_files:
# don't include output_ files
if not input_file.startswith("output_"):
output_file = f"output_{input_file}"
rearrange_xml(input_file, output_file, tag_ordering)
which get's the xml files in current directory and creates a new xml file from the input file.
It then finds a tag, ccl_tag, and replaces that tag with the same tag only rearranged, and with specific tags included.
However, the code to replace the tag does not work. I can confirm by printing new_ccl that the values are correct, but does not copy properly.
What could be wrong?