0

I have this code in BeautifulSoup4:

import glob
from bs4 import BeautifulSoup

# get all ccl_tags
# rearrange
# replace current ccl_tag with data replaced
# print to output file

def rearrange_xml(input_file, output_file, tag_ordering):
  # Reading the data inside the xml
  # file to a variable under the name
  # data
  with open(input_file, "r") as f:
     data = f.read()
  
  # Passing the stored data inside
  # the beautifulsoup parser, storing
  # the returned object
  Bs_data = BeautifulSoup(data, "xml")
  
  # Finding all instances of tag
  # `ccl_tag` asign to "ccls"
  ccls = Bs_data.find_all("ccl_tag")
  
  # rearrange the ccl based on the given ordering
  for ccl in ccls:
     # declare new ccl
     new_ccl = BeautifulSoup()
     tags_to_rearrange = []
     # find all tags in tag ordering which are also inside ccl
     for tag_name in tag_ordering:
        tags = ccl.find_all(tag_name)
        tags_to_rearrange.extend(tags)
     # find only the found tags to rearrage and apply to new ccl
     for tag in tags_to_rearrange:
        tag.extract()
        # only add the tags specified
        new_ccl.append(tag)
     print(new_ccl)
     # replace with doesn"t work?
     ccl.replace_with(new_ccl)

  print(Bs_data.find("ccl_tag"))
  
  with open(output_file, "w") as file:
     file.write(Bs_data.prettify())

# Get a list of XML files in the same directory


xml_files = glob.glob("*.xml")
   tag_ordering = [
      "date_tag",
      "req_tag",
      "req2_tag",
      "req3_tag"
   ]

# Rearrange each XML file


for input_file in xml_files:
      # don't include output_ files
      if not input_file.startswith("output_"):
         output_file = f"output_{input_file}"
         rearrange_xml(input_file, output_file, tag_ordering)

which get's the xml files in current directory and creates a new xml file from the input file.

It then finds a tag, ccl_tag, and replaces that tag with the same tag only rearranged, and with specific tags included.

However, the code to replace the tag does not work. I can confirm by printing new_ccl that the values are correct, but does not copy properly.

What could be wrong?

Prosy Arceno
  • 2,616
  • 1
  • 8
  • 32

0 Answers0