0

I'm trying to replace special characters (umlauts) in an XML file using StAX.

I'm able to achieve this when the input and output XML files are different, but the processing time for a 100 MB file is about 10 minutes. I think this is due to the I/O operations: writing to a new file line by line takes time.

Is it possible to read the XML file and write the replaced strings back to the same file through StAX, which would save the extra I/O operations?

Any help or pointers would be much appreciated.

public class StAXXMLFileDemo {
 static XMLEventFactory m_eventFactory = XMLEventFactory.newInstance();

 static String[] searchList = { "Ä", "ä", "Ö", "ö", "Ü", "ü", "ß" };
 static String[] replacementList = { "Ae", "ae", "Oe", "oe", "Ue", "ue", "ss" };

 /**
  * Copies every {@code *.xml} file found directly inside the input directory
  * into the output directory, replacing each entry of {@code searchList} with
  * the matching entry of {@code replacementList} in the character data.
  *
  * <p>The output directory is emptied before processing starts. A file that
  * cannot be read, parsed or written is reported on standard error and the
  * remaining files are still processed.
  *
  * @param args {@code args[0]} is the input directory, {@code args[1]} is the
  *             output directory; both must already exist
  */
 public static void main(String[] args) {
     if (args.length != 2) {
         System.err.println("Usage: StAXXMLFileDemo <inputDirectory> <outputDirectory>");
         return;
     }

     File inputfileDirectory = new File(args[0]);
     File outputfileDirectory = new File(args[1]);

     // Validate BEFORE cleaning: nothing may be deleted when the arguments are unusable.
     if (!inputfileDirectory.isDirectory() || !outputfileDirectory.isDirectory()) {
         System.err.println("Both arguments must be existing directories");
         return;
     }
     // Cleaning the output directory would otherwise delete the input files themselves.
     if (sameDirectory(inputfileDirectory, outputfileDirectory)) {
         System.err.println("Input and output directory must be different");
         return;
     }

     try {
         FileUtils.cleanDirectory(outputfileDirectory);
     } catch (IOException e1) {
         // Best effort: stale files may remain, but processing still goes ahead.
         e1.printStackTrace();
         System.out.println("Exception in deleting output directory files");
     }

     File[] files = inputfileDirectory.listFiles();
     if (files == null) {
         // listFiles() returns null when the directory cannot be listed.
         System.err.println("Unable to list files in " + inputfileDirectory);
         return;
     }

     // One factory pair is enough for all files; no need to rebuild them per file.
     XMLInputFactory factory = XMLInputFactory.newInstance();
     XMLOutputFactory outFactory = XMLOutputFactory.newInstance();

     for (File file : files) {
         if (file.isDirectory() || !file.getAbsolutePath().endsWith(".xml")) {
             continue;
         }
         File target = new File(outputfileDirectory, file.getName());
         try {
             transformFile(file, target, factory, outFactory);
         } catch (IOException e) {
             e.printStackTrace();
         } catch (XMLStreamException e) {
             e.printStackTrace();
         }
     }
 }

 /** Tells whether both paths denote the same directory, falling back to absolute paths if canonicalisation fails. */
 private static boolean sameDirectory(File first, File second) {
     try {
         return first.getCanonicalFile().equals(second.getCanonicalFile());
     } catch (IOException e) {
         return first.getAbsoluteFile().equals(second.getAbsoluteFile());
     }
 }

 /** Opens buffered streams for {@code source} and {@code target}, copies the XML between them and always closes both streams. */
 private static void transformFile(File source, File target,
         XMLInputFactory inputFactory, XMLOutputFactory outputFactory)
         throws IOException, XMLStreamException {
     InputStream in = new java.io.BufferedInputStream(new FileInputStream(source));
     try {
         OutputStream out = new java.io.BufferedOutputStream(new FileOutputStream(target));
         try {
             copyEvents(in, out, inputFactory, outputFactory);
         } finally {
             out.close();
         }
     } finally {
         in.close();
     }
 }

 /** Streams all XML events from {@code in} to {@code out}, rewriting character data on the way, and always closes reader and writer. */
 private static void copyEvents(InputStream in, OutputStream out,
         XMLInputFactory inputFactory, XMLOutputFactory outputFactory)
         throws XMLStreamException {
     XMLEventReader eventReader = inputFactory.createXMLEventReader(in, "UTF-8");
     try {
         XMLEventWriter eventWriter = outputFactory.createXMLEventWriter(out, "UTF-8");
         try {
             while (eventReader.hasNext()) {
                 // Every event is forwarded, so comments, DTDs and entity
                 // references are no longer dropped from the output.
                 eventWriter.add(replaceUmlauts(eventReader.nextEvent()));
             }
             eventWriter.flush();
         } finally {
             eventWriter.close();
         }
     } finally {
         eventReader.close();
     }
 }

 /** Returns a CHARACTERS event with the replacements applied to its text; any other event is returned unchanged. */
 private static XMLEvent replaceUmlauts(XMLEvent event) {
     if (event.getEventType() != XMLStreamConstants.CHARACTERS) {
         return event;
     }
     // getData() is the raw text; toString() is implementation-defined and
     // may include markup, so it must not be used as the text source.
     String text = event.asCharacters().getData();
     return m_eventFactory.createCharacters(
             StringUtils.replaceEachRepeatedly(text, searchList, replacementList));
 }
Viswanath
  • 3
  • 6
  • I'm facing the same issue as posted here http://stackoverflow.com/questions/28629709/java-stax-standalone-property-of-startdocument – Viswanath Apr 27 '15 at 08:58
  • Has anyone encountered this issue and found a solution for it? I think the StAX API has an issue writing the event correctly. – Viswanath Apr 27 '15 at 08:59

0 Answers0