0

I want to split an XML file into separate XML documents by reading the `<staff>` and `</staff>` tags. I have split the XML using `StringUtils.substringsBetween`, but I am only able to split two XML documents out of the file: the third one is getting appended to the second.

Program:

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.lang3.StringUtils;

public class SBuff {

    private BufferedReader br;

    public SBuff() {
        // TODO Auto-generated constructor stub
    }

    public static void main(String[] args) throws IOException {

         String data = "<?xml version=\"1.0\"?>\r\n" + 
                "<company>\r\n" + 
                "    <staff>\r\n" + 
                "        <firstname>yong</firstname>\r\n" + 
                "        <lastname>mook kim</lastname>\r\n" + 
                "        <nickname>mkyong</nickname>\r\n" + 
                "        <salary>100000</salary>\r\n" + 
                "    </staff>\r\n" + 
                "    <staff>\r\n" + 
                "        <firstname>low</firstname>\r\n" + 
                "        <lastname>yin fong</lastname>\r\n" + 
                "        <nickname>fong fong</nickname>\r\n" + 
                "        <salary>200000</salary>\r\n" + 
                "    </staff>\r\n" + 
                "    <staff>\r\n" + 
                "        <firstname>low</firstname>\r\n" + 
                "        <lastname>yin fong</lastname>\r\n" + 
                "        <nickname>fong fong</nickname>\r\n" + 
                "        <salary>200000</salary>\r\n" + 
                "    </staff>\r\n" + 
                "</company>\r\n"+
                "</xml>";

        SBuff s = new SBuff();
        s.loadData(data);
    }

    public void loadData(String stream) throws IOException {
        String[] list = StringUtils.substringsBetween(stream,
                "<staff">", "</staff>");

        StringBuilder stringBuilder = new StringBuilder();
        for (String s : list) {

            stringBuilder.append("<staff>");
            stringBuilder.append(s);
            stringBuilder.append("</staff>");
            System.out.println("##################################");
            System.out.println(stringBuilder.toString());
            System.out.println("##################################");
        }

    }

    public void display(String data) {
        System.out.println("Buffer Data: " + data);
    }
}

Output:

Expected:

##################################
<staff> 
   <firstname>yong</firstname> 
   <lastname>mook kim</lastname>
   <nickname>mkyong</nickname> 
   <salary>100000</salary>
</staff>
##################################
<staff>
   <firstname>low</firstname> 
   <lastname>yin fong</lastname> 
   <nickname>fong fong</nickname> 
   <salary>200000</salary>
</staff>
##################################
<staff>
   <firstname>low</firstname> 
   <lastname>yin fong</lastname> 
   <nickname>fong fong</nickname> 
   <salary>200000</salary>
</staff>
##################################


Actual:

##################################
<staff> 
   <firstname>yong</firstname> 
   <lastname>mook kim</lastname>
   <nickname>mkyong</nickname> 
   <salary>100000</salary>
</staff>
##################################
<staff>
   <firstname>low</firstname> 
   <lastname>yin fong</lastname> 
   <nickname>fong fong</nickname> 
   <salary>200000</salary>
</staff><staff>
   <firstname>low</firstname> 
   <lastname>yin fong</lastname> 
   <nickname>fong fong</nickname> 
   <salary>200000</salary>
</staff>
##################################

I tried several different approaches, but none of them seems to work out. Thanks in advance.

JKB
  • 73
  • 2
  • 8
  • 3
    Never process XML with string manipulation tools, unless you know exactly what you are doing. Parse the XML string with a XML parser and do your changes at XML element level. – vanje Jun 13 '19 at 12:37
  • manipulation of serialized data of any kind using regex, String#split etc is always a bad idea – Alex Salauyou Jun 13 '19 at 12:59

2 Answers

1

You can use an XML parser like jsoup.

<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.12.1</version>
</dependency>

Parse and splitting:

 // Parse the string using jsoup's XML parser (Parser.xmlParser()).
 Document parsed = Jsoup.parse(data, "", Parser.xmlParser());
 // Print every <staff> element, each followed by a marker line.
 parsed.getElementsByTag("staff").forEach(staff -> {
   System.out.println(staff);
   System.out.println("##################################");
 });
TechFree
  • 2,600
  • 1
  • 17
  • 18
0

To get the expected output, your loadData function should be:

/**
 * Prints every {@code <staff>...</staff>} fragment found in {@code stream},
 * with a marker line before the first fragment and after each one.
 *
 * @param stream the text to search for {@code <staff>} fragments
 * @throws IOException declared by the signature; nothing in this body throws
 *         it explicitly
 */
public void loadData(String stream) throws IOException {
    final String separator = "##################################";
    String[] fragments = StringUtils.substringsBetween(stream, "<staff>", "</staff>");

    System.out.println(separator);
    if (fragments == null) {
        // substringsBetween returns null when nothing matches; without this
        // guard the for-each below would throw a NullPointerException.
        return;
    }
    for (String fragment : fragments) {
        // Each fragment is built on its own, so earlier fragments are not
        // repeated; the delimiters stripped by substringsBetween are put back.
        System.out.println("<staff>" + fragment + "</staff>");
        System.out.println(separator);
    }
}
Nitika Bansal
  • 740
  • 5
  • 10