0

Consider the XML file, Report.xml :

<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
   <Report FileName="abc.bin" reportDate="05/12/2016 02:44:22 AM">
      <Statistics>
        <child value="abc">
         <subchild>...</subchild>
        </child>
        <child value="xyz">
         <subchild>...</subchild>
        </child>
      </Statistics>
      <Properties>
        <child1>...</child1>
        <child2>...</child2>
        .
        .
        .
        <childn>...</childn>
      </Properties>
      <OverallStatistics>
        <child1>...</child1>
        <child2>...</child2>
        .
        .
        .
        <childn>...</childn>
      </OverallStatistics>
  </Report>

I just want to split the above XML file as:

ReportSplit1.xml

<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
 <Report FileName="abc.bin" reportDate="05/12/2016 02:44:22 AM">
   <Statistics>
      <child value="abc">
         <subchild>...</subchild>
      </child>
   </Statistics>
   <Properties>
        <child1>...</child1>
        <child2>...</child2>
        .
        .
        .
        <childn>...</childn>
   </Properties>
   <OverallStatistics>
        <child1>...</child1>
        <child2>...</child2>
        .
        .
        .
        <childn>...</childn>
    </OverallStatistics>
</Report>

ReportSplit2.xml

<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
 <Report FileName="abc.bin" reportDate="05/12/2016 02:44:22 AM">
   <Statistics>
      <child value="xyz">
         <subchild>...</subchild>
      </child>
   </Statistics>
   <Properties>
        <child1>...</child1>
        <child2>...</child2>
        .
        .
        .
        <childn>...</childn>
   </Properties>
   <OverallStatistics>
        <child1>...</child1>
        <child2>...</child2>
        .
        .
        .
        <childn>...</childn>
    </OverallStatistics>
</Report>

i.e. retaining the parent node's attributes and retaining the sibling nodes. The split should be made only on the children in Statistics node.

I followed the workaround given in the linked answer, changing the snippet as follows:

package xmlsplitting;
import java.io.*;
import java.util.ArrayList;
import java.util.List;
import javax.xml.parsers.*;
import org.w3c.dom.*;
import org.xml.sax.*;
import javax.xml.transform.*; 
import javax.xml.transform.dom.DOMSource; 
import javax.xml.transform.stream.StreamResult;
import javax.xml.xpath.*;
public class XmlSplit
{
    /**
     * Writes every {@code /Report/Statistics/child} element of Report.xml to its own
     * file, {@code ReportSplit1.xml}, {@code ReportSplit2.xml}, ... numbered by the
     * element's 1-based position in the XPath result.
     *
     * <p>Only the cloned {@code child} subtree is serialized; the enclosing
     * {@code Report} element, its attributes and the sibling sections are not part
     * of the output.
     *
     * @param arg command-line arguments (unused)
     * @throws Exception if the input cannot be parsed, the XPath cannot be evaluated,
     *                   or an output file cannot be written
     */
    static public void main(String[] arg) throws Exception
    {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        DocumentBuilder builder = factory.newDocumentBuilder();
        Document doc = builder.parse("D:\\Analyzer\\FileSplit\\Report.xml");
        TransformerFactory tranFactory = TransformerFactory.newInstance();
        Transformer aTransformer = tranFactory.newTransformer();
        XPath xpath = XPathFactory.newInstance().newXPath();
        NodeList list = (NodeList)xpath.evaluate("//Report/Statistics/child", doc, XPathConstants.NODESET);
        // NodeList is 0-based: start at 0 so the first <child> is not skipped.
        for (int i = 0; i < list.getLength(); i++)
        {
            Node element = list.item(i).cloneNode(true);
            if (element.hasChildNodes())
            {
                Source src = new DOMSource(element);
                // File names are 1-based (ReportSplit1.xml is the first child).
                String outputPath = "D:\\Analyzer\\FileSplit\\ReportSplit" + (i + 1) + ".xml";
                // try-with-resources closes the stream even if the transform throws.
                try (FileOutputStream fs = new FileOutputStream(outputPath))
                {
                    Result dest = new StreamResult(fs);
                    aTransformer.transform(src, dest);
                }
            }
        }
    }
}

The resulting split XML files are:

ReportSplit1.xml

  <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
   <child value="abc">
      <subchild>...</subchild>
   </child>

ReportSplit2.xml

  <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
   <child value="xyz">
      <subchild>...</subchild>
   </child>

Could anyone suggest a way to achieve the desired split XML files?

Community
  • 1
  • 1

2 Answers2

0

Your XPath expression selects only the child elements and their descendants. You'll need to add further expressions for the other sections, i.e. Statistics and Properties, as I presume you'll want to include those in the split files too.

Arphylion
  • 38
  • 1
  • 6
0

Consider using XSLT, the declarative, special-purpose programming language to transform XML documents instead of XPath as you require whole document transformation. For your purposes, an embedded, dynamic XSLT run on a loop of values can output multiple XML files:

XSLT script (embedded in the Java code below; this example uses 'abc', which is replaced with each value in turn on every loop iteration)

<xsl:transform xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<!-- Copies the input document as-is, except that every <child> element whose
     value attribute is not 'abc' (or is missing) is left out of the output. -->
<xsl:output version="1.0" encoding="UTF-8" indent="yes" />
<!-- Drop whitespace-only text nodes from the source so the output is re-indented cleanly -->
<xsl:strip-space elements="*"/>

  <!-- Identity Transform: copies every attribute and node, recursing into children -->
  <xsl:template match="@*|node()">
    <xsl:copy>
      <xsl:apply-templates select="@*|node()"/>
    </xsl:copy>
  </xsl:template>

  <!-- Empty template: a matched <child> produces no output, i.e. it is removed.
       Being more specific than the identity template, it takes priority for these elements. -->
  <xsl:template match="child[not(@value='abc')]"/>

</xsl:transform>

Java code

import javax.xml.parsers.*;
import javax.xml.transform.*;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import javax.xml.transform.OutputKeys;

import java.io.*;
import java.net.URISyntaxException;

import org.w3c.dom.Document;
import org.xml.sax.SAXException;
public class XmlSplit {
    /**
     * Produces one output file per value in a fixed list. Each file is a copy of the
     * source XML in which every {@code child} element whose {@code value} attribute
     * differs from the current value (or is absent) has been removed by a generated
     * XSLT 1.0 stylesheet.
     *
     * @param args command-line arguments (unused)
     * @throws IOException                  if the source file cannot be read
     * @throws URISyntaxException           declared for caller compatibility; not thrown by this body
     * @throws SAXException                 if the source file is not well-formed XML
     * @throws ParserConfigurationException if no DOM parser can be created
     * @throws TransformerException         if the stylesheet cannot be compiled or applied
     */
    public static void main(String[] args) throws IOException, URISyntaxException,
                                                  SAXException, ParserConfigurationException,
                                                  TransformerException {

        // Load XML Source
        String inputXML = "/path/to/XMLSource.xml";

        // Declare XML Values Array
        String[] xmlVals = {"abc", "xyz"};

        // Parse the source once; the transformation does not modify the DOM,
        // so the same tree can feed every iteration.
        DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
        DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
        Document doc = docBuilder.parse (new File(inputXML));
        DOMSource source = new DOMSource(doc);

        // The factory does not depend on the loop value either.
        TransformerFactory prettyPrint = TransformerFactory.newInstance();

        // Iterate through Values running dynamic, embedded XSLT
        for (String s: xmlVals) {
            String outputXML = "/path/to/output_" + s + ".xml";

            // NOTE: s is spliced into the stylesheet unescaped; a value containing
            // an apostrophe or XML markup would yield an invalid stylesheet.
            String xslStr = String.join("\n",
                "<xsl:transform xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\" version=\"1.0\">",
                "<xsl:output version=\"1.0\" encoding=\"UTF-8\" indent=\"yes\" />",
                "<xsl:strip-space elements=\"*\"/>",
                "<xsl:template match=\"@*|node()\">",
                "<xsl:copy>",
                "<xsl:apply-templates select=\"@*|node()\"/>",
                "</xsl:copy>",
                "</xsl:template>",
                "<xsl:template match=\"child[not(@value='"+ s +"')]\"/>",
                "</xsl:transform>");

            Source xslt = new StreamSource(new StringReader(xslStr));

            // XSLT Transformation  with pretty print
            Transformer transformer = prettyPrint.newTransformer(xslt);

            transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "no");
            transformer.setOutputProperty(OutputKeys.STANDALONE, "yes");
            transformer.setOutputProperty(OutputKeys.METHOD, "xml");
            transformer.setOutputProperty(OutputKeys.INDENT, "yes");
            transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
            transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");

            // Output Result to File
            StreamResult result = new StreamResult(new File(outputXML));
            transformer.transform(source, result);
        }

    }
}
Parfait
  • 104,375
  • 17
  • 94
  • 125