2

I am trying to parse an XML document that includes special characters, such as "//gpa[.<2.0]", in some of its nodes. However, the parser reads only part of the node's content, such as "//gpa[.", stopping where the special character is located. I have tried different approaches found online but still get the same result. How can I parse the node's entire content?

Here is the relevant part of my code. Thank you in advance:

package temp;

import java.util.*;
import java.lang.Object.*;
import org.xml.sax.*;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.parsers.SAXParser;
import org.apache.commons.lang.StringEscapeUtils;

// SAX-style callback methods plus a main() that parses "authorization.xml".
// NOTE(review): the callbacks below are static, and main() hands a
// LoginHandlerBase (not shown in this snippet) to the parser rather than an
// instance of this class, so it cannot be determined from here whether the
// parser ever invokes these methods — confirm against LoginHandlerBase.
public class Login implements LoginInterface{

// Qualified name of the most recently started element; assigned in startElement().
public static String elementName="";    
// Text buffer: cleared in startElement() and appended to in characters().
public static StringBuilder chars = new StringBuilder();

// Start-of-document callback; intentionally does nothing.
// NOTE(review): IOException is declared here but no java.io import is visible
// in this snippet — confirm it is imported elsewhere.
public static void startDocument ()
throws SAXException, IOException
{
}

// End-of-document callback; intentionally does nothing.
public static void endDocument ()
throws SAXException
{
}

// Appends the reported slice buf[offset .. offset+len) to the shared buffer and,
// while the current element name is "object", prints everything buffered so far.
// Because the print sits inside this callback, it runs once per call: if the
// text of one element is delivered in several calls, each call prints the
// content accumulated up to that point rather than one final, complete value.
// elementName is never reset in endElement(), so text reported after the
// closing tag is also appended and printed while the name is still "object".
public static void characters (char buf [], int offset, int len)
throws SAXException
{
    chars.append(new String(buf , offset, len));                
        if(elementName.equals("object"))
            {                    
            //tempObjectStr += org.apache.commons.lang.StringEscapeUtils.escapeXml( new String(buf, offset, len));
            //characters(s.toCharArray(),0,s.length());                                            
            System.out.println( "objectNodeContent: " + chars.toString());            
            }
}

// Start-of-element callback: empties the text buffer and records the element's
// name (the `name` argument) as the current element.
public static void startElement (String uri, String localname, String name, Attributes attrs)
throws SAXException
 {
    chars.setLength(0);
    //nodeContent = "";
    elementName=name;       
 }

// End-of-element callback; intentionally does nothing (neither the buffer nor
// elementName is touched here).
public static void endElement (String uri, String localName, String name)
throws SAXException
{        
}    

// Creates a SAX parser and parses "authorization.xml" with a new
// LoginHandlerBase; any exception is caught and printed with an "Error:" prefix.
// NOTE(review): `factory` is not declared anywhere in this snippet — presumably
// a SAXParserFactory defined elsewhere; confirm.
public static void main(String args[]) {    
    try
    {            
         SAXParser saxParser2 = factory.newSAXParser();
         saxParser2.parse( "authorization.xml",new LoginHandlerBase());             
    }
    catch(Exception e)
    {
        System.out.println("Error:"+e);
    }
}
}

And here is part of Authorization.xml:

<rules>
    <rule>
        <role>staff</role>
        <object>/department/gradstudent/address</object>        
        <action>Read</action>
        <type>R</type>
    </rule>
    <rule>
        <role>staff</role>
        <object>//gpa[.&lt;2.0]</object>        
        <action>Read</action>
        <type>L</type>
    </rule>
</rules>

The output looks like this:

objectNodeContent: /department/gradstudent/address
objectNodeContent: //gpa[.
user2374297
  • 21
  • 1
  • 4
  • possible duplicate of [SAX parsing and special characters](http://stackoverflow.com/questions/13336140/sax-parsing-and-special-characters) – Tim Biegeleisen Jun 11 '15 at 04:49
  • Please review the above SO question. You need to create a custom SAX handler which extends `DefaultHandler`. – Tim Biegeleisen Jun 11 '15 at 04:50
  • @Tim Biegeleisen, he doesn't appear to be making the same (common) mistake as the person who asked that question. – Michael Kay Jun 11 '15 at 08:42
  • @MichaelKay I would welcome you to answer the question if you are so inclined. – Tim Biegeleisen Jun 11 '15 at 08:43
  • 2
    My initial reaction was: you must be making the common mistake of assuming that all the character data is passed to the characters() callback in one lump. But your code suggests that you are buffering it properly. So something else is wrong, and I can't tell what. I suggest you trace all the calls to characters() (either using println(), or using an interactive debugger). – Michael Kay Jun 11 '15 at 08:44
  • @TimBiegeleisen We don't yet have enough information to answer the question. – Michael Kay Jun 11 '15 at 17:34

1 Answer

1

see javadoc

As Michael Kay said, the character data might arrive in several chunks, so your ContentHandler should collect them. The code below shows how to do this:

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

import javax.xml.parsers.SAXParserFactory;
import java.io.File;

/**
 * SAX handler that buffers element text and prints it whenever an
 * {@code object} element is closed.
 */
public class MySAXHandler extends DefaultHandler {
    /** Text gathered since the most recent start or end tag. */
    private StringBuilder textBuffer = new StringBuilder();

    /** Drops any buffered text each time a new element opens. */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes)
            throws SAXException {
        resetBuffer();
    }

    /**
     * Adds the reported slice of {@code ch} to the buffer. Nothing is printed
     * here, so text delivered across several calls is simply concatenated.
     */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        textBuffer.append(ch, start, length);
    }

    /**
     * Prints the buffered text when the closing element's local name is
     * {@code object}, then empties the buffer in every case.
     */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        boolean closingObject = localName.equals("object");
        if (closingObject) {
            System.out.println("objectNodeContent: " + textBuffer);
        }
        resetBuffer();
    }

    /** Empties the text buffer without reallocating it. */
    private void resetBuffer() {
        textBuffer.setLength(0);
    }

    /** Parses {@code Authorization.xml} with a namespace-aware SAX parser and this handler. */
    public static void main(String[] args) throws Exception {
        SAXParserFactory parserFactory = SAXParserFactory.newInstance();
        // Namespace awareness is required for localName to be populated in the callbacks.
        parserFactory.setNamespaceAware(true);
        javax.xml.parsers.SAXParser parser = parserFactory.newSAXParser();
        File rulesFile = new File("Authorization.xml");
        parser.parse(rulesFile, new MySAXHandler());
    }
}

The output of the above code is:

objectNodeContent: /department/gradstudent/address
objectNodeContent: //gpa[.<2.0]
Santhosh Kumar Tekuri
  • 3,012
  • 22
  • 22