Use the StringEscapeUtils class from the Apache Commons Lang library (commons-lang.jar).
Here is working code:
import java.io.IOException;
import java.io.InputStream;
import java.io.StringWriter;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringEscapeUtils;
/**
 * Downloads a document from a fixed URL, converts HTML entities in it back to
 * their literal characters, and prints the result to standard output.
 */
public class Test {

    /**
     * Fetches the document, unescapes it and prints it.
     *
     * <p>Failures are logged at {@link Level#SEVERE} rather than propagated,
     * so the program always terminates normally.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        String url = "https://www.sec.gov/Archives/edgar/data/16160/000001616016000061/calm-20160528.xml";
        try {
            URL surl = new URL(url);
            URLConnection con = surl.openConnection();
            // A timeout of 0 means "wait indefinitely" for both connect and read.
            con.setConnectTimeout(0);
            con.setReadTimeout(0);

            StringWriter writer = new StringWriter();
            // try-with-resources guarantees the stream is closed even if the copy fails.
            try (InputStream in = con.getInputStream()) {
                IOUtils.copy(in, writer, "UTF-8");
            }

            System.out.println(StringEscapeUtils.unescapeHtml(writer.toString()));
        } catch (MalformedURLException ex) {
            Logger.getLogger(Test.class.getName()).log(Level.SEVERE, "Invalid URL: " + url, ex);
        } catch (IOException ex) {
            Logger.getLogger(Test.class.getName()).log(Level.SEVERE, "Failed to read " + url, ex);
        }
    }
}
The output no longer contains escaped characters. Here is a sample from the console:
<td valign="bottom" style="width:02.96%;border-top:1pt none #D9D9D9 ;border-left:1pt none #D9D9D9 ;border-bottom:1pt none #D9D9D9 ;border-right:1pt none #D9D9D9 ;background-color: #auto;height:1.00pt;padding:0pt;">
<p style="margin:0pt;font-family:Times New Roman;height:1.00pt;overflow:hidden;font-size:0pt;">
</p>
</td>
<td valign="bottom" style="width:02.40%;border-top:1pt none #D9D9D9 ;border-left:1pt none #D9D9D9 ;border-bottom:1pt none #D9D9D9 ;border-right:1pt none #D9D9D9 ;background-color: #auto;height:1.00pt;padding:0pt;">
<p style="margin:0pt;font-family:Times New Roman;height:1.00pt;overflow:hidden;font-size:0pt;">
</p>
</td>
<td valign="bottom" style="width:11.82%;border-top:1pt none #D9D9D9 ;border-left:1pt none #D9D9D9 ;border-bottom:1pt none #D9D9D9 ;border-right:1pt none #D9D9D9 ;background-color: #auto;height:1.00pt;padding:0pt;">
<p style="margin:0pt;font-family:Times New Roman;height:1.00pt;overflow:hidden;font-size:0pt;">
</p>
</td>
Keep in mind that you need the following imports (from the commons-io and commons-lang libraries, respectively):
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringEscapeUtils;