I implemented an XHTML-to-DocX converter using DocX4J. It creates the DocX file without problems.
To finish my task I decided to implement a simple test. The test consists of counting the number of characters in the DocX created and then comparing it with the already known number of characters from the XHTML (see source code below).
The code of my test is based on a sample from the DocX4J site, but it does not work for me. Although I can see that the content of the DocX created by my converter is equal to the content of the XHTML file, my test code always returns zero as the number of characters in the DocX file. :-\
Can anyone help me discover the cause of this unexpected result?
Thanks in advance!
package main;
import java.io.File;
import java.io.IOException;
import java.io.StringWriter;
import org.docx4j.TextUtils;
import org.docx4j.jaxb.Context;
import org.docx4j.openpackaging.contenttype.ContentType;
import org.docx4j.openpackaging.exceptions.Docx4JException;
import org.docx4j.openpackaging.exceptions.InvalidFormatException;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.docx4j.openpackaging.parts.PartName;
import org.docx4j.openpackaging.parts.WordprocessingML.AlternativeFormatInputPart;
import org.docx4j.openpackaging.parts.WordprocessingML.MainDocumentPart;
import org.docx4j.relationships.Relationship;
import org.docx4j.wml.CTAltChunk;
import org.docx4j.wml.Document;
/**
 * Counts the characters of a DocX package that was generated from an XHTML
 * string using docx4j, and compares the result with a known expected value.
 *
 * @author Cláudio
 */
public class CountChars {

    /**
     * Builds a DocX from a fixed XHTML table, saves it as {@code test.docx},
     * and prints {@code Success} when the extracted character count matches
     * the expected one, {@code Fail} otherwise.
     *
     * @param args
     *            ignored
     */
    public static void main(String[] args) {
        String xhtml = "<html><body><table border=\"1\"><tr><td>Propriedade</td><td>Amostra 1</td><td>Amostra 2</td></tr><tr><td>Prop1</td><td>10.0</td><td>111.0</td></tr><tr><td>Prop2</td><td>20.0</td><td>222.0</td></tr></table></body></html>";
        // Sum of the lengths of the nine table-cell texts in the XHTML above.
        int expectedNChars = 57;

        WordprocessingMLPackage docx = export(xhtml);

        // Saving only proves that the package can be written; a failure here
        // is reported but deliberately does not stop the character check.
        try {
            docx.save(new File("test.docx"));
        } catch (Docx4JException e) {
            e.printStackTrace();
        }

        if (countCharacters(docx) == expectedNChars) {
            System.out.println("Success");
        } else {
            System.out.println("Fail");
        }
    }

    /**
     * Creates a new DocX package whose main document references the given
     * XHTML through an altChunk.
     *
     * The XHTML bytes are stored in a separate {@code /hw.html} part; the
     * main document body receives only the altChunk element that points at
     * that part.
     *
     * NOTE(review): because the body holds just the altChunk reference, text
     * extraction over the main document will presumably not see the XHTML
     * text (the count comes back as zero). Making the text countable would
     * require converting the XHTML into WordprocessingML content instead of
     * embedding it; confirm which importer the docx4j version in use offers.
     *
     * @param xhtml
     *            XHTML markup to embed; must not be null
     *
     * @return the package containing the embedded XHTML part
     *
     * @throws NullPointerException
     *             if {@code xhtml} is null
     * @throws IllegalStateException
     *             if docx4j rejects the package, part name or relationship
     */
    private static WordprocessingMLPackage export(String xhtml) {
        if (xhtml == null) {
            throw new NullPointerException("xhtml must not be null");
        }

        WordprocessingMLPackage wordMLPackage;
        Relationship altChunkRel;
        try {
            wordMLPackage = WordprocessingMLPackage.createPackage();

            AlternativeFormatInputPart afiPart = new AlternativeFormatInputPart(
                    new PartName("/hw.html"));
            // Explicit charset so the stored bytes do not depend on the
            // platform default encoding.
            afiPart.setBinaryData(xhtml
                    .getBytes(java.nio.charset.StandardCharsets.UTF_8));
            afiPart.setContentType(new ContentType("text/html"));

            altChunkRel = wordMLPackage.getMainDocumentPart().addTargetPart(
                    afiPart);
        } catch (InvalidFormatException e) {
            // Fail here with the real cause instead of continuing with null
            // references and hitting an unrelated NullPointerException later.
            throw new IllegalStateException(
                    "Could not embed the XHTML part in the DocX package", e);
        }

        // .. the bit in document body
        CTAltChunk ac = Context.getWmlObjectFactory().createCTAltChunk();
        ac.setId(altChunkRel.getId());
        wordMLPackage.getMainDocumentPart().addObject(ac);

        // .. content type
        wordMLPackage.getContentTypeManager().addDefaultContentType("html",
                "text/html");
        return wordMLPackage;
    }

    /**
     * Counts chars (even whitespaces) in a docx.
     *
     * The count is the length of the text that {@code TextUtils.extractText}
     * writes for the main document part.
     *
     * Reference:
     * http://www.docx4java.org/forums/docx-java-f6/how-to-count-number-of-characters-in-a-docx-file-t767.html
     *
     * @param docx
     *            Document
     *
     * @return Number of chars in the document
     *
     * @throws IllegalStateException
     *             if the text extraction fails
     */
    private static int countCharacters(WordprocessingMLPackage docx) {
        MainDocumentPart documentPart = docx.getMainDocumentPart();
        Document wmlDocument = documentPart.getJaxbElement();

        // A StringWriter holds no system resource, so it needs no close().
        StringWriter extractedText = new StringWriter();
        try {
            TextUtils.extractText(wmlDocument, extractedText);
        } catch (Exception e) {
            // extractText declares a bare Exception; keep the cause attached
            // instead of printing it and throwing an empty exception.
            throw new IllegalStateException(
                    "Could not extract text from the main document part", e);
        }
        return extractedText.toString().length();
    }
}