0

I am trying to convert XHTML to PDF using iText or Flying Saucer. The XHTML file generally contains CJK Extension B (Japanese) characters. The resulting PDF shows all English characters correctly, but displays blanks in place of the CJK Extension B (Japanese) characters.

1 Answer

0

"" and "" are Chinese characters in CJK Extension B.

import com.lowagie.text.Document;
import com.lowagie.text.Font;
import com.lowagie.text.Paragraph;
import com.lowagie.text.Phrase;
import com.lowagie.text.pdf.BaseFont;
import com.lowagie.text.pdf.PdfWriter;

import java.awt.Desktop;
import java.io.File;
import java.io.FileOutputStream;
import java.util.LinkedHashMap;
import java.util.Map;

/**
 * Small demo that writes a string mixing ordinary text with CJK Unified Ideographs
 * Extension B characters into a PDF.
 *
 * <p>The text is split into runs of "Extension B" and "everything else"; each run is added to
 * the paragraph as its own {@link Phrase}, using {@link #CHI_EXTB_FONT} for Extension B runs
 * and {@link #CHI_FONT} for all other runs.
 */
public class TestPdf {

    /** Path of the PDF written by {@link #main(String[])}. */
    static final String DEST = "\\result.pdf";

    /** Font used for every run that is not in the Extension B block (path built from WINDIR). */
    static final String CHI_FONT = System.getenv("WINDIR") + "\\Fonts\\mingliu.ttc,1";

    /** Font used for runs of Extension B characters (path built from WINDIR). */
    static final String CHI_EXTB_FONT = System.getenv("WINDIR") + "\\Fonts\\mingliub.ttc,1";

    /**
     * Tells whether a code point belongs to the CJK Unified Ideographs Extension B block.
     *
     * @param codePoint the Unicode code point to classify
     * @return {@code true} if the code point is in Extension B, {@code false} otherwise
     */
    private static boolean isCJKExtensionB(int codePoint) {
        return Character.UnicodeBlock.of(codePoint)
                == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B;
    }

    /**
     * Splits {@code str} into maximal consecutive runs of Extension B / non-Extension B
     * characters, in their original order.
     *
     * <p>The key of each entry encodes which font the run needs: a negative key marks an
     * Extension B run, a key {@code >= 0} marks any other run. The magnitude is only a
     * sequence number that keeps the keys unique; iteration order is the text order.
     *
     * @param str the text to split; may be {@code null}
     * @return an insertion-ordered map of runs; empty for {@code null}, and a single entry
     *         {@code 0 -> str} when {@code str} is the empty string
     */
    private static Map<Integer, String> getChunks(String str) {
        Map<Integer, String> resultMap = new LinkedHashMap<>();
        if (str == null) {
            return resultMap;
        }

        final int length = str.length();
        int ordinal = 0;            // 1-based sequence number of the run being emitted
        int runStart = 0;           // index in str where the current run begins
        boolean runIsExtB = false;  // classification of the current run

        // Walk by code point, not by char: Extension B characters are surrogate pairs.
        for (int offset = 0; offset < length; ) {
            final int codePoint = str.codePointAt(offset);
            final boolean extB = isCJKExtensionB(codePoint);
            if (offset == 0) {
                runIsExtB = extB;
            } else if (extB != runIsExtB) {
                // Classification flipped: close the run that just ended.
                ordinal++;
                resultMap.put(runIsExtB ? -ordinal : ordinal, str.substring(runStart, offset));
                runStart = offset;
                runIsExtB = extB;
            }
            offset += Character.charCount(codePoint);
        }

        if (runStart < length) {
            // Emit the trailing run (never empty, so no blank chunks are produced).
            ordinal++;
            resultMap.put(runIsExtB ? -ordinal : ordinal, str.substring(runStart));
        }
        if (resultMap.isEmpty()) {
            // Only reachable for the empty string; keep a single non-Extension B entry.
            resultMap.put(0, str);
        }
        return resultMap;
    }

    /**
     * Writes {@code text} as a single paragraph into a new PDF at {@code targetFile}.
     *
     * @param targetFile the file to create or overwrite
     * @param text       the text to render; {@code null} yields an empty paragraph
     * @throws Exception if a font cannot be loaded or the PDF cannot be written
     */
    public void createPdf(File targetFile, String text) throws Exception {
        // Load the fonts before touching the target file, so a missing font does not
        // leave an empty output file behind.
        Font f = new Font(BaseFont.createFont(CHI_FONT, BaseFont.IDENTITY_H, BaseFont.EMBEDDED));
        Font fontExtB = new Font(BaseFont.createFont(CHI_EXTB_FONT, BaseFont.IDENTITY_H, BaseFont.EMBEDDED));

        // try-with-resources guarantees the file handle is released even when one of the
        // steps below throws before document.close() is reached.
        try (FileOutputStream out = new FileOutputStream(targetFile)) {
            Document document = new Document();
            PdfWriter.getInstance(document, out);
            document.open();

            Paragraph pz = new Paragraph(10, "", f);
            for (Map.Entry<Integer, String> chunk : getChunks(text).entrySet()) {
                // Negative key => Extension B run => needs the Extension B font.
                Font chunkFont = chunk.getKey() < 0 ? fontExtB : f;
                pz.add(new Phrase(10, chunk.getValue(), chunkFont));
            }
            pz.setKeepTogether(true);
            document.add(pz);

            document.close();
        }
    }

    /**
     * Generates the sample PDF at {@link #DEST} and, where the desktop integration is
     * available, tries to open it with the associated viewer.
     *
     * @param args ignored
     * @throws Exception if the PDF cannot be created
     */
    public static void main(String[] args) throws Exception {
        File outputFile = new File(DEST);
        // NOTE(review): the sample text as it appears here contains no Extension B code
        // points, so only CHI_FONT is exercised; include one (e.g. U+20000) to see the
        // fallback font in action.
        new TestPdf().createPdf(outputFile, "明月場");
        if (Desktop.isDesktopSupported()) {
            try {
                Desktop.getDesktop().open(outputFile);
            } catch (Exception ignored) {
                // Best effort only: no application registered for PDFs.
            }
        }
    }
}
Miller Cy Chan
  • 897
  • 9
  • 19