1

I'm using PDFBox 2.0.0 RC2 on Windows 7 with Java 1.8.0_66. I'm using it to create a PDF from a collection of 200 dpi page-sized image files, both JPEG and PNG.

It turns out that when adding JPEG files to a PDF, the PDImageXObject.createFromFile() routine fails to close an internal file handle, thus locking the image file for the lifetime of the application. When adding PNG files to a PDF, there is no problem.

Here's some sample code that reproduces the issue. Run it while watching the open file handles of the java.exe process in Process Explorer (from Sysinternals). My test uses about 20 full-sized JPEG files. Note that after the method exits, several image files are still locked.

/**
 * Builds a PDF with one page per image; used to reproduce the JPEG file-handle leak.
 *
 * <p>Every image is deliberately loaded through {@code PDImageXObject.createFromFile()},
 * because that is the call under test. The document itself is closed on every exit path,
 * so this method's own document handling does not leave anything open.
 *
 * @param pdfFilename    path of the PDF file to write
 * @param imageFilenames paths of the source images, one page per image; the scaling
 *                       below assumes they are 200 dpi scans
 * @return {@code true} if the PDF was written, {@code false} if any {@code IOException}
 *         occurred while loading an image, drawing a page, or saving
 */
public Boolean CreateFromImages_Broken(String pdfFilename, String[] imageFilenames) {

    // try-with-resources: the document is closed even when we bail out early
    try (PDDocument doc = new PDDocument()) {
        for (String imageFilename : imageFilenames) {
            PDPage page = new PDPage();
            doc.addPage(page);

            PDImageXObject pdImage = PDImageXObject.createFromFile(imageFilename, doc);

            // at this point, if the imageFilename is a jpeg, pdImage holds onto a handle for
            // the given imageFilename and that file remains locked until the application is closed

            try (PDPageContentStream contentStream = new PDPageContentStream(doc, page)) {
                // convert 200 dpi pixels to PDF user-space units (72 per inch)
                float scale = 72f / 200;
                float width = pdImage.getWidth() * scale;
                float height = pdImage.getHeight() * scale;
                page.setMediaBox(new PDRectangle((int) width, (int) height));
                contentStream.drawImage(pdImage, 0, 0, width, height);
            }
        }

        doc.save(pdfFilename);
        return true;
    } catch (IOException ioe) {
        return false;
    }
}
Ketchup201
  • 129
  • 1
  • 9

1 Answer

1

As a workaround, I reviewed the source code for PNG and JPEG handling, and I've had success by implementing this, which seems to work for both file types:

/**
 * Builds a PDF with one page per image without leaving the JPEG source files locked.
 *
 * <p>JPEG files ({@code .jpg} / {@code .jpeg}, case-insensitive) are read through a stream
 * that this method opens and closes itself and are handed to
 * {@code JPEGFactory.createFromStream()}. All other files go through
 * {@code PDImageXObject.createFromFile()}, which seems to be OK for PNG images.
 *
 * @param pdfFilename    path of the PDF file to write
 * @param imageFilenames paths of the source images, one page per image; the scaling
 *                       below assumes they are 200 dpi scans
 * @return {@code true} if the PDF was written, {@code false} if any {@code IOException}
 *         occurred while loading an image, drawing a page, or saving
 */
public Boolean CreateFromImages_FIXED(String pdfFilename, String[] imageFilenames) {

    // try-with-resources: the document is closed even when we bail out early
    try (PDDocument doc = new PDDocument()) {
        for (String imageFilename : imageFilenames) {
            PDPage page = new PDPage();
            doc.addPage(page);

            PDImageXObject pdImage;

            // locale-independent extension check; match ".jpeg" as well as ".jpg" so
            // those files do not fall back to the leaking createFromFile path
            String lowerName = imageFilename.toLowerCase(java.util.Locale.ROOT);
            if (lowerName.endsWith(".jpg") || lowerName.endsWith(".jpeg")) {
                // work around the JPEG issue by opening our own stream, which we close
                // ourselves instead of PDFBox leaking it. The stream can be closed right
                // after createFromStream: per the PDFBox note below the answer, that
                // call already reads the whole stream into memory.
                try (FileInputStream fis = new FileInputStream(new File(imageFilename))) {
                    pdImage = JPEGFactory.createFromStream(doc, fis);
                }
            } else {
                pdImage = PDImageXObject.createFromFile(imageFilename, doc);
            }

            try (PDPageContentStream contentStream = new PDPageContentStream(doc, page)) {
                // convert 200 dpi pixels to PDF user-space units (72 per inch)
                float scale = 72f / 200;
                float width = pdImage.getWidth() * scale;
                float height = pdImage.getHeight() * scale;
                page.setMediaBox(new PDRectangle((int) width, (int) height));
                contentStream.drawImage(pdImage, 0, 0, width, height);
            }
        }

        doc.save(pdfFilename);
        return true;
    } catch (IOException ioe) {
        return false;
    }
}
Ketchup201
  • 129
  • 1
  • 9
  • 1
    You actually don't have to wait so long to close `fis` because `JPEGFactory.createFromStream(doc, fis)` already reads the whole stream into a `byte[]`. – mkl Jan 19 '16 at 09:09