20

I am looking for a way to compress the images in an existing PDF and write out a smaller PDF for archiving. I cannot compress the images before the PDF is created, as that would compromise the quality of the print.

Each PDF is around 8 MB, and the bulk of that is made up of two images. The images are in PNG format and are brought into the PDF during generation (a third-party generator is used).

Is there a way to compress these in Java without using a third-party tool? I have tried PDFBox, iText and a third-party exe (Neevia); the third-party tool is the only one that has given me any results so far (down to around half a MB), but I do not want to relinquish control to an exe. Sample code is below.

import java.io.BufferedReader;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Arrays;

import org.apache.pdfbox.exceptions.COSVisitorException;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.common.PDStream;

import com.itextpdf.text.DocumentException;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.PdfStamper;
import com.itextpdf.text.pdf.PdfWriter;

/**
 * Proof of concept that pushes one source PDF through three different tools
 * (PDFBox, iText and the external Neevia command-line compressor), writing one
 * output file per tool so the resulting sizes can be compared.
 */
public class compressPDF {

/** PDF that every attempt reads as its input. */
private static final String SOURCE_PDF = "C:/_dev_env_/TEMP/compressPDF/TRPT_135002_1470_20131212_121423.PDF";

/**
 * Runs the three compression attempts one after the other.
 *
 * @param args not used
 * @throws IOException if a PDF cannot be read or written
 * @throws DocumentException if iText fails while writing its output
 * @throws COSVisitorException if PDFBox fails while saving its output
 */
public static void main (String[] args) throws IOException, DocumentException, COSVisitorException {
    compressWithPdfBox();
    compressWithIText();
    compressWithNeevia();
}

/**
 * Loads the source PDF with PDFBox and saves it again.
 *
 * @throws IOException if the document cannot be loaded, saved or closed
 * @throws COSVisitorException if PDFBox fails while saving
 */
private static void compressWithPdfBox() throws IOException, COSVisitorException {
    PDDocument doc = PDDocument.load("C:/_dev_env_/TEMP/compressPDF/TRPT_135002_1470_20131212_121423.PDF");
    try {
        // NOTE(review): this stream is newly created here and never attached to a
        // page or resource of the document, so compressing it presumably has no
        // effect on the saved file. Kept because it is what the experiment tried.
        PDStream stream = new PDStream(doc);
        stream.addCompression();

        doc.save("C:/_dev_env_/TEMP/compressPDF/compressed_pdfBox.pdf");
    } finally {
        // Release the document even when saving fails.
        doc.close();
    }
}

/**
 * Rewrites the source PDF with iText using full compression and the highest
 * deflate level. This re-sets every page's content stream but does not touch
 * the image data itself.
 *
 * @throws IOException if the source cannot be read or the result cannot be written
 * @throws DocumentException if iText fails while writing
 */
private static void compressWithIText() throws IOException, DocumentException {
    PdfReader reader = new PdfReader(SOURCE_PDF);
    try {
        PdfStamper stamper = new PdfStamper(reader, new FileOutputStream("C:/_dev_env_/TEMP/compressPDF/compressed_Itext.pdf"), PdfWriter.VERSION_1_5);
        try {
            stamper.setFullCompression();
            // Deflate levels run from 0 to 9; the previous value of 50 was out of range.
            stamper.getWriter().setCompressionLevel(9);
            int pageCount = reader.getNumberOfPages();
            for (int i = 1; i <= pageCount; i++) {
                reader.setPageContent(i, reader.getPageContent(i));
            }
        } finally {
            stamper.close();
        }
    } finally {
        reader.close();
    }
}

/**
 * Runs the external Neevia command-line compressor on the source PDF and echoes
 * everything the tool prints. Failures are reported on standard error instead of
 * being thrown, so a missing or failing tool does not abort the program.
 */
private static void compressWithNeevia() {
    String[] command = {
        "C:/Program Files (x86)/neeviaPDF.com/PDFcompress/cmdLine/CLcompr.exe",
        SOURCE_PDF,
        "C:/_dev_env_/TEMP/compressPDF/compressed_Neevia.pdf"
    };
    try {
        ProcessBuilder builder = new ProcessBuilder(command);
        // Merge stderr into stdout so one reader drains all output; otherwise the
        // child can block once an unread stderr pipe fills up.
        builder.redirectErrorStream(true);
        Process process = builder.start();

        // Report the command that was actually run, not this program's own arguments.
        System.out.printf("Output of running %s is:%n", Arrays.toString(command));

        BufferedReader br = new BufferedReader(new InputStreamReader(process.getInputStream()));
        try {
            String line;
            while ((line = br.readLine()) != null) {
                System.out.println(line);
            }
        } finally {
            br.close();
        }

        // Only announce success once the tool has finished.
        // NOTE(review): assumes CLcompr.exe follows the usual "0 means success" convention.
        int exitCode = process.waitFor();
        if (exitCode == 0) {
            System.out.println("Created!!");
        } else {
            System.err.println("CLcompr.exe exited with code " + exitCode);
        }
    } catch (IOException e) {
        System.err.println(e);
    } catch (InterruptedException e) {
        // Preserve the interrupt status for whoever called us.
        Thread.currentThread().interrupt();
        System.err.println(e);
    }
}

}
Daniel Mulcahy
  • 653
  • 1
  • 5
  • 14
  • What are you saving the images as? Have you considered a lossless format like png? – Taylor Dec 16 '13 at 15:28
  • Images are stored in png format and brought into the document using a generator called doc1(3rd party). Thanks for the quick reply :) – Daniel Mulcahy Dec 16 '13 at 15:35
  • 1
    If the bulk of the pdf size is those images and they're already compressed, you might be at an impasse. I did some research in the interim and pdf will store images with compression: https://en.wikipedia.org/wiki/Pdf#Raster_images so you may want to tune how the pdf is assembled. Not sure much can be done once the pdf is already assembled unless you remove and re-add the images. – Taylor Dec 16 '13 at 15:37
  • Thanks Taylor, I have compressed the pdf's using tinyPNG. I reckon this will knock around 50% off. It is still probably not enough so I may need to use the 3rd party tool. If anyone knows how to extract and re-add the image I would be interested in giving that a go also... – Daniel Mulcahy Dec 16 '13 at 16:03
  • Your iText code looks funny because it doesn't do anything to reduce the image size! Have you taken a look at this example http://itextpdf.com/examples/iia.php?id=286 and this solution http://support.itextpdf.com/node/23 – Bruno Lowagie Dec 16 '13 at 16:23
  • 1
    Thanks Bruno. That put me on the right path. With some minor modifications to that code I have exactly what I need. I was able to get the 7546KB down to 408KB. Result! I will post the modified code now :) – Daniel Mulcahy Dec 16 '13 at 18:06
  • I dont think setFullCompression and setCompressionLevel should be used together. Also, valid levels are 0 - 9. Not 50; – steve Jul 23 '16 at 19:57
  • @DanielMulcahy Your code uses a lot of classes; did you import any libraries? – Erum Nov 22 '17 at 05:10

2 Answers2

43

I used code below for a proof of concept... Works a treat :) Thanks to Bruno for setting me on the right path :)

package compressPDF;

import java.awt.Graphics2D;
import java.awt.geom.AffineTransform;
import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;

import javax.imageio.ImageIO;

import com.itextpdf.text.DocumentException;
import com.itextpdf.text.pdf.PRStream;
import com.itextpdf.text.pdf.PdfName;
import com.itextpdf.text.pdf.PdfNumber;
import com.itextpdf.text.pdf.PdfObject;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.PdfStamper;
import com.itextpdf.text.pdf.parser.PdfImageObject;

/**
 * Shrinks a PDF by scaling every image stream it contains by {@link #FACTOR}
 * and re-encoding the result as an RGB JPEG.
 */
public class ResizeImage {

/** The multiplication factor applied to the width and height of each image. */
public static float FACTOR = 0.5f;

/**
 * Manipulates a PDF file src with the file dest as result.
 * Every stream whose subtype is Image is decoded, scaled by {@link #FACTOR}
 * and replaced in place by a JPEG version; images that cannot be decoded, or
 * that would end up smaller than one pixel, are left untouched.
 *
 * @param src the original PDF
 * @param dest the resulting PDF
 * @throws IOException if the source cannot be read, an image cannot be
 *         converted, or the result cannot be written
 * @throws DocumentException if iText fails while writing the result
 */
public void manipulatePdf(String src, String dest) throws IOException, DocumentException {
    // Marker entry written into every stream that gets replaced.
    PdfName key = new PdfName("ITXT_SpecialId");
    PdfName value = new PdfName("123456789");
    // Read the file
    PdfReader reader = new PdfReader(src);
    try {
        int n = reader.getXrefSize();
        // Look for image and manipulate image stream
        for (int i = 0; i < n; i++) {
            PdfObject object = reader.getPdfObject(i);
            if (object == null || !object.isStream()) {
                continue;
            }
            PRStream stream = (PRStream) object;
            if (!PdfName.IMAGE.equals(stream.get(PdfName.SUBTYPE))) {
                continue;
            }
            shrinkImage(stream, key, value);
        }
        // Save altered PDF
        PdfStamper stamper = new PdfStamper(reader, new FileOutputStream(dest));
        stamper.close();
    } finally {
        // Release the reader even when an image or the output file fails.
        reader.close();
    }
}

/**
 * Replaces the content of one image stream with a scaled-down JPEG.
 * NOTE(review): the result is always an opaque RGB image and the stream
 * dictionary is rebuilt from scratch, so transparency or mask information on
 * the original image is presumably not carried over; verify on real input.
 *
 * @param stream the image stream to rewrite in place
 * @param key name of the marker entry to add to the rewritten stream
 * @param value value of the marker entry
 * @throws IOException if the image cannot be decoded or re-encoded
 */
private static void shrinkImage(PRStream stream, PdfName key, PdfName value) throws IOException {
    PdfImageObject image = new PdfImageObject(stream);
    BufferedImage bi = image.getBufferedImage();
    if (bi == null) {
        return;
    }
    int width = (int)(bi.getWidth() * FACTOR);
    int height = (int)(bi.getHeight() * FACTOR);
    // The cast truncates, so tiny images can reach 0, which BufferedImage rejects.
    if (width < 1 || height < 1) {
        return;
    }
    BufferedImage img = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB);
    Graphics2D g = img.createGraphics();
    try {
        g.drawRenderedImage(bi, AffineTransform.getScaleInstance(FACTOR, FACTOR));
    } finally {
        // Free the native resources held by the graphics context.
        g.dispose();
    }
    ByteArrayOutputStream imgBytes = new ByteArrayOutputStream();
    // ImageIO.write reports a missing encoder by returning false; in that case
    // keep the original image rather than storing an empty stream.
    if (!ImageIO.write(img, "JPG", imgBytes)) {
        return;
    }
    stream.clear();
    // The bytes are already JPEG-compressed, so they are stored without deflating.
    stream.setData(imgBytes.toByteArray(), false, PRStream.BEST_COMPRESSION);
    stream.put(PdfName.TYPE, PdfName.XOBJECT);
    stream.put(PdfName.SUBTYPE, PdfName.IMAGE);
    stream.put(key, value);
    stream.put(PdfName.FILTER, PdfName.DCTDECODE);
    stream.put(PdfName.WIDTH, new PdfNumber(width));
    stream.put(PdfName.HEIGHT, new PdfNumber(height));
    stream.put(PdfName.BITSPERCOMPONENT, new PdfNumber(8));
    stream.put(PdfName.COLORSPACE, PdfName.DEVICERGB);
}

/**
 * Main method. Resizes the images of a fixed input file into a fixed output file.
 *
 * @param    args    no arguments needed
 * @throws DocumentException if iText fails while writing the result
 * @throws IOException if the input cannot be read or the output cannot be written
 */
public static void main(String[] args) throws IOException, DocumentException {
    new ResizeImage().manipulatePdf("C:/_dev_env_/TEMP/compressPDF/TRPT_135002_1470_20131212_121423.PDF", "C:/_dev_env_/TEMP/compressPDF/compressTest.pdf");
}

}
Daniel Mulcahy
  • 653
  • 1
  • 5
  • 14
5

Just to update the excellent answer from @Daniel: I updated his code to be compatible with iText 7.

package opencde.builder.compresspdf;

import java.awt.Graphics2D;
import java.awt.geom.AffineTransform;
import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.Map;

import javax.imageio.ImageIO;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.itextpdf.io.image.ImageDataFactory;
import com.itextpdf.kernel.pdf.PdfDictionary;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfName;
import com.itextpdf.kernel.pdf.PdfPage;
import com.itextpdf.kernel.pdf.PdfReader;
import com.itextpdf.kernel.pdf.PdfStream;
import com.itextpdf.kernel.pdf.PdfWriter;
import com.itextpdf.kernel.pdf.xobject.PdfImageXObject;
import com.itextpdf.layout.element.Image;

public class ResizeImageV7 {
    
    // Logging
    private static Logger logger = LoggerFactory.getLogger(ResizeImageV7.class);

    /**
     * Manipulates a PDF file src with the file dest as result
     * 
     * @param src  the original PDF
     * @param dest the resulting PDF
     * @param resizeFactor factor to multiplicate to resize image
     * @throws IOException
     */
    public void manipulatePdf(String src, String dest,Float resizeFactor) throws IOException {
        
        //Get source pdf
        PdfDocument pdfDoc = new PdfDocument(new PdfReader(src), new PdfWriter(dest));

        // Iterate over all pages to get all images.
        for (int i = 1; i <= pdfDoc.getNumberOfPages(); i++)
        {
            PdfPage page = pdfDoc.getPage(i);
            PdfDictionary pageDict = page.getPdfObject();
            PdfDictionary resources = pageDict.getAsDictionary(PdfName.Resources);
            // Get images
            PdfDictionary xObjects = resources.getAsDictionary(PdfName.XObject);
            for (Iterator<PdfName> iter = xObjects.keySet().iterator() ; iter.hasNext(); ) {
                // Get image
                PdfName imgRef = iter.next();
                PdfStream stream = xObjects.getAsStream(imgRef);
                PdfImageXObject image = new PdfImageXObject(stream);
                BufferedImage bi = image.getBufferedImage();
                if (bi == null)
                    continue;
                
                // Create new image
                int width = (int) (bi.getWidth() * resizeFactor);
                int height = (int) (bi.getHeight() * resizeFactor);
                BufferedImage img = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB);
                AffineTransform at = AffineTransform.getScaleInstance(resizeFactor, resizeFactor);
                Graphics2D g = img.createGraphics();
                g.drawRenderedImage(bi, at);
                ByteArrayOutputStream imgBytes = new ByteArrayOutputStream();
                
                // Write new image
                ImageIO.write(img, "JPG", imgBytes);
                Image imgNew =new Image(ImageDataFactory.create(imgBytes.toByteArray()));
                
                // Replace the original image with the resized image
                xObjects.put(imgRef, imgNew.getXObject().getPdfObject());
            }          
        }
        
        pdfDoc.close();
    }

    /**
     * Main method.
     *
     * @param src  the original PDF
     * @param dest the resulting PDF
     * @param resizeFactor factor to multiplicate to resize image
     * @throws IOException
     */
    public static void main(String[] args) throws IOException {
        //Get input parametres
        if (args.length<3 ) {
            System.out.println("Source PDF, Destination PDF and Resize Factor must be provided as parametres");
        } else {
            String sourcePDF=args[0];
            String destPDF=args[1];
            Float resizeFactor=Float.valueOf(new String(args[2]));
            logger.info("Inovking Resize with args, source:" + sourcePDF
                    + " destination:" + destPDF 
                    + " factor:" + resizeFactor);
            //Call method to resize images
            new ResizeImageV7().manipulatePdf(sourcePDF,destPDF,resizeFactor);
            logger.info("PDF resized");
        }
    }

}
Marti Pàmies Solà
  • 611
  • 1
  • 6
  • 12