0

I am trying to fill and combine multiple forms without flattening them (I need to keep them interactive for users). However, I have noticed a problem. I have PDF files that contain the forms I am trying to fill. The form fields have their fonts set in Adobe PDF. I notice that after I combine the forms, the fields lose their original fonts. Here is my program.

using iText.Forms;
using iText.Kernel.Pdf;
using System;
using System.Collections.Generic;
using System.IO;
using System.Runtime.CompilerServices;
using System.Threading.Tasks;

namespace PdfCombineTest
{
    /// <summary>
    /// Sample console program: fills two PDF forms in memory without flattening them,
    /// merges the filled documents into one PDF, and writes the result to <c>./output.pdf</c>.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Fills <c>./pdf-form-1.pdf</c> and <c>./pdf-form-2.pdf</c>, combines them,
        /// and saves the merged document as <c>./output.pdf</c>.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // Stacked usings release every intermediate stream, even when a later step throws.
            using (Stream file1 = FillFile("./pdf-form-1.pdf", new[] { KeyValuePair.Create("Text1", "TESTING"), KeyValuePair.Create("CheckBox1", "Yes") }))
            using (Stream file2 = FillFile("./pdf-form-2.pdf", new[] { KeyValuePair.Create("Text2", "text 2 text") }))
            using (Stream output = Program.Combine(new[] { file1, file2 }))
            using (FileStream fileStream = File.Create("./output.pdf"))
            {
                output.CopyTo(fileStream);
            }
        }

        /// <summary>
        /// Opens the PDF at <paramref name="path"/> read-only and fills its form via <see cref="Fill"/>.
        /// </summary>
        /// <param name="path">Path of the PDF form to fill.</param>
        /// <param name="keyValuePairs">Field name / value pairs to apply.</param>
        /// <returns>A stream, positioned at the start, holding the filled PDF. The caller disposes it.</returns>
        private static Stream FillFile(string path, IEnumerable<KeyValuePair<string, string>> keyValuePairs)
        {
            using (var stream = new FileStream(path, FileMode.Open, FileAccess.Read))
            {
                return Fill(stream, keyValuePairs);
            }
        }

        /// <summary>
        /// Copies every page of each input PDF, in order, into a single new PDF document,
        /// using a <see cref="PdfPageFormCopier"/> so that form fields are carried over.
        /// </summary>
        /// <remarks>
        /// NOTE(review): per the discussion accompanying this sample, <see cref="PdfPageFormCopier"/>
        /// excludes the AcroForm default resources (DR) when merging, so field fonts may not
        /// survive the merge. Confirm against the iText version in use.
        /// </remarks>
        /// <param name="streams">Readable streams, each positioned at the start of a PDF document.</param>
        /// <returns>A stream, positioned at the start, holding the combined PDF. The caller disposes it.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="streams"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="streams"/> is empty or contains a null entry.</exception>
        public static Stream Combine(params Stream[] streams)
        {
            if (streams == null)
            {
                throw new ArgumentNullException(nameof(streams));
            }

            if (streams.Length == 0)
            {
                // Fail up front with a clear message instead of producing a document without pages.
                throw new ArgumentException("At least one PDF stream is required.", nameof(streams));
            }

            if (Array.IndexOf(streams, null) >= 0)
            {
                throw new ArgumentException("The PDF streams must not contain null entries.", nameof(streams));
            }

            MemoryStream copyStream = new MemoryStream();
            try
            {
                PdfWriter writer = new PdfWriter(copyStream);
                writer.SetSmartMode(true);
                // Keep the MemoryStream open after the document is closed so it can be returned.
                writer.SetCloseStream(false);
                PdfPageFormCopier formCopier = new PdfPageFormCopier();

                using (PdfDocument combined = new PdfDocument(writer))
                {
                    combined.InitializeOutlines();

                    foreach (var stream in streams)
                    {
                        using (PdfDocument document = new PdfDocument(new PdfReader(stream)))
                        {
                            document.CopyPagesTo(1, document.GetNumberOfPages(), combined, formCopier);
                        }
                    }
                }

                copyStream.Seek(0, SeekOrigin.Begin);
                return copyStream;
            }
            catch
            {
                // The caller never receives the stream on failure, so release it here.
                copyStream.Dispose();
                throw;
            }
        }

        /// <summary>
        /// Sets the given values on the form fields of a PDF and returns the filled,
        /// non-flattened document as a new in-memory stream.
        /// </summary>
        /// <param name="inputStream">Readable stream positioned at the start of the PDF form.</param>
        /// <param name="keyValuePairs">Field name / value pairs to apply.</param>
        /// <returns>A stream, positioned at the start, holding the filled PDF. The caller disposes it.</returns>
        /// <exception cref="ArgumentNullException">An argument is null.</exception>
        /// <exception cref="KeyNotFoundException">A requested field name does not exist in the form.</exception>
        public static Stream Fill(Stream inputStream, IEnumerable<KeyValuePair<string, string>> keyValuePairs)
        {
            if (inputStream == null)
            {
                throw new ArgumentNullException(nameof(inputStream));
            }

            if (keyValuePairs == null)
            {
                throw new ArgumentNullException(nameof(keyValuePairs));
            }

            MemoryStream outputStream = new MemoryStream();
            try
            {
                PdfWriter writer = new PdfWriter(outputStream);
                // Keep the MemoryStream open after the document is closed so it can be returned.
                writer.SetCloseStream(false);

                using (PdfDocument document = new PdfDocument(new PdfReader(inputStream), writer))
                {
                    PdfAcroForm acroForm = PdfAcroForm.GetAcroForm(document, true);
                    acroForm.SetGenerateAppearance(true);
                    IDictionary<string, iText.Forms.Fields.PdfFormField> fields = acroForm.GetFormFields();

                    foreach (var kvp in keyValuePairs)
                    {
                        iText.Forms.Fields.PdfFormField field;
                        if (!fields.TryGetValue(kvp.Key, out field))
                        {
                            // Same exception type the dictionary indexer would throw, but naming the field.
                            throw new KeyNotFoundException($"The PDF form has no field named '{kvp.Key}'.");
                        }

                        field.SetValue(kvp.Value);
                    }
                }

                outputStream.Seek(0, SeekOrigin.Begin);
                return outputStream;
            }
            catch
            {
                // The caller never receives the stream on failure, so release it here.
                outputStream.Dispose();
                throw;
            }
        }
    }
}

After several hours of debugging, I've noticed that PdfPageFormCopier excludes the default resources, which contain the fonts, when merging form fields. Is there a way around this? The project I'm working on currently does this process in iTextSharp and it works as intended. However, we are looking to migrate to iText 7.

Here are links to some sample PDFs I made. I can't upload the actual PDFs I'm working with, but these display the same problem.

https://www.dropbox.com/s/pukt91d4xe8gmmo/pdf-form-1.pdf?dl=0 https://www.dropbox.com/s/c52x6bc99gnrvo6/pdf-form-2.pdf?dl=0

esock
  • 46
  • 4

1 Answers1

0

So my solution was to modify the PdfPageFormCopier class from iText. The main issue is in the function below.

        /// <summary>
        /// Merges the source document's AcroForm dictionary (minus its Fields and DR entries) into the
        /// destination document's form, then passes each widget annotation on <paramref name="toPage"/>
        /// to CopyField.
        /// </summary>
        /// <param name="fromPage">Page whose owning document supplies the source form.</param>
        /// <param name="toPage">Page whose owning document receives the form data and whose annotations are scanned.</param>
        public virtual void Copy(PdfPage fromPage, PdfPage toPage) {
            // Re-resolve the cached source document/form only when the page belongs to a different document.
            if (documentFrom != fromPage.GetDocument()) {
                documentFrom = fromPage.GetDocument();
                formFrom = PdfAcroForm.GetAcroForm(documentFrom, false);
            }
            // Same caching for the destination side.
            // NOTE(review): the 'true' flag presumably creates the form when it is missing - confirm.
            if (documentTo != toPage.GetDocument()) {
                documentTo = toPage.GetDocument();
                formTo = PdfAcroForm.GetAcroForm(documentTo, true);
            }
            // No source form: nothing to merge.
            if (formFrom == null) {
                return;
            }
            //duplicate AcroForm dictionary
            // Fields and DR are left out of the copied dictionary.
            IList<PdfName> excludedKeys = new List<PdfName>();
            excludedKeys.Add(PdfName.Fields);
            excludedKeys.Add(PdfName.DR);
            PdfDictionary dict = formFrom.GetPdfObject().CopyTo(documentTo, excludedKeys, false);
            formTo.GetPdfObject().MergeDifferent(dict);
            IDictionary<String, PdfFormField> fieldsFrom = formFrom.GetFormFields();
            // Source form has no fields: the dictionary merge above is all there is to do.
            if (fieldsFrom.Count <= 0) {
                return;
            }
            IDictionary<String, PdfFormField> fieldsTo = formTo.GetFormFields();
            IList<PdfAnnotation> annots = toPage.GetAnnotations();
            foreach (PdfAnnotation annot in annots) {
                // Only widget annotations are handed to CopyField; every other subtype is skipped.
                if (!annot.GetSubtype().Equals(PdfName.Widget)) {
                    continue;
                }
                CopyField(toPage, fieldsFrom, fieldsTo, annot);
            }
        }

Specifically the line here.

// Adds the DR (default resources) key to the keys left out when the AcroForm dictionary is copied.
excludedKeys.Add(PdfName.DR);

If you walk through the code in the CopyField() function, you will eventually end up in the PdfFormField class. You can see the constructor below.

        /// <summary>
        /// Wraps an existing dictionary as a form field: passes it to the base constructor, then runs
        /// EnsureObjectIsAddedToDocument, SetForbidRelease and RetrieveStyles, in that order.
        /// </summary>
        /// <param name="pdfObject">Dictionary backing the form field.</param>
        public PdfFormField(PdfDictionary pdfObject)
            : base(pdfObject) {
            EnsureObjectIsAddedToDocument(pdfObject);
            SetForbidRelease();
            // NOTE(review): per the surrounding discussion, this tries to set the field's font from its
            // default appearance - confirm against the RetrieveStyles implementation.
            RetrieveStyles();
        }

The function RetrieveStyles() will try to set the font for the field based on the default appearance. However, that will not work because of the function below.

        /// <summary>
        /// Looks up <paramref name="fontName"/> in the Font sub-dictionary of the AcroForm DR entry
        /// and returns the corresponding font obtained from the document.
        /// </summary>
        /// <param name="fontName">Name of the font to resolve; may be null.</param>
        /// <returns>
        /// The font returned by the document for the matching dictionary, or null when the name is null
        /// or the DR entry, its Font dictionary, or the named font entry is missing.
        /// </returns>
        private PdfFont ResolveFontName(String fontName) {
            PdfDictionary resources = (PdfDictionary)GetAcroFormObject(PdfName.DR, PdfObject.DICTIONARY);
            PdfDictionary fontEntries = null;
            if (resources != null) {
                fontEntries = resources.GetAsDictionary(PdfName.Font);
            }
            // Without a name or a font dictionary there is nothing to look up.
            if (fontName == null || fontEntries == null) {
                return null;
            }
            PdfDictionary namedFont = fontEntries.GetAsDictionary(new PdfName(fontName));
            if (namedFont == null) {
                return null;
            }
            return GetDocument().GetFont(namedFont);
        }

As you can see, it checks whether the font exists in the default resources, which were explicitly excluded in the PdfPageFormCopier class. It will never find the font.

So my solution was to create my own class that implements the IPdfPageExtraCopier interface. I copied the code from the PdfPageFormCopier class and removed the one line excluding the default resources. Then I use my own copier class in my code. Not the prettiest solution but it works.

esock
  • 46
  • 4
  • You are aware that different pdfs may have different entries in their default resources for the same name? E.g. different fonts? Simply copying the default resources as they are can cause very funny effects in such a situation... ;) – mkl Jun 04 '20 at 04:43