0

So far I am able to parse a docx file using docx4j and find the bookmarks and all the tables in it, using the code below:

// Load the .docx package from the path held in docxFile.
WordprocessingMLPackage wordMLPackage = WordprocessingMLPackage.load(new java.io.File(docxFile));
// Collect every paragraph (P) under the main document part.
// NOTE(review): `template` is not declared in this snippet; presumably it is the
// package loaded above (wordMLPackage) -- confirm.
List<Object> paragraphs = getAllElementFromObject(template.getMainDocumentPart(), P.class);
for (Object p : paragraphs) {
    // Gather the bookmark start/end markers found while walking this paragraph.
    RangeFinder rt = new RangeFinder("CTBookmark", "CTMarkupRange");
    new TraversalUtil(p, rt);
    for (CTBookmark content : rt.getStarts()) {
        // Only the bookmark named "if_supdef" is of interest.
        if (content.getName().equals("if_supdef")) {
            // Searches for Tbl elements beneath the bookmark start object itself.
            // NOTE(review): as the question text observes, w:tbl is a child of body,
            // not of the bookmark, so this presumably finds nothing -- confirm.
            List<Object> tbl = getAllElementFromObject(content, Tbl.class);
            System.out.println("tbl==" + tbl.size());
        }
    }
}

// Separately, walk the whole document content and count every table found.
// NOTE(review): `documentPart` is not declared in this snippet; presumably it is
// the main document part of the loaded package -- confirm.
TableFinder finder = new TableFinder();
new TraversalUtil(documentPart.getContent(), finder);
System.out.println("Found " + finder.tblList.size() + " tables");

I've got these lines of code from some blogs and answers from other questions.

Now I would like to find only the table inside a bookmark (here my bookmark name is if_supdef) rather than searching the entire document. Once I find the table, I would add rows based on the number of records I receive from a SQL table and the MERGEFIELDs available. The bookmark and its table look something like the picture below:

enter image description here

Once processed through docx4j it should look like:

enter image description here

In document.xml I see that the parent tag of w:tbl is body, not the bookmark. Is it possible to read the table inside a bookmark? If so, how? If not, what is an alternative way to uniquely identify a table and add content to it?

s3-89
  • 75
  • 1
  • 8
  • Do you have control over the input documents? If yes, there are better approaches than bookmarks. If no, use a single finder which detects that you have hit a bookmark (these are point tags, not an element which can enclose a table), then looks for the table. – JasonPlutext Feb 04 '23 at 23:16
  • Thanks @jason, I was waiting for your input, and sorry for the late response; it was a busy week. I don't have control over the input document. Could you please point me to the APIs you are referring to? – s3-89 Feb 11 '23 at 16:43

1 Answer

0

Try something along the lines of the below.

import java.math.BigInteger;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.docx4j.TraversalUtil;
import org.docx4j.TraversalUtil.CallbackImpl;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.docx4j.openpackaging.parts.WordprocessingML.MainDocumentPart;
import org.docx4j.wml.CTBookmark;
import org.docx4j.wml.CTMarkupRange;
import org.docx4j.wml.Tbl;

import jakarta.xml.bind.JAXBContext;


/**
 * Demo that walks a .docx main document part and reports, for every table,
 * the names of the bookmarks that are open at the point where the table is
 * encountered.
 *
 * <p>Bookmarks are point markers (a start and an end sharing an id), not
 * container elements, so "a table inside a bookmark" is determined here by
 * tracking which bookmark starts have been seen without their matching end.
 */
public class TableInBookmarkFinder  {

    public static JAXBContext context = org.docx4j.jaxb.Context.jc;

    /**
     * Loads {@code tbl_bookmarks.docx} from the working directory and prints,
     * per table, the bookmarks enclosing it.
     *
     * @param args unused
     * @throws Exception if the document cannot be loaded or traversed
     */
    public static void main(String[] args) throws Exception {

        String inputfilepath = System.getProperty("user.dir")
                    + "/tbl_bookmarks.docx";

        WordprocessingMLPackage wordMLPackage = WordprocessingMLPackage
                .load(new java.io.File(inputfilepath));
        MainDocumentPart documentPart = wordMLPackage.getMainDocumentPart();

        // find
        TableInBookmarkFinderCallback finder = new TableInBookmarkFinderCallback();
        new TraversalUtil(documentPart.getContent(), finder);

        // result
        for (TableInfo ti : finder.getTableInfos()) {
            if (ti.getBookmarkNames().isEmpty()) {
                // Don't claim the table is inside bookmarks when none were open.
                System.out.println("table not contained in any bookmark");
                continue;
            }
            System.out.println("table contained in bookmarks:");
            for (String s : ti.getBookmarkNames()) {
                System.out.println("bookmark name: " + s);
            }
        }
    }

    /**
     * A table together with the names of the bookmarks that were open when
     * the table was encountered.
     */
    public static class TableInfo {

        private final Tbl tbl;
        private final List<String> bookmarkNames;

        /**
         * @param tbl           the table that was found
         * @param bookmarkNames names of the bookmarks open at that point;
         *                      empty if there were none
         */
        TableInfo(Tbl tbl, List<String> bookmarkNames) {
            this.tbl = tbl;
            this.bookmarkNames = bookmarkNames;
        }

        /** @return the table that was found */
        public Tbl getTbl() {
            return tbl;
        }

        /** @return names of the enclosing bookmarks, in the order their starts were seen */
        public List<String> getBookmarkNames() {
            return bookmarkNames;
        }

    }

    /**
     * Traversal callback which records every {@link Tbl} it is handed,
     * together with the bookmarks currently open.
     *
     * <p>NOTE(review): a bookmark whose start marker sits inside the table
     * (for example in its first cell) is only seen after the table itself,
     * assuming the traversal applies the callback to a table before its
     * contents; such a bookmark is therefore not reported for that table.
     */
    public static class TableInBookmarkFinderCallback extends CallbackImpl {

        /**
         * Whether to descend into tables. When false, the contents of a
         * {@link Tbl} (nested tables, and any bookmark markers in its cells)
         * are not visited.
         */
        private final boolean traverseTables;

        /**
         * Track bookmarks encountered: bookmark id to bookmark name.
         */
        private final Map<BigInteger, String> bookmarkInfos = new HashMap<BigInteger, String>();

        /**
         * Ids of the bookmarks we are currently in. Insertion-ordered so the
         * names are reported in the order the bookmark starts were seen.
         */
        private final Set<BigInteger> currentBookmarks = new LinkedHashSet<BigInteger>();

        /**
         * What tables did we encounter?
         */
        private final List<TableInfo> tableInfos = new ArrayList<TableInfo>();

        /** Creates a callback which descends into tables. */
        public TableInBookmarkFinderCallback() {
            this(true);
        }

        /**
         * @param traverseTables false to skip the contents of tables
         *                       (e.g. to ignore nested tables)
         */
        public TableInBookmarkFinderCallback(boolean traverseTables) {
            this.traverseTables = traverseTables;
        }

        /** @return the tables recorded so far, in the order they were visited */
        public List<TableInfo> getTableInfos() {
            return tableInfos;
        }

        /**
         * Updates the set of open bookmarks on bookmark start/end markers and
         * records a {@link TableInfo} for each table.
         *
         * @param o the object being visited
         * @return always null
         */
        @Override
        public List<Object> apply(Object o) {

            System.out.println(o.getClass().getName());

            if (o instanceof CTBookmark) {

                CTBookmark bmStart = (CTBookmark)o;

                bookmarkInfos.put(bmStart.getId(), bmStart.getName());

                if (currentBookmarks.add(bmStart.getId()) ) {
                    // ok
                    System.out.println("added " + bmStart.getId());
                } else {
                    System.out.println("ERROR: duplicate bookmarks with id " + bmStart.getId());
                }

            } else if (o.getClass() == CTMarkupRange.class) {
                // CTBookmark extends CTMarkupRange, hence the else above.
                // Match the exact class rather than instanceof: other range
                // markers (presumably docx4j's comment range start/end --
                // verify against your docx4j version) subclass CTMarkupRange
                // and use their own id space, so they must not close a
                // bookmark that happens to share the same id.
                CTMarkupRange bmEnd = (CTMarkupRange)o;

                if (currentBookmarks.remove(bmEnd.getId()) ) {
                    // ok
                    System.out.println("removed " + bmEnd.getId());
                } else {
                    System.out.println("ERROR: no start element for bookmark with id " + bmEnd.getId());
                }

            }

            if (o instanceof Tbl ) {
                System.out.println("tbl");

                // Snapshot the names of the bookmarks open right now.
                List<String> bookmarkNames = new ArrayList<String>(currentBookmarks.size());
                for (BigInteger bmId : currentBookmarks) {
                    bookmarkNames.add(bookmarkInfos.get(bmId));
                }

                tableInfos.add( new TableInfo( (Tbl)o, bookmarkNames));
            }
            return null;
        }

        /**
         * @param o the object just visited
         * @return true to descend into {@code o}; false only for a table
         *         when table traversal is disabled
         */
        @Override
        public boolean shouldTraverse(Object o) {
            return traverseTables || !(o instanceof Tbl);
        }

    }


}
JasonPlutext
  • 15,352
  • 4
  • 44
  • 84