I am currently trying to parse the URL and text from a table of contents in a Google Doc and write them into a table in Google Sheets.
So far I have been successful in getting the text and the URL using the following code, modified from the top answer to this post (How to use .findElement(DocumentApp.ElementType.TABLE_OF_CONTENTS) to get and parse a Document's Table of Contents Element):
/**
 * Collects the displayed text and link target of each entry in a Google
 * Doc's table of contents.
 *
 * Only the first table of contents found in the document body is parsed.
 * TOC paragraphs that have no child element (for example a blank line) are
 * skipped, because there is no child Text to read a link from.
 *
 * @param {string} docId - ID of the document to open.
 * @returns {Array<{text: string, linkUrl: (string|null)}>} One object per
 *     parsed TOC entry, in child order; an empty array when the body has no
 *     table of contents.
 */
function parseTOC( docId ) {
  var contents = [];
  var doc = DocumentApp.openById(docId);

  // Search the body for a TOC. Assume there's only one.
  var searchResult = doc.getBody().findElement(DocumentApp.ElementType.TABLE_OF_CONTENTS);
  if (!searchResult) {
    return contents;
  }

  // The TOC contains child Paragraph elements, and each of those has a child
  // Text element: the Paragraph supplies the displayed text, the Text
  // supplies the link.
  var toc = searchResult.getElement().asTableOfContents();
  var numChildren = toc.getNumChildren();
  for (var i = 0; i < numChildren; i++) {
    var tocItem = toc.getChild(i).asParagraph();

    // Asking a childless paragraph for child 0 would raise and abort the
    // whole parse, so skip it instead.
    if (tocItem.getNumChildren() === 0) {
      continue;
    }

    var tocItemText = tocItem.getChild(0).asText();
    contents.push({
      text: tocItem.getText(),            // Displayed text
      linkUrl: tocItemText.getLinkUrl()   // URL link in document
    });
  }

  // Return array of objects containing TOC info
  return contents;
}
/**
 * Manual check: parses the table of contents of the active document and
 * writes the resulting array to the Apps Script log.
 */
function test_parseTOC() {
  var tocEntries = parseTOC(DocumentApp.getActiveDocument().getId());
  Logger.log(tocEntries);
}
The function test_parseTOC returns the following array:
[{linkUrl=#heading=h.nyq88bov1u8x, text=Google}, {text=Help, linkUrl=#heading=h.9lthewlyeqjd}]
Although the information is correct, the "=" is giving me trouble. When working with an array of objects I would expect it to be:
[{linkUrl: "#heading=h.nyq88bov1u8x", text: "Google"}, {text: "Help", linkUrl: "#heading=h.9lthewlyeqjd"}]
What I am trying to emulate can be outlined by this post here (Google Apps Script: how to copy array of objects to range?)
If I manually change the "=" to ":" and add quotation marks then the array works fine. Is there something with the original function I can change to produce a ":" instead of a "="? Or is there a way to modify the array of objects after the fact to replace the "=" with ":"? I could easily use concat to automatically add the quotation marks, but the "=" is out of my wheelhouse.
Thank you for any input on this
Update: It turns out that this code was working correctly; the error came from a different function that was using the output of this one (I had written .getSheets(0) instead of .getSheets()[0]). I will leave the original post up in case anyone needs a working example of how to extract the links from a TOC, along with the resources to write them to a Google Sheet.