0

I have a Google sheet with approaching 100 in-line images. The sheet is very slow to load. I've read that the sheet will perform better if the images are loaded as a link from a Google Drive file rather than as an embedded image in the spreadsheet.

So I would like to write a Google Apps Script which...

  1. looks through each cell on each worksheet;
  2. decides whether the cell contains an image;
  3. save the image to Google drive;
  4. get a link to the image file;
  5. delete the inline-image;
  6. reinsert the image as a link.

So far I have this...

function extract_images() {
  var spreadsheet_id = "1NGtkBj7HOqf-rvV7zdRb7lrj3MRFoJH9Ulnx9YIO7Hs" // MARKS COPY OF "RAW Exam Question Reference AQA GCSE PE 9-1"
  var images_folder = DriveApp.getFolderById("1nEF_E7rZmTpTif3uDS8B0Jnio1x6IaSm");
  var ss = SpreadsheetApp.openById(spreadsheet_id);
  var sheets = ss.getSheets();
  sheets.forEach (sh => {
    Logger.log("Looking through '"+sh.getName()+"'");
    var w = sh.getLastColumn();
    var h = sh.getLastRow();
    for (i=1; i<h; i++){
      for (j=1; j<w; j++){
        var value = sh.getRange(i,j).getValue();
        var range = sh.getRange(i,j);
        if (value.valueOf().toString().startsWith("com")){
          Logger.log(`Image at (${i},${j})`);
        }
      }
    }
  });
}

...which finds the images in the sheet by the cell value which, for an image, seems to have the format com.google.apps.maestro.server.beans.trix.impl.CellImageApiAdapter@ID where the ID is a 8 character hex code.

I have tried to use this amazing library written by Kanshi TANAIKE...

https://github.com/tanaikech/DocsServiceApp

...but I can't find a way of using it to extract one image from one cell. I can only see a way of extracting all the images at once and I get an error when I try to do that "exportSizeLimitExceeded".

I feel I am very close which is frustrating. Please help!

Mark Mills
  • 13
  • 5
  • At first, I deeply apologize the my GAS library was not useful for your situation. In your case, for example, how about directly retrieving the image files from XLSX data converted from Google Spreadsheet. In this case, you can see the sample script at "Pattern 1" in https://stackoverflow.com/a/64040243 . If that was not the direction you expect, I apologize again. – Tanaike Jan 30 '21 at 12:49
  • 1
    PLEASE don't apologise! Your script has helped me a lot. I was starting to do something similar by hand offline! I will have a look at the suggestion! – Mark Mills Jan 30 '21 at 13:44
  • Thank you for replying. I hope your issue is resolved by the method. – Tanaike Jan 31 '21 at 00:40

1 Answers1

0

Thanks to the posts suggested by @Tanaike, the solution I've come up with is as follows. It is very dirty and not well written but it works.

Workflow...

  1. Get spreadsheet ID and folder ID;
  2. Open a copy of the spreadsheet;
  3. Export as XLSX;
  4. Change filetype to zip;
  5. Unzip into a blob;
  6. Create object to hold xlsx worksheet ids/sheet names, image locations and filenames and Drive image ids for newly created images;
  7. Iterate through each blob in the blob;
  8. Find the xl/workbook.xml file and retrieve sheet id and names;
  9. Find each xl/drawings/drawing#.xml files and retrieve sheet, location and image filename;
  10. Find all images, save to drive and retrieve and map drive ids to filenames;
  11. Go through each sheet in the Google sheet;
  12. Iterate through each cell;
  13. When you find a cell starting with 'com' which ISN'T a formula, generate the image id from the sheet id, row and column, find the corresponding image name and then the corresponding google drive image id.
  14. Clear the cell contents and reinsert as an =IMAGE() formula;
  15. Add a note to the cell with the image name.
var spreadsheet_id = "SHEET_ID"; // Spreadsheet ID
var images_folder = DriveApp.getFolderById("FOLDER_ID"); // ID of folder to store images in

function extract_images() {
  
  // Let's get started
  var ss = SpreadsheetApp.openById(spreadsheet_id); // Open a copy of the spreadsheet
  const url = "https://docs.google.com/spreadsheets/export?exportFormat=xlsx&id=" + spreadsheet_id; // Export as XLSX
  const blob = UrlFetchApp.fetch(url, {headers: {authorization: `Bearer ${ScriptApp.getOAuthToken()}`}}).getBlob().setContentType(MimeType.ZIP); // Change filetype to ZIP
  const xlsx = Utilities.unzip(blob); // Unzip folder into collection of binary files
  const xlsx_workbook_filename = "xl/workbook.xml"; // This is the filename of the worksheet list!
  var worksheets = new Object(); // Create object to maps sheet names to XLSX sheet ids
  const xlsx_drawing_filename_stub = "xl/drawings/drawing"; // add '#.xml' where # is sheetid.
  var images = new Object(); // Create object to maps cell references of images to image files
  var image_ids = new Object(); // Create object to map ID of newly created image to image name

  // Let's go through every item in the xlsx blob! There may be a better way!
  xlsx.forEach(b =>{
    // Check to see if the blob name is the same as the worksheets.xml file
    if (b.getName() == xlsx_workbook_filename){
      var xlsx_workbook_file = b.getAs("text/xml"); // convert to text/xml
      Logger.log(`Found '${xlsx_workbook_filename}'`);
      var document = XmlService.parse(xlsx_workbook_file.getDataAsString("UTF-8")); // parse the xml file so I can access the nodes
      var namespace = XmlService.getNamespace("http://schemas.openxmlformats.org/spreadsheetml/2006/main"); // This is the namespace
      var root = document.getRootElement(); // Get the root element so we know where to start
      var sheets = root.getChild("sheets",namespace).getChildren("sheet",namespace); // Get a collection of sheet nodes
      // Iterate through the sheet nodes
      sheets.forEach(s => {
        worksheets[s.getAttribute("name").getValue()] = s.getAttribute("sheetId").getValue(); // Create objects to map sheet name to xlsx sheet id
      });
    }
    // Check to see if the blob name starts with the drawing file stub
    if (b.getName().startsWith(xlsx_drawing_filename_stub)){
      var this_sheet_id = b.getName().substr(19,1);
      var xlsx_drawing_file = b.getAs("text/xml"); // Convert to text/xml
      Logger.log(`Found drawing file '${b.getName()}'`);
      var document = XmlService.parse(xlsx_drawing_file.getDataAsString("UTF-8")); // Parse the xml file!
      var namespace = XmlService.getNamespace("http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing"); // This is the namespace
      var root = document.getRootElement(); // Get root element so we know where to start
      var children = root.getChildren(); // Get all the children - each one is an embedded image
      Logger.log(`${children.length} images in worksheet ${this_sheet_id}`); // Nice!
      // Iterate through each embedded image
      children.forEach(t =>{
        var col = t.getChild("from",namespace).getChild("col",namespace).getText(); // Get the column reference
        var row = t.getChild("from",namespace).getChild("row",namespace).getText(); // Get the row reference
        var image_filename = t.getChild("pic",namespace).getChild("nvPicPr",namespace).getChild("cNvPr",namespace).getAttribute("name").getValue(); // Dig down!
        Logger.log(`Image in sheet ${this_sheet_id} at ${col},${row} called '${image_filename}'`);
        images[`${this_sheet_id},${col},${row}`] = image_filename; // Create object to hold location and filename!
      });
    }
    // Get the actual images!
    if (b.getContentType() == 'image/png'){
      var the_name = b.getName().substr(9); // The image name will start with 'xl/media/' otherwise!
      Logger.log(`Saving image '${the_name}'...`);
      var id = images_folder.createFile(b).setName(the_name).getId(); // Create the image file in the image folder
      Logger.log(`This is image id '${id}'`);
      image_ids[the_name] = id; // Create object to map image name to google drive image id
  }
  });;

  // Now, lets replace the images!
  var sheets = ss.getSheets();
  // Go through each worksheet in the Google sheet
  sheets.forEach (sh => {
    var sheet_name = sh.getName(); // Get the sheet name
    Logger.log(`Looking through '${sheet_name}'`);
    var sheet_id = worksheets[sheet_name]; // This is the corresponding xlsx sheet id
    Logger.log(`This is worksheet id '${sheet_id}'. Looking for images...`);
    var w = sh.getLastColumn()+1; // Last occupied column
    var h = sh.getLastRow()+1; // Last occupied row
    // Iterate through all cells!
    for (i=1; i<h; i++){
      for (j=1; j<w; j++){
        var range = sh.getRange(i,j); // Get the range object
        var value = range.getValue(); // Get the value
        Logger.log(`The cell contains '${value.valueOf().toString()}'`);
        // Only replace the image if there is an image in the cell AND it's NOT a formula!
        if (value.valueOf().toString().startsWith("com") && !range.getFormula()){
          var img_ref = `${sheet_id},${j-1},${i-1}`; // Generate the image reference
          var img_name = images[img_ref]; // Get the image name
          var img_id = image_ids[img_name]; // Get the Google drive image id
          Logger.log(`Image at ${img_ref}`);
          Logger.log(`This is called ${img_name}`);
          Logger.log(`It's ID is ${img_id}`);
          range.setValue(`=IMAGE("https://docs.google.com/uc?export=view&id=${img_id}",1)`); // Use 'IMAGE()' to embed the image
          range.setNote(`${img_name}`); // Add a note with the image name, cause I can.
        }
      }
    }
  });
}
Mark Mills
  • 13
  • 5