0

I have a sort-of desktop OCR application that is being developed in ReactJS. The PDF.js library is used to read the PDF when the scan button is clicked. I would like to run this process in the background, because as soon as the user tries to interact with other parts of the application, the scan process gets interrupted.

I first tried to use a worker, but because I am totally new to this, it did not work out. Then I tried to achieve this with ipcRenderer, but both of these use the Structured Clone Algorithm. The code passes functions and state setters to different components, and I am not able to work around this.

When the button is clicked, the following component is executed:

    <CodeScanActionBar
      status={codeScanStatus}
      progress={codeScanProgress}
      abortController={abortController}
      onRun={() => {
        runCodeScan({
          pdf,
          pdfPath: file,
          patientsFolder,
          setLogs: setCodeScanStatus,
          setProgress: setCodeScanProgress,
          cropCords: ocrSettings,
          setPromptMessage,
          abortControllerSignal,
          onSelectFile,
          sortToPatients
        })
      }
    }
    /> 

The above component calls the following method:


/**
 * Starts a code scan via `codeScanService.scanCodes` and reports the outcome
 * through the supplied callbacks. Nothing is returned; the scan's promise is
 * handled internally.
 *
 * @param {object} options
 * @param {Function} options.setLogs - Receives status text ('Finished.' or an 'Error: …' message).
 * @param {Function} options.setProgress - Receives 100 once the scan promise settles.
 * @param {string} options.pdfPath - Forwarded to the scan service.
 * @param {object} options.cropCords - Forwarded to the scan service.
 * @param {string} options.patientsFolder - Forwarded to the scan service.
 * @param {object} options.pdf - Forwarded to the scan service.
 * @param {object} [options.abortControllerSignal] - Forwarded to the scan service.
 * @param {Function} options.onSelectFile - Called with the errors file path after a successful scan.
 * @param {boolean} [options.sortToPatients=false] - Forwarded to the scan service.
 */
const runCodeScan = ({
  setLogs,
  setProgress,
  pdfPath,
  cropCords,
  patientsFolder,
  pdf,
  abortControllerSignal,
  onSelectFile,
  sortToPatients = false
}) => {
  // Only the values the scan service consumes are forwarded.
  const scanRequest = {
    setLogs,
    setProgress,
    pdfPath,
    patientsFolder,
    cropCords,
    pdf,
    abortControllerSignal,
    sortToPatients
  };

  // The scan result is a 4-tuple; only its last entry (errors file path) is used here.
  const handleFinished = ([, , , errorsFilePath]) => {
    setLogs('Finished.');
    onSelectFile(errorsFilePath);
  };

  const handleFailure = (err) => {
    console.log(err);
    const reason = err.message ? err.message : err.toString();
    setLogs(`Error: ${reason}`);
  };

  // Runs after success and after failure alike.
  const markComplete = () => {
    setProgress(100);
  };

  codeScanService
    .scanCodes(scanRequest)
    .then(handleFinished)
    .catch(handleFailure)
    .finally(markComplete);
};

The actual piece of code where the processing takes place:

/**
 * Runs OCR over the configured crop region of every page of `pdf`, groups the
 * pages by the code found on them, and writes one PDF per complete document
 * into `patientsFolder` (or into a per-patient sub-folder when
 * `sortToPatients` is set). Pages whose code belongs to another customer id
 * are written to the configured 'splitscan-path'. Pages with an invalid code,
 * or belonging to a document with missing pages, are collected into
 * `<input name>_errors.pdf` next to the input, together with a mapping file.
 *
 * Any failure (OCR initialisation, page rendering, PDF extraction, writing the
 * errors file) rejects the returned promise. Failures while writing a single
 * document are shown with `alert` and the scan continues, as before.
 *
 * NOTE(review): when the abort signal fires, the status is reset to READY and
 * the returned promise intentionally stays pending. That matches the previous
 * behaviour, so existing `.then/.catch/.finally` handlers do not overwrite the
 * READY status. Confirm whether callers would prefer a rejection instead.
 *
 * @param {object} options
 * @param {Function} options.setLogs - Receives the status text for each phase.
 * @param {Function} options.setProgress - Receives an integer percentage.
 * @param {string} options.pdfPath - Path of the PDF the pages are extracted from.
 * @param {string} options.patientsFolder - Target folder for created documents.
 * @param {{x: number, y: number, width: number, height: number}} options.cropCords -
 *   Fallback crop region, used where the 'ocr-position' config has no value.
 * @param {object} options.pdf - Loaded PDF; its page count is read from `_pdfInfo.numPages`.
 * @param {{aborted: boolean}} [options.abortControllerSignal] - Checked between pages/documents.
 * @param {boolean} [options.sortToPatients] - Write each document into a sub-folder
 *   named after the second '-'-separated part of its file name.
 * @returns {Promise<[object, object, string[], (string|null)]>} Resolves to
 *   `[invalidCodeErrors, missingPagesByDocument, createdFiles, errorsFilePath]`.
 */
export const scanCodes = async ({
  setLogs,
  setProgress,
  pdfPath,
  patientsFolder,
  cropCords,
  pdf,
  abortControllerSignal,
  sortToPatients,
}) => {
  // Marker returned by the pipeline below when the caller aborted the scan.
  const ABORTED = Symbol('scan-aborted');

  const toPercent = (done, total) => Math.trunc((done / total) * 100);
  const describeError = (err) => (err.message ? err.message : err.toString());

  // Resets the UI state and reports true when the caller has requested an abort.
  const stopIfAborted = () => {
    if (!(abortControllerSignal && abortControllerSignal.aborted)) return false;
    setLogs(i18n.t("MAIN_CONTENT.CODESCAN.SUBHEADER.SCAN_STATE.READY"));
    setProgress(0);
    return true;
  };

  // Builds a new PDF from the given zero-based page indexes of the input file.
  // The input is re-read for every extraction, as before.
  const extractPages = async (pageIndexes) =>
    pdfService.extractPagesToPdf({
      file: await fs.promises.readFile(pdfPath),
      pages: pageIndexes.map((pageIndex) => ({
        originalPageNumber: Number.parseInt(pageIndex, 10) + 1,
      })),
    });

  const runScan = async () => {
    const pageCodes = [];
    const newFiles = [];

    setLogs(i18n.t("MAIN_CONTENT.CODESCAN.SUBHEADER.SCAN_STATE.STARTING"));

    if (!model) {
      const modelPath = app.isPackaged
        ? path.join(process.resourcesPath, 'public/lang-data/eng.traineddata')
        : path.resolve('public/lang-data/eng.traineddata');
      const loadedBinary = await loadWasmBinary();
      const newEngine = await createOCREngine({ wasmBinary: loadedBinary });
      const loadedModel = await fs.promises.readFile(modelPath);
      await newEngine.loadModel(loadedModel);
      await newEngine.setVariable('tessedit_pageseg_mode', '7');
      await newEngine.setVariable('tessedit_char_whitelist', ' 0123456789');
      // Publish the shared state only after initialisation fully succeeded, so a
      // failed attempt does not leave a half-configured engine for the next call.
      wasmBinary = loadedBinary;
      engine = newEngine;
      model = loadedModel;
    }

    setLogs(i18n.t("MAIN_CONTENT.CODESCAN.SUBHEADER.SCAN_STATE.SCANNING"));

    const pageCount = pdf._pdfInfo.numPages;
    for (let i = 0; i < pageCount; i++) {
      if (stopIfAborted()) return ABORTED;

      const cropCordsConfig = await configService.get('ocr-position');
      // NOTE(review): `||` also falls back when a configured value is 0 — confirm
      // whether 0 is a legitimate coordinate before switching to `??`.
      const cords = {
        x: cropCordsConfig?.x || cropCords.x,
        y: cropCordsConfig?.y || cropCords.y,
        width: cropCordsConfig?.width || cropCords.width,
        height: cropCordsConfig?.height || cropCords.height,
      };
      const pageImage = await pdfService.readAsImageData(pdf, i + 1, cords);

      await engine.loadImage(pageImage);
      const text = engine.getText();
      pageCodes.push({
        code: text.replace(/\n/g, ''),
        pageIndex: i,
      });

      setProgress(toPercent(i, pageCount));
    }

    setProgress(0);
    setLogs(i18n.t("MAIN_CONTENT.CODESCAN.SUBHEADER.SCAN_STATE.ANALYZING"));

    const pagesWithInvalidCodeFormat = [];
    const invalidCustIdFiles = [];
    const validCustIdFiles = [];
    const custId = await configService.get('custId');

    for (const page of pageCodes) {
      const validity = validateCode(page.code);
      if (validity !== null) {
        pagesWithInvalidCodeFormat.push({ ...page, error: validity });
        continue;
      }
      // Do not replace with === (loose comparison is intentional here).
      if (parseCode(page.code).custId == custId) {
        validCustIdFiles.push(page);
      } else {
        invalidCustIdFiles.push(page);
      }
    }

    // Order the foreign-customer pages patient by patient before grouping them.
    const foreignPagesByPatient = _.groupBy(invalidCustIdFiles, (item) => parseCode(item.code).patientId);
    const foreignPages = Object.values(foreignPagesByPatient).flat();

    const invalidDocuments = await groupByAsync(foreignPages, (item) => `Docs-CN-${parseCode(item.code).custId}`);
    const documents = await groupByAsync(validCustIdFiles, (item) => documentId(parseCode(item.code)));
    const documentIds = Object.keys(documents);
    const missingPageNumbers = {};

    for (const [index, docId] of documentIds.entries()) {
      const docPages = documents[docId];
      const docTotalPages = Number.parseInt(parseCode(docPages[0].code).totalPages, 10);
      missingPageNumbers[docId] = [...new Array(docTotalPages).keys()]
        .map((zeroBased) => zeroBased + 1)
        .filter((pageNo) => !docPages.some((p) => parseCode(p.code).pageNo == pageNo));
      setProgress(toPercent(index, documentIds.length));
    }

    setProgress(0);
    setLogs(i18n.t("MAIN_CONTENT.CODESCAN.SUBHEADER.SCAN_STATE.CREATING"));

    for (const [index, docId] of documentIds.entries()) {
      if (stopIfAborted()) return ABORTED;

      // Incomplete documents are not created; they end up in the errors file.
      if (missingPageNumbers[docId].length) continue;

      // Sort a copy so the grouped page list itself is left untouched.
      const orderedPages = [...documents[docId]].sort(
        (a, b) =>
          Number.parseInt(parseCode(a.code).pageNo, 10) - Number.parseInt(parseCode(b.code).pageNo, 10)
      );
      const newExtractedPdf = await extractPages(orderedPages.map((p) => p.pageIndex));

      const newFileName = `${docId}.pdf`;
      let targetPath = path.join(patientsFolder, newFileName);
      if (sortToPatients) {
        const patientID = path.basename(newFileName).split('-')[1];
        const patientDir = path.join(patientsFolder, patientID);
        if (!fse.existsSync(patientDir)) {
          // Must finish before the write below, otherwise the write can race
          // the directory creation and fail.
          await fse.mkdirs(patientDir);
        }
        targetPath = path.join(patientDir, path.basename(newFileName));
      }

      try {
        await fs.promises.writeFile(targetPath, newExtractedPdf);
        // Report the path that was actually written, and only on success.
        newFiles.push(targetPath);
      } catch (err) {
        alert(describeError(err));
      }

      setProgress(toPercent(index, documentIds.length));
    }

    // The grouping is keyed by document id, so count its keys rather than
    // reading `.length` from it.
    const invalidDocumentIds = Object.keys(invalidDocuments);
    if (invalidDocumentIds.length) {
      setProgress(0);
      setLogs('Moving files to SplitScan...');
    }

    const splitScanPath = await configService.get('splitscan-path');
    for (const [index, docId] of invalidDocumentIds.entries()) {
      if (stopIfAborted()) return ABORTED;

      const newExtractedPdf = await extractPages(invalidDocuments[docId].map((p) => p.pageIndex));
      const newFilePath = path.join(splitScanPath, `${docId}.pdf`);

      try {
        await fs.promises.writeFile(newFilePath, newExtractedPdf);
      } catch (err) {
        alert(describeError(err));
      }
      setProgress(toPercent(index, invalidDocumentIds.length));
    }

    const incompleteDocuments = Object.fromEntries(
      Object.entries(missingPageNumbers).filter(([_key, value]) => value.length)
    );

    // pageIndex -> code for every page of a document that has missing pages.
    const pagesOfDocumentsWithMissingPages = Object.keys(incompleteDocuments)
      .map((docId) => documents[docId])
      .map((doc) => Object.fromEntries(doc.map(({ pageIndex, code }) => [pageIndex, code])))
      .reduce((a, b) => ({ ...a, ...b }), {});

    const pagesHavingErrors = Object.fromEntries(
      pagesWithInvalidCodeFormat.map(({ pageIndex, code }) => [pageIndex, code])
    );
    const pagesExcludedFromResult = { ...pagesOfDocumentsWithMissingPages, ...pagesHavingErrors };
    let errorsFilePath = null;

    if (Object.keys(pagesExcludedFromResult).length) {
      // save error pages
      errorsFilePath = path.join(path.dirname(pdfPath), `${path.basename(pdfPath, path.extname(pdfPath))}_errors.pdf`);
      const pagesToBeExcluded = Object.keys(pagesExcludedFromResult).sort(
        (a, b) => Number.parseInt(a, 10) - Number.parseInt(b, 10)
      );

      let errorPdfFileContents;
      try {
        errorPdfFileContents = await extractPages(pagesToBeExcluded);
      } catch (err) {
        alert('could not extract error pages !');
        console.log(err);
        // Without contents the errors file cannot be written; fail the scan with
        // the real cause.
        throw err;
      }

      const errorsFileMappingPath = getMappingFile(errorsFilePath);

      await fs.promises.writeFile(errorsFilePath, errorPdfFileContents);
      // The previous stray second argument (`1`) was not a valid replacer and was
      // ignored, so the serialized output is unchanged.
      await fs.promises.writeFile(
        errorsFileMappingPath,
        JSON.stringify(pagesToBeExcluded.map((p) => pagesExcludedFromResult[p]))
      );
    }

    setProgress(100);

    return [
      Object.fromEntries(pagesWithInvalidCodeFormat.map((i) => [`#${i.pageIndex + 1} | '${i.code}'`, i.error])),
      incompleteDocuments,
      newFiles,
      errorsFilePath,
    ];
  };

  const outcome = await runScan();
  if (outcome === ABORTED) {
    // Keep the promise pending after an abort (see NOTE(review) in the header).
    return new Promise(() => {});
  }
  return outcome;
};

The problem is clearly that each of these passes functions, and functions cannot be cloned. How could I run this process in the background? Any help would be appreciated.

Qasim Ali
  • 587
  • 2
  • 11
  • 28

0 Answers0