I have a sort of desktop OCR application that is being developed in React. The PDF.js library is used to read the PDF when the scan button is clicked. I would like to run this process in the background, because when a user tries to interact with other parts of the application while a scan is running, the scan process gets interrupted.
I first tried to use a worker, but because I am totally new to this, it did not work out. Then I tried to achieve this with ipcRenderer,
but both of these use the Structured Clone Algorithm to pass data.
The code passes functions and state setters to different components, and functions cannot be cloned, so I am not able to work around this.
When the button is clicked, the `onRun` handler of the following component is executed:
<CodeScanActionBar
status={codeScanStatus}
progress={codeScanProgress}
abortController={abortController}
onRun={() => {
runCodeScan({
pdf,
pdfPath: file,
patientsFolder,
setLogs: setCodeScanStatus,
setProgress: setCodeScanProgress,
cropCords: ocrSettings,
setPromptMessage,
abortControllerSignal,
onSelectFile,
sortToPatients
})
}
}
/>
The component above calls the following method:
/**
 * Starts a code scan through `codeScanService.scanCodes` and reports the
 * outcome through the supplied callbacks. Returns nothing; the scan continues
 * asynchronously after this function has returned.
 *
 * @param {object} options
 * @param {(message: string) => void} options.setLogs - Receives status text ('Finished.' or an error message).
 * @param {(percent: number) => void} options.setProgress - Set to 100 once the scan promise settles.
 * @param {string} options.pdfPath - Forwarded to the scan service.
 * @param {object} options.cropCords - Forwarded to the scan service.
 * @param {string} options.patientsFolder - Forwarded to the scan service.
 * @param {object} options.pdf - Forwarded to the scan service.
 * @param {object} [options.abortControllerSignal] - Forwarded to the scan service.
 * @param {(filePath: *) => void} options.onSelectFile - Called with the fourth entry of the scan result.
 * @param {boolean} [options.sortToPatients=false] - Forwarded to the scan service.
 */
const runCodeScan = ({
  setLogs,
  setProgress,
  pdfPath,
  cropCords,
  patientsFolder,
  pdf,
  abortControllerSignal,
  onSelectFile,
  sortToPatients = false
}) => {
  // Kick the scan off synchronously so that a synchronous failure of the
  // service call still surfaces to the caller of runCodeScan.
  const pendingScan = codeScanService.scanCodes({
    setLogs,
    setProgress,
    pdfPath,
    patientsFolder,
    cropCords,
    pdf,
    abortControllerSignal,
    sortToPatients
  });

  // Only the fourth entry of the result (the errors file path) is used here.
  const handleSuccess = ([, , , errorsFilePath]) => {
    setLogs('Finished.');
    onSelectFile(errorsFilePath);
  };

  const handleFailure = (err) => {
    console.log(err);
    const reason = err.message || err.toString();
    setLogs(`Error: ${reason}`);
  };

  const markComplete = () => {
    setProgress(100);
  };

  pendingScan.then(handleSuccess).catch(handleFailure).finally(markComplete);
};
This is the actual piece of code where the processing takes place:
/**
 * OCR-scans the code on every page of a PDF, groups the pages into documents
 * and writes one PDF per document.
 *
 * Phases (each announced through `setLogs`, progress through `setProgress`):
 *  1. STARTING  - lazily initialise the OCR engine the first time (`model` unset).
 *  2. SCANNING  - read the cropped region of every page and OCR it into a code.
 *  3. ANALYZING - validate codes, split pages by customer id, find missing pages.
 *  4. CREATING  - write one PDF per complete document into `patientsFolder`
 *                 (inside a per-patient sub folder when `sortToPatients` is set).
 *  5. Pages whose customer id differs from the configured one are written to
 *     the configured 'splitscan-path'.
 *  6. Pages with invalid codes or belonging to incomplete documents are written
 *     to `<pdf name>_errors.pdf` next to the source, plus a JSON mapping file.
 *
 * @param {object} options
 * @param {(message: string) => void} options.setLogs - Status text callback.
 * @param {(percent: number) => void} options.setProgress - Progress callback (0-100).
 * @param {string} options.pdfPath - Path of the source PDF on disk.
 * @param {string} options.patientsFolder - Output folder for complete documents.
 * @param {{x: number, y: number, width: number, height: number}} options.cropCords -
 *   Fallback crop region, used for any value missing from the 'ocr-position' config.
 * @param {object} options.pdf - Loaded PDF; `pdf._pdfInfo.numPages` is read for the page count.
 * @param {{aborted: boolean}} [options.abortControllerSignal] - Checked before each page/document.
 * @param {boolean} [options.sortToPatients] - Write each document into a per-patient sub folder.
 * @returns {Promise<[object, object, string[], (string|null)]>} Resolves with
 *   `[invalidCodeErrors, missingPagesByDocId, newFiles, errorsFilePath]`.
 *   Rejects when any step throws.
 *   NOTE(review): when the scan is aborted the promise is intentionally left
 *   pending, exactly as before - settling it would make the current caller
 *   overwrite the READY status. Consider resolving with an explicit "aborted"
 *   result once callers can handle it.
 */
export const scanCodes = async ({
  setLogs,
  setProgress,
  pdfPath,
  patientsFolder,
  cropCords,
  pdf,
  abortControllerSignal,
  sortToPatients,
}) =>
  new Promise((resolve, reject) => {
    // Resets the UI and reports whether the caller requested an abort.
    const stopIfAborted = () => {
      if (!abortControllerSignal?.aborted) {
        return false;
      }
      setLogs(i18n.t("MAIN_CONTENT.CODESCAN.SUBHEADER.SCAN_STATE.READY"));
      setProgress(0);
      return true;
    };

    const run = async () => {
      const pageCodes = [];
      const newFiles = [];

      // --- 1. Engine initialisation (once per module lifetime) -------------
      setLogs(i18n.t("MAIN_CONTENT.CODESCAN.SUBHEADER.SCAN_STATE.STARTING"));
      if (!model) {
        wasmBinary = await loadWasmBinary();
        engine = await createOCREngine({ wasmBinary });
        const modelPath = app.isPackaged
          ? path.join(process.resourcesPath, 'public/lang-data/eng.traineddata')
          : path.resolve('public/lang-data/eng.traineddata');
        model = await fs.promises.readFile(modelPath);
        await engine.loadModel(model);
        // Presumably Tesseract settings: single-line segmentation, digits and space only.
        await engine.setVariable('tessedit_pageseg_mode', '7');
        await engine.setVariable('tessedit_char_whitelist', ' 0123456789');
      }

      // --- 2. OCR every page ----------------------------------------------
      setLogs(i18n.t("MAIN_CONTENT.CODESCAN.SUBHEADER.SCAN_STATE.SCANNING"));
      const numPages = pdf._pdfInfo.numPages;
      for (let i = 0; i < numPages; i++) {
        if (stopIfAborted()) return;

        // NOTE(review): `||` also falls back when a configured value is 0
        // (e.g. x = 0); confirm whether `??` is wanted for x/y.
        const cropCordsConfig = await configService.get('ocr-position');
        const cords = {
          x: cropCordsConfig?.x || cropCords.x,
          y: cropCordsConfig?.y || cropCords.y,
          width: cropCordsConfig?.width || cropCords.width,
          height: cropCordsConfig?.height || cropCords.height,
        };
        const pageImage = await pdfService.readAsImageData(pdf, i + 1, cords);
        await engine.loadImage(pageImage);
        const text = engine.getText();
        pageCodes.push({
          code: text.replace(/\n/gi, ''),
          pageIndex: i,
        });
        setProgress(Math.floor((i / numPages) * 100));
      }

      // --- 3. Classify pages ----------------------------------------------
      setProgress(0);
      setLogs(i18n.t("MAIN_CONTENT.CODESCAN.SUBHEADER.SCAN_STATE.ANALYZING"));
      const pagesWithInvalidCodeFormat = [];
      const invalidCustIdFiles = [];
      const validCustIdFiles = [];
      const custId = await configService.get('custId');
      for (const page of pageCodes) {
        const validity = validateCode(page.code);
        if (validity !== null) {
          pagesWithInvalidCodeFormat.push({ ...page, error: validity });
          // Do not replace with === (loose comparison is deliberate here)
        } else if (parseCode(page.code).custId == custId) {
          validCustIdFiles.push(page);
        } else {
          invalidCustIdFiles.push(page);
        }
      }

      // Order foreign-customer pages by patient before grouping them per customer.
      const foreignPagesByPatient = _.groupBy(invalidCustIdFiles, (item) => parseCode(item.code).patientId);
      const foreignPages = Object.values(foreignPagesByPatient).flat();
      const invalidDocuments = await groupByAsync(foreignPages, (item) => `Docs-CN-${parseCode(item.code).custId}`);
      const documents = await groupByAsync(validCustIdFiles, (item) => documentId(parseCode(item.code)));

      // For every document list the page numbers (1-based) that were not found.
      const documentIds = Object.keys(documents);
      const missingPageNumbers = {};
      for (const [position, docId] of documentIds.entries()) {
        const docPages = documents[docId];
        const docTotalPages = parseCode(docPages[0].code).totalPages;
        missingPageNumbers[docId] = [...new Array(parseInt(docTotalPages, 10)).keys()]
          .filter((i) => !docPages.some((p) => parseCode(p.code).pageNo == i + 1))
          .map((i) => i + 1);
        setProgress(Math.floor((position / documentIds.length) * 100));
      }

      // --- 4. Write complete documents ------------------------------------
      setProgress(0);
      setLogs(i18n.t("MAIN_CONTENT.CODESCAN.SUBHEADER.SCAN_STATE.CREATING"));
      for (const [position, docId] of documentIds.entries()) {
        if (stopIfAborted()) return;
        // Incomplete documents go to the errors file instead.
        if (missingPageNumbers[docId].length) continue;

        const docPages = documents[docId];
        const newExtractedPdf = await pdfService.extractPagesToPdf({
          file: await fs.promises.readFile(pdfPath),
          pages: docPages
            .sort((a, b) => parseInt(parseCode(a.code).pageNo, 10) - parseInt(parseCode(b.code).pageNo, 10))
            .map((p) => ({ originalPageNumber: parseInt(p.pageIndex, 10) + 1 })),
        });
        const newFileName = `${docId}.pdf`;
        const newFilePath = path.join(patientsFolder, newFileName);
        let targetPath = newFilePath;
        if (sortToPatients) {
          // The patient id is the second dash-separated part of the file name.
          const patientID = path.basename(newFileName).split('-')[1];
          const patientDir = path.join(patientsFolder, patientID);
          // Await directory creation so the write below cannot race it.
          await fse.mkdirs(patientDir);
          targetPath = path.join(patientDir, path.basename(newFileName));
        }
        // Best effort: a failed write is reported to the user, the scan goes on.
        await fs.promises.writeFile(targetPath, newExtractedPdf).catch((err) => {
          alert(err.message ? err.message : err.toString());
        });
        // NOTE(review): this records the un-sorted path even when the file was
        // written into a patient sub folder - confirm what consumers expect.
        newFiles.push(newFilePath);
        setProgress(Math.floor((position / documentIds.length) * 100));
      }

      // --- 5. Write pages of other customers to SplitScan ------------------
      const invalidDocumentIds = Object.keys(invalidDocuments);
      if (invalidDocumentIds.length) {
        setProgress(0);
        setLogs('Moving files to SplitScan...');
      }
      const splitScanPath = await configService.get('splitscan-path');
      for (const [position, docId] of invalidDocumentIds.entries()) {
        if (stopIfAborted()) return;

        const docPages = invalidDocuments[docId];
        const newExtractedPdf = await pdfService.extractPagesToPdf({
          file: await fs.promises.readFile(pdfPath),
          pages: docPages.map((p) => ({ originalPageNumber: parseInt(p.pageIndex, 10) + 1 })),
        });
        const newFilePath = path.join(splitScanPath, `${docId}.pdf`);
        await fs.promises.writeFile(newFilePath, newExtractedPdf).catch((err) => {
          alert(err.message ? err.message : err.toString());
        });
        setProgress(Math.floor((position / invalidDocumentIds.length) * 100));
      }

      // --- 6. Collect and save pages excluded from the result --------------
      const pagesOfDocumentsWithMissingPages = Object.entries(missingPageNumbers)
        .filter(([_key, value]) => value.length)
        .map(([doc, _pages]) => documents[doc])
        .map((doc) => Object.fromEntries(doc.map(({ pageIndex, code }) => [pageIndex, code])))
        .reduce((a, b) => ({ ...a, ...b }), {});
      const pagesHavingErrors = Object.fromEntries(
        pagesWithInvalidCodeFormat.map(({ pageIndex, code }) => [pageIndex, code])
      );
      // pageIndex -> code for every page that did not end up in an output document.
      const pagesExcludedFromResult = { ...pagesOfDocumentsWithMissingPages, ...pagesHavingErrors };

      let errorsFilePath = null;
      if (Object.keys(pagesExcludedFromResult).length) {
        errorsFilePath = path.join(
          path.dirname(pdfPath),
          `${path.basename(pdfPath, path.extname(pdfPath))}_errors.pdf`
        );
        const pagesToBeExcluded = Object.keys(pagesExcludedFromResult)
          .sort((a, b) => parseInt(a, 10) - parseInt(b, 10));
        const errorPdfFileContents = await pdfService
          .extractPagesToPdf({
            file: await fs.promises.readFile(pdfPath),
            pages: pagesToBeExcluded.map((pNo) => ({ originalPageNumber: parseInt(pNo, 10) + 1 })),
          })
          .catch((err) => {
            alert('could not extract error pages !');
            console.log(err);
            // Fail with the real cause instead of writing `undefined` to disk.
            throw err;
          });
        const errorsFileMappingPath = getMappingFile(errorsFilePath);
        // Write failures propagate and reject the returned promise.
        await fs.promises.writeFile(errorsFilePath, errorPdfFileContents);
        // The original passed `1` as the replacer argument, which JSON.stringify
        // ignores; the output is unchanged (compact JSON).
        await fs.promises.writeFile(
          errorsFileMappingPath,
          JSON.stringify(pagesToBeExcluded.map((p) => pagesExcludedFromResult[p]))
        );
      }

      setProgress(100);
      resolve([
        Object.fromEntries(pagesWithInvalidCodeFormat.map((i) => [`#${i.pageIndex + 1} | '${i.code}'`, i.error])),
        Object.fromEntries(Object.entries(missingPageNumbers).filter(([_key, value]) => value.length)),
        newFiles,
        errorsFilePath,
      ]);
    };

    // Any exception inside the scan now rejects the promise instead of being
    // lost as an unhandled rejection of an async executor.
    run().catch(reject);
  });
The problem is clear: each of these calls passes functions as arguments, and functions cannot be cloned by the Structured Clone Algorithm. How can I run this process in the background? Any help would be appreciated.