How do I capture the first 3 paragraphs from the second chapter in an epub file?
I am using Nodejs and the following modules epub.js
multer
fs
epub-metadata
. My app sends an epub file from the client side to the servers side for parsing.
There is a folder called uploads
in the same directory as the server.js
.
When I run my app, the server side is able to successfully capture the books Metadata
using:
// Extract metadata using epub-metadata
const title = book.metadata.title;
console.log('EPUB Metadata:', book.metadata);
Yielding:
EPUB Metadata: {
description: '¡Aprende a utilizar el storytelling para ganar cualquier concurso! Con este libro, descubre cómo destacar entre la multitud y ganar el primer lugar en cualquier competición. ¡Sé el ganador!',
language: 'es',
creator: 'Zuan Ásvarez',
creatorFileAs: 'Zuan Ásvarez',
title: '¡Sé el ganador! Utiliza el poder del storytelling para destacar en cualquier concurso y ganar el primer lugar.',
UUID: '6A5EE928-BDA4-4246-BF14-56587BD6B885',
subject: 'marketing',
'calibre:timestamp': '2023-03-31T06:42:00.358647',
cover: 'cover'
}
I am also able to successfully capture the Table of contents
using:
// Find the table of contents
const tableOfContents = book.toc;
console.log('############ tableOfContents ##########: ', tableOfContents);
Yielding:
############ tableOfContents ##########: [
{
level: 0,
order: 1,
title: 'Capítulo 1: Introducción: ¿Por qué el storytelling es importante en los concursos?',
href: 'tmp_0244159-4fb3f440-afd3-41f7-8ef2-ff4c6ff37440_W9DfRi.ch.fixed.fc.tidied.stylehacked.xfixed.sc_split_003.html#cap1',
id: 'd2cb557e-f55f-4128-9abb-c266bdf5be9b'
},
{
level: 0,
order: 2,
title: 'Capítulo 2: Identifica tu audiencia y los objetivos del concurso',
href: 'tmp_0244159-4fb3f440-afd3-41f7-8ef2-ff4c6ff37440_W9DfRi.ch.fixed.fc.tidied.stylehacked.xfixed.sc_split_004.html#cap2',
id: 'a4ac0fda-3bd5-45ab-b602-af38e603fb82'
},
.
.
.
What I am really interested in are the first 3 paragraphs of the second chapter.
Find below the entire code.
app.post('/captureBook', upload.single('file'), async (request, response) => {
try {
console.log('## You are in Capture book ###');
// Access the file using request.file
const file = request.file;
if (!file) {
throw new Error('No file uploaded');
}
// Adjust the file paths
const uploadsFolderPath = path.join(__dirname, 'uploads');
const epubFilePath = path.join(uploadsFolderPath, file.filename);
const imagewebroot = '/images/'; // Adjust the prefix for image URLs as needed
const chapterwebroot = '/chapters/'; // Adjust the prefix for chapter URLs as needed
// Create an EPub instance
const book = new EPub(epubFilePath, imagewebroot, chapterwebroot);
// Open the EPUB file
book.on('end', async function () {
try {
// Retrieve the book title
const title = book.metadata.title;
console.log('EPUB Metadata:', book.metadata);
// Extract metadata using epub-metadata
const epubMetadata = await EpubMetadata(epubFilePath);
console.log('Extracted Metadata:', epubMetadata);
// Find the table of contents
const tableOfContents = book.toc;
console.log('############ tableOfContents ##########: ', tableOfContents);
// Loop through the table of contents
for (let item of tableOfContents) {
console.log('########## Table of Contents Item:', item);
// Retrieve the item content
const itemContentPath = path.join(uploadsFolderPath, item.href); // Adjust the path to include the uploads folder
console.log('Loading item content:', itemContentPath);
// Wrap the book.getChapter function in a promise
const getItemContent = () => {
return new Promise((resolve, reject) => {
book.getChapter(itemContentPath, function (error, itemContent) {
if (error) {
console.error('Failed to retrieve item content:', error);
reject(error);
} else {
console.log('Item content loaded:', itemContent);
resolve(itemContent);
}
});
});
};
try {
const itemContent = await getItemContent();
// Rest of the code...
} catch (error) {
console.error(error);
response.status(500).json({ error: 'Failed to retrieve item content' });
return; // Return to avoid further execution
}
}
// Respond with success
response.status(200).json({ message: 'Book capture completed successfully' });
} catch (error) {
console.error(error);
response.status(500).json({ error: 'Failed to process the file' });
}
});
// Handle any parsing errors
book.on('error', function (error) {
console.error(error);
response.status(500).json({ error: 'Failed to process the file' });
});
// Load the EPUB file
book.parse();
} catch (error) {
console.error(error);
response.status(500).json({ error: 'Failed to process the file' });
}
});
The section of code supposed to loop through the table of contents and capture the paragraphs, as seen below, logs Failed to retrieve item content: Error: File not found
########## Table of Contents Item: {
level: 0,
order: 1,
title: 'Capítulo 1: Introducción: ¿Por qué el storytelling es importante en los concursos?',
href: 'tmp_0244159-4fb3f440-afd3-41f7-8ef2-ff4c6ff37440_W9DfRi.ch.fixed.fc.tidied.stylehacked.xfixed.sc_split_003.html#cap1',
id: 'd2cb557e-f55f-4128-9abb-c266bdf5be9b'
}
Loading item content: /home/sirbt/Desktop/epubAI/epubAI/uploads/tmp_0244159-4fb3f440-afd3-41f7-8ef2-ff4c6ff37440_W9DfRi.ch.fixed.fc.tidied.stylehacked.xfixed.sc_split_003.html#cap1
Failed to retrieve item content: Error: File not found
How do I modify my code to ensure that it works?