I'm a bit confused because all the examples I read about the Node cluster module only seem to apply to web servers handling concurrent requests, while for CPU-intensive work the worker_threads module is recommended instead.
But what about file I/O? Imagine I have an array with 1 million filenames: ['1.txt', '2.txt', etc., ..., '1000000.txt'], and for each file I need to do some heavy processing and then write the resulting content back out.
What would be the right way to use all the CPU cores efficiently, spreading the processing of different filenames across different cores?
Normally I would use this:
const fs = require('fs')
const async = require('async')
const heavyProcessing = require('./heavyProcessing.js')

const files = ['1.txt', '2.txt', ..., '1000000.txt']

async.each(files, function (file, cb) {
  fs.writeFile(file, heavyProcessing(file), function (err) {
    cb(err)
  })
}, function (err) {
  if (err) console.error(err)
})
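(In practice, with a million files, I would probably switch to async.eachLimit so that not all the writes start at once — the limit of 100 below is just an arbitrary number I picked:)

// same idea, but cap how many files are in flight at the same time
async.eachLimit(files, 100, function (file, cb) {
  fs.writeFile(file, heavyProcessing(file), cb)
}, function (err) {
  if (err) console.error(err)
})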
Should I now use the cluster module or worker_threads? And how should I use it?
Does this work?
const fs = require('fs')
const async = require('async')
const heavyProcessing = require('./heavyProcessing.js')
const cluster = require('node:cluster');
const numCPUs = require('node:os').cpus().length;
const process = require('node:process');
if (cluster.isPrimary) {
  console.log(`Primary ${process.pid} is running`);

  // Fork workers.
  for (let i = 0; i < numCPUs; i++) {
    cluster.fork();
  }

  cluster.on('exit', (worker, code, signal) => {
    console.log(`worker ${worker.process.pid} died`);
  });
} else {
  // every forked worker runs this same block -- would they all process the full list?
  const files = ['1.txt', '2.txt', ..., '1000000.txt']

  async.each(files, function (file, cb) {
    fs.writeFile(file, heavyProcessing(file), function (err) {
      cb(err)
    })
  }, function (err) {
    if (err) console.error(err)
  })
}
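Or is something along these lines closer to how worker_threads is supposed to be used? This is only a rough sketch of what I imagine, assuming heavyProcessing is a synchronous function and that slicing the file list into one chunk per core is the right approach:

const fs = require('fs')
const os = require('node:os')
const { Worker, isMainThread, workerData } = require('node:worker_threads')
const heavyProcessing = require('./heavyProcessing.js')

if (isMainThread) {
  const files = ['1.txt', '2.txt', ..., '1000000.txt']
  const numCPUs = os.cpus().length
  const chunkSize = Math.ceil(files.length / numCPUs)

  for (let i = 0; i < numCPUs; i++) {
    // give each thread its own slice of the filename array
    const worker = new Worker(__filename, {
      workerData: files.slice(i * chunkSize, (i + 1) * chunkSize)
    })
    worker.on('exit', () => console.log(`thread ${i} finished its chunk`))
  }
} else {
  // inside a worker thread: only process the filenames we were given
  for (const file of workerData) {
    fs.writeFileSync(file, heavyProcessing(file))
  }
}

Is that the right pattern, or is there a better way to spread the files over the cores?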