1

I have an app (the master) which distributes work to n workers. Inside the worker script I have hooked the console output as follows:

// Keep handles to the native console methods so they stay reachable once the
// hooks below replace console.log / console.error.
console._log = console.log;
console._error = console.error;

/**
 * Forwards one console call to the master process as a `log` message.
 *
 * `process.send` only exists when the process was started with an IPC channel,
 * and `process.connected` becomes false once that channel is closed. In both
 * cases the message cannot reach the master, so it is written through the
 * saved native console method instead of being lost (or throwing).
 *
 * @param {string} channel - Logical stream name sent to the master: 'out' or 'err'.
 * @param {string} fallbackName - Name of the saved native method on `console`
 *     ('_log' or '_error') used when no IPC channel is available.
 * @param {Array<*>} args - Arguments originally passed to the console method.
 * @returns {void}
 */
function forwardToMaster(channel, fallbackName, args) {
    if (typeof process.send !== 'function' || !process.connected) {
        console[fallbackName](...args);
        return;
    }

    process.send({
        cmd: 'log',
        channel,
        // NOTE(review): join() stringifies objects as "[object Object]";
        // kept as-is so the master keeps receiving the same payload format.
        data: args.join(' ')
    });
}

console.log = (...args) => {
    forwardToMaster('out', '_log', args);
};

console.error = (...args) => {
    forwardToMaster('err', '_error', args);
};

The master is responsible for logging all incoming messages to a file in addition to stdout/stderr. This is accomplished with the following code and module:

const intercept = require('intercept-stdout');
const stripAnsi = require('strip-ansi');

/**
 * Creates an intercept hook that appends every captured chunk, with ANSI
 * escape sequences stripped, to the file whose path `resolvePath` returns.
 * The path is resolved on every call, and the hook returns nothing.
 *
 * @param {function(): string} resolvePath - Returns the target log file path.
 * @returns {function(string): void} Hook to pass to `intercept`.
 */
const mirrorToFile = (resolvePath) => (chunk) => {
    fs.appendFileSync(resolvePath(), stripAnsi(chunk));
};

// First hook receives stdout chunks, second hook receives stderr chunks.
const unhook_intercept = intercept(
    mirrorToFile(() => lib.logOutFile()),
    mirrorToFile(() => lib.logErrFile())
);

I noticed in the logs that, after about 1.5 days, one worker stopped sending messages. In the master I have worker exit detection:

/**
 * Builds the timestamped, pid-tagged log line used for worker lifecycle events.
 *
 * @param {object} worker - Cluster worker the message is about.
 * @param {string} message - Already colourised text placed after "Worker: ".
 * @returns {string} The complete log line.
 */
function workerLogLine(worker, message) {
    return `${lib.dateTimeStamp()} - ${chalk.magenta('[')}${chalk.cyan(worker.process.pid)}${chalk.magenta(']')}\tWorker: ${message}`;
}

/**
 * Flags a worker as dead in the `status` table, if it has an entry there.
 * The guard matters: an unknown pid must not throw in the middle of the exit
 * handler, otherwise a freshly forked replacement would never get its work.
 *
 * @param {number} pid - Process id of the worker that exited.
 * @returns {void}
 */
function markWorkerDead(pid) {
    if (status[pid]) {
        status[pid].status = 'dead';
    }
}

/**
 * Reacts to a worker exiting:
 *  - killed by a signal: logged and marked dead (no replacement is started);
 *  - non-zero exit code: logged, replaced by a new worker that inherits the
 *    dead worker's work items;
 *  - exit code 0: its bookkeeping entries are removed and success is logged.
 */
cluster.on('exit', (worker, code, signal) => {
    const pid = worker.process.pid;

    if (signal) {
        // Previously the entry stayed 'alive' although the process was gone.
        // NOTE(review): its work items are intentionally left untouched here,
        // as before — confirm whether signal kills should also be restarted.
        markWorkerDead(pid);
        console.log(workerLogLine(worker, chalk.yellow(`was killed by signal: ${signal}`)));
        return;
    }

    if (code === 0) {
        delete work[pid];
        delete status[pid];
        console.log(workerLogLine(worker, chalk.green('exited successfully')));
        return;
    }

    console.error(workerLogLine(worker, chalk.red(`exited with error code: ${code}`)));

    // Crash: start a replacement and hand the dead worker's assignment to it.
    const newWorker = cluster.fork();
    const newPid = newWorker.process.pid;
    const startedAt = new Date();

    markWorkerDead(pid);
    status[newPid] = {
        started: `${startedAt.toLocaleDateString()} ${startedAt.toLocaleTimeString()}`,
        status: 'alive'
    };

    const data = work[pid];
    delete work[pid];
    work[newPid] = data;

    newWorker.send({
        options: cfg.options,
        websites: data
    });
});

According to the logs, the exit event was never triggered. At the moment I only have assumptions, and I'd like your opinions. Could it be because of:

  1. The synchronous file logging.
  2. A worker disconnected on its own.
  3. A worker exited and the exit event was missed.
  4. Your opinion...
0x_Anakin
  • 3,229
  • 5
  • 47
  • 86

0 Answers0