
I am trying to use fibers with streams:

var Fiber = require('fibers');
var Future = require('fibers/future');
var fs = require('fs');

// Suspends the current fiber; the timer callback resumes it after ms milliseconds.
function sleepForMs(ms) {
  var fiber = Fiber.current;
  setTimeout(function() {
    fiber.run();
  }, ms);
  Fiber.yield();
}

// Runs f inside a fiber (reusing the current one when present) and
// routes any exception f throws to onError.
function catchError(f, onError) {
  return function () {
    var args = arguments;
    var run = function () {
      try {
        var ret = f.apply(null, args);
      }
      catch (e) {
        onError(e);
      }
      return ret;
    };
    if (Fiber.current) {
      return run();
    }
    else {
      return Fiber(run).run();
    }
  }
}

function processFile(callback) {
  var count, finished, onData, onException;
  count = 0;
  finished = false;
  onException = function (error) {
    if (finished) {
      console.error("Exception thrown after already finished:", error.stack || error);
      return;
    }
    finished = true;
    return callback(error);
  };
  onData = function(data) {
    console.log("onData");
    if (finished) {
      return;
    }
    console.log("before sleep");
    sleepForMs(500);
    console.log("after sleep");
    throw new Error("test");
  };
  return fs.createReadStream('test.js')
    .on('data', catchError(onData, onException))
    .on('end', function() {
      console.log("end");
      if (finished) {
        return;
      }
      finished = true;
      return callback(null, count);
    })
    .on('error', function(error) {
      console.log("error", error);
      if (finished) {
        return;
      }
      finished = true;
      return callback(error);
    });
}

Fiber(function () {
  console.log("Calling processFile");
  Future.wrap(processFile)().wait();
  console.log("processFile returned");
}).run();
console.log("back in main");

But it does not really work. The data callback returns before the fiber running inside it has finished. So the above code outputs:

Calling processFile
back in main
onData
before sleep
end
processFile returned
after sleep
Exception thrown after already finished: Error: test

When in fact it should be more like:

Calling processFile
back in main
onData
before sleep
after sleep
end
processFile returned
Error: test
– Mitar

3 Answers


Here's an implementation using wait.for (a wrapper around Fibers): https://github.com/luciotato/waitfor

In this implementation, a fiber is launched for each data chunk, so "n" tasks run in parallel. `processFile` does not invoke its callback until all fibers complete.

This is a demo of how you can do this with Fibers & wait.for, but of course you should encapsulate the module-level vars and all functions in a class before using this in production.

var wait = require('wait.for');
var fs = require('fs');

var tasksLaunched = 0;
var streamEnded = false;
var finalCallback;
var callbackDone = false;
var dataArr = [];

// callback-style sleep, usable with wait.for
function sleepForMs(ms, sleepCallback) {
  setTimeout(function() {
    return sleepCallback();
  }, ms);
}

function resultReady(err, data) {
    if (callbackDone) {
      return;
    }
    if (err) {
      callbackDone = true;
      return finalCallback(err);
    }
    dataArr.push(data);
    checkAllDone();
}

// finish only once the stream has ended AND every launched fiber has reported back
function checkAllDone() {
    if (streamEnded && dataArr.length >= tasksLaunched && !callbackDone) {
      callbackDone = true;
      finalCallback(null, dataArr);
    }
}

// simulate processing a chunk: sleep a random time, then report the chunk's size
function processChunk(data, callback) {
    var ms = Math.floor(Math.random() * 1000);
    console.log('waiting',ms);
    wait.for(sleepForMs,ms);
    console.log(data.length,"chars");
    return callback(null,data.length);
}

function processFile(filename, callback) {
  finalCallback = callback;

  var onData = function(data) {
    console.log("onData");
    tasksLaunched++;
    // launch one fiber per chunk; resultReady collects each result
    wait.launchFiber(processChunk, data, resultReady);
  };

  fs.createReadStream(filename)
    .on('data', onData)
    .on('end', function() {
        console.log("end");
        streamEnded = true;
        checkAllDone(); // the last fiber may have finished already
    })
    .on('error', function(error) {
        console.log("error", error);
        if (!callbackDone) {
            callbackDone = true;
            return callback(error);
          }
    });
}

function mainFiber() {
  console.log("Calling processFile");
  var data = wait.for(processFile,'/bin/bash');
  console.log(data.length,"results");
  console.log("processFile returned");
}

//MAIN
wait.launchFiber(mainFiber);
console.log("back in main");
– Lucio M. Tato
  • I thought that the point of the question was to sequentially process arriving chunks of data using some yielding function. – Leonid Beschastny Aug 16 '14 at 21:39
  • given the "desired output" he seems to want both tasks finished before processFile returns – Lucio M. Tato Aug 16 '14 at 22:00
  • Yes, sleep there is just an example. Imagine any other fiber-enabled function which can yield. I would like to be able to call it inside a callback for each data chunk without having to worry about what the other chunks are doing in the meantime, so that things behave as you would expect from blocking calls. – Mitar Aug 17 '14 at 08:00
  • I've altered the answer to launch a fiber for each chunk – Lucio M. Tato Aug 17 '14 at 15:27
  • Would it work if `sleepForMs` would be a fiber-yielding function and not a callback-based one? – Mitar Aug 18 '14 at 04:49
  • Yes, it will work. You can do anything inside "processChunk". – Lucio M. Tato Aug 18 '14 at 09:19
  • please paste such fiber.yielding function you want to call inside "processChunk" and I'll update the answer. I need a specific example of what you're trying to do. – Lucio M. Tato Aug 21 '14 at 01:44
  • `sleepForMs` is an example of such function. :-) – Mitar Aug 21 '14 at 19:04
  • Inside the function you must use: `wait.for(asyncFn,arg,arg...)`. Internally, wait.for calls asyncFn and then Fiber.yield(), pausing the fiber until async callback. **You "yield" (pause) on async calls and resume on callback**. Since all of node is async, this allows you to use all of node async functions as if they were sync functions. – Lucio M. Tato Aug 21 '14 at 20:00
  • You should use "wait.for" calls instead of Fiber.yield() and Fiber.run(), so you'll have to change your code. **Start from the code I've provided and try it, work on it, to see if it solves your problem**. If you run into new problems, post another question with **a running example** based on the code I've provided and using wait.for, and maybe I can help you. – Lucio M. Tato Aug 21 '14 at 20:00
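
To make the last two comments concrete, here is a minimal sketch of the pattern they describe (assuming the wait.for API used in the answer above): the fiber-yielding sleepForMs from the question becomes a plain callback-style function, and wait.for does the yield/resume bookkeeping.

var wait = require('wait.for');

// a plain callback-style async function; no Fiber internals needed
function sleepForMs(ms, callback) {
  setTimeout(callback, ms);
}

wait.launchFiber(function() {
  console.log("before sleep");
  wait.for(sleepForMs, 500); // yields here, resumes when the timer fires
  console.log("after sleep");
});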

Reduce the sleeping time, and add priorities or timers to the other blocks as well, so that after a certain time limit each block is run according to its priority. This is how you can get the output in your desired order.

– Ashraf.Shk786

Looks like no one knows how to do what you're asking.

In this case, you could process your stream in some traditional asynchronous way, applying your yielding function to the result.

Here are some examples of how to do so.


Collecting all stream data with raw-body

One solution is to collect all stream data before processing any of it. This can be done easily with the raw-body module:

var rawBody = require('raw-body');

function processData(data) {
  console.log("before sleep");
  sleepForMs(500);
  console.log("after sleep");
}

function processFile(callback) {
  var stream = fs.createReadStream('fiber.js');
  rawBody(stream, function(err, data) {
    if (err) return callback(err);
    Fiber(processData).run(data); // process your data
    callback();
  });
}

Using this example you will:

  1. wait for all chunks to arrive
  2. initiate processing of your stream data in a Fiber
  3. return from processData to the main thread as soon as it first yields
  4. the stream data will be processed at some point in the future

If you want, you may add try ... catch or any other exception handling to prevent processData from crashing your app.
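
For instance, a minimal sketch of such a guard (a variation on the processFile above; the console.error reporting policy is just an illustration):

var Fiber = require('fibers');

function processFile(callback) {
  var stream = fs.createReadStream('fiber.js');
  rawBody(stream, function(err, data) {
    if (err) return callback(err);
    Fiber(function() {
      try {
        processData(data);
      } catch (e) {
        // report instead of letting the exception escape the fiber
        console.error("processing failed:", e.stack || e);
      }
    }).run();
    callback();
  });
}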


Using a smart job queue to process all chunks in series

But if you really want to process each chunk of data the moment it arrives, you could use a smart control-flow module. Here is an example using the queue feature of the async module:

var async = require('async');

// returns a fiber body that processes one chunk, then signals the queue
function processChunk(data, next) {
  return function() {
    console.log("before sleep");
    sleepForMs(500);
    console.log("after sleep");
    next();
  }
}

function processFile(callback) {
  var q = async.queue(function(data, next) {
    Fiber(processChunk(data, next)).run();
  }, 1);
  fs.createReadStream('fiber.js').on('data', function(data) {
    q.push(data);
  }).on('error', function(err) {
    callback(err);
  }).on('end', function() {
    callback(); // not waiting for the queue to drain
  });
}

Using this example you will:

  1. start listening to the stream, pushing each new chunk to the processing queue
  2. return from processFile to the main thread the moment the stream is closed, without waiting for the data chunks to be processed (see the variation after this list)
  3. all data chunks will be processed in strict series at some point in the future
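
If you do need processFile to hold off until every queued chunk has actually been processed, here is a small variation (a sketch, assuming the q.drain and q.idle hooks of the async module):

function processFile(callback) {
  var finished = false;
  var done = function(err) {
    if (finished) return;
    finished = true;
    callback(err);
  };
  var q = async.queue(function(data, next) {
    Fiber(processChunk(data, next)).run();
  }, 1);
  fs.createReadStream('fiber.js').on('data', function(data) {
    q.push(data);
  }).on('error', function(err) {
    done(err);
  }).on('end', function() {
    if (q.idle()) {
      done(); // every chunk already processed (or none arrived)
    } else {
      q.drain = done; // fires once the last chunk finishes
    }
  });
}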

I know that it's not what you've asked for, but I hope it'll help you.

– Leonid Beschastny