
I use the request module to download a zip file that contains a .csv file, then I pipe the content through the unzip and split modules, and finally I parse each line and write the result to MongoDB with the mongoose-object-stream module.
My code:

//index.js
var request = require('request');
var bun = require('bun');
var split = require('split');
var unzip = require('./lib/unzip');
var tomongo = require('./lib/tomongo');

var pipeline = bun([ unzip(), split()]);
request.get( "http://someurl/somefile.zip" )
  .pipe( pipeline )
  .pipe( tomongo() );

//tomongo.js
var mySchema = require('../schema.json');
var through = require('through2');
var mos = require('mongoose-object-stream');
var mongoose = require('mongoose');
var models = require('../models');

const dbpath = "mongodb://localhost:27017/test";
const mongo = mongoose.connect(dbpath, {useNewUrlParser: true });
mongo.then(() => {
  console.log('mongoDB connected');
}).catch((err) => {
  console.log('err', err);
});
var db = mongoose.connection;
db.on('error', console.error.bind(console, 'connection error:'));

var modelStream = new mos(models.books);

function parser(){

  var columns = mySchema;

  var parseandwrite = function( chunk, _, cb ){
    // split the tab-separated line and map each cell to its
    // column name from schema.json
    var row = {}, cells = chunk.toString('utf-8').split('\t');
    cells.forEach( function( cell, i ){
      row[ columns[ i ] ] = ( cell || '' ).trim();
    });
    if( chunk ){
      modelStream.write( row ); // write the parsed row to MongoDB
    }
    cb();
  };

  return through.obj( parseandwrite );
}

module.exports = parser;

I want to do something when the stream ends and all records are stored in the db.

I tried adding .on('finish', function(){process.exit()}) or .on('end', function(){process.exit()}) to the pipe (sketched below), but Node keeps running.
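For reference, the attempts looked roughly like this (a sketch; in practice neither handler ever ran, as the answers below explain):

request.get( "http://someurl/somefile.zip" )
  .pipe( pipeline )
  .pipe( tomongo() )
  // neither event fired here in practice: nothing consumes the readable
  // side of tomongo(), and the open mongoose connection keeps Node alive
  .on( 'finish', function(){ process.exit() } )
  .on( 'end', function(){ process.exit() } );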

  • `process.exit([code])` ... `exit` is a function. From your question it seems you are not calling it as such. Doc link: https://nodejs.org/api/process.html#process_exit_codes – Akrion Aug 09 '18 at 21:24
  • Sorry, that was a typo in the post; I have corrected it. – Francesco Bellavita Aug 09 '18 at 21:32
  • If you close your open resources, Node will exit automatically. It's best practice to always gracefully close your resources; having to use `process.exit` is usually an indication of a memory leak. When you're done writing the data, close the connection to the DB (a sketch of this follows the comments). – Jake Holzinger Aug 09 '18 at 21:33
  • How can I see when all the data has been written? – Francesco Bellavita Aug 09 '18 at 21:37
  • I think I could count the lines in a previous pipe, pass that count to tomongo(), and close the db after writing the last line. Is that a solution, or is there a better one? – Francesco Bellavita Aug 09 '18 at 22:18
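In code, the comments' suggestion might look like this (a minimal sketch inside tomongo.js, where modelStream and mongoose are in scope):

// when the final writable flushes its last row, close the mongoose
// connection so Node can exit on its own, without process.exit()
modelStream.on( 'finish', function(){
  mongoose.disconnect().then( function(){
    console.log( 'all rows flushed; connection closed' );
  });
});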

2 Answers


Assuming that your parser method is not the problem here, I would suggest moving the database connection logic into your index: you should connect to the DB before attempting to stream data to it. If you wrap the streaming logic in a Promise, you can keep all of the DB connection handling in one Promise chain.

Here's an example of what that might look like:

var Promise = require('bluebird');
var mongoose = require('mongoose');
var MongooseObjectStream = require('mongoose-object-stream');
var request = require('request');
var split = require('split');
var through = require('through2');
var unzip = require('unzip-stream');

function streamToDB(url) {
    return new Promise((resolve, reject) => {
        request.get(url)
            .pipe(unzip.Parse())
            .pipe(through.obj(function (entry, enc, cb) {
                if (entry.path === 'file_with_content') {
                    entry.on('end', cb)
                        .on('error', cb)
                        .on('data', (data) => this.push(data));
                } else {
                    entry.autodrain()
                        .on('error', cb)
                        .on('finish', cb);
                }
            }))
            .pipe(split())
            .pipe(through.obj((line, enc, cb) => {
                cb(null, line.split('\t')); // Convert to "real" object here
            }))
            .pipe(new MongooseObjectStream(mongoose, 'Model', {}, { strict: false }))
            .on('error', reject)
            .on('finish', resolve);
    });
}

mongoose.connect('mongodb://localhost:27017/test', {
    useNewUrlParser: true,
    promiseLibrary: Promise
}).then(() => {
    return streamToDB('http://someurl/somefile.zip')
        .finally(() => mongoose.disconnect());
}).catch((err) => {
    console.error(err);
});
– Jake Holzinger
  • It returns an error after the db connection: `mongoDB connected` followed by `internal/streams/legacy.js:57 throw er; // Unhandled stream error in pipe. ^ Error: write after end at writeAfterEnd (D:\tests\app\node_modules\readable-stream\lib\_stream_writable.js:144:12) at BunWrapper.Writable.write (D:\tests\app\node_modules\readable-stream\lib\_stream_writable.js:192:5) at Request.ondata (internal/streams/legacy.js:15:31) at Request.emit (events.js:182:13) at IncomingMessage. (D:\tests\app\node_modules\request\request.js:1076:12) .....` – Francesco Bellavita Aug 09 '18 at 23:00
  • and in the MongoDB collection I see there is only the first document – Francesco Bellavita Aug 09 '18 at 23:10
  • My fault! I had left in a line that caused a double request... now it works, but the database stays connected and Node does not exit, same as before I changed the code to use a Promise. – Francesco Bellavita Aug 10 '18 at 01:16
  • You probably need to close the `modelStream` object using `modelStream.end()` so that the DB connection is free to close (sketched after these comments). – Jake Holzinger Aug 10 '18 at 01:53
  • I have completely removed the mongoose-object-stream module; now the data is saved with the mongoose save command: new someModel(row).save(); The db gets filled, but .on('end', function(){}) never fires and the promise does not resolve. I cannot understand why it does not end. – Francesco Bellavita Aug 10 '18 at 02:25
  • I looked at this more, the problem was with `bun`, for me it was firing the `finish` event long before the streaming was really finished. I updated my answer with working code. – Jake Holzinger Aug 10 '18 at 05:07
  • Thank you very much, I solved it in another way (I do not know if you saw my answer), but your advice was very useful. – Francesco Bellavita Aug 10 '18 at 07:30
  • I did see, glad you were able to get it working, I was mostly sharing my solution because the original wasn't exactly correct. I do have some feedback for you that I will post on your solution. – Jake Holzinger Aug 10 '18 at 16:26
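Applied to the question's tomongo.js, the modelStream.end() suggestion from the comments might look like this (a sketch using through2's optional flush callback in place of the plain through.obj( parseandwrite )):

return through.obj( parseandwrite, function( flushCb ){
  // every row has passed through: end the model stream, then disconnect
  // once its buffered writes have been flushed to MongoDB
  modelStream.end( function(){
    mongoose.disconnect().then( flushCb );
  });
});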

I did it! A through2 stream needs a .on("data", function(){}) listener before the .on("end", ...) handler will fire, because the readable side has to be consumed. With that in place, the process gracefully disconnects from the database and exits.

var request = require('request');
var bun = require('bun');
var split = require('split');
var mongoose = require('mongoose');
var unzip = require('./lib/unzip');
var tomongo = require('./lib/tomongo');
var models = require('./models'); // assuming the same models module as in tomongo.js
var aModel = models.books; // the Mongoose model used in the "data" handler below

var pipeline = bun([unzip(), split()]);

function streamToDB(url) {
    return new Promise((resolve, reject) => {
        request.get(url)
            .pipe(pipeline)
            .pipe(tomongo())
            .on("data", function(data){
            new aModel( data ).save();}) //here i save to the db
            .on("error", reject)
            .on("end", resolve);
    });
}

mongoose.connect("mongodb://localhost:27017/test", {
    useNewUrlParser: true
}).then(() => {
    console.log('mongoDB connected');
    return streamToDB("http://someurl/somefile.zip")
}).catch((err) => {
    console.log('err', err);
}).then(() => {
    return mongoose.disconnect();
});

//tomongo.js (only the changed transform is shown)
var parseandwrite = function( chunk, _, cb ){
  var row = {}, cells = chunk.toString('utf-8').split('\t');
  cells.forEach( function( cell, i ){
    row[ columns[ i ] ] = ( cell || '' ).trim();
  });
  if( chunk ){
    this.push( row ); // push the row downstream instead of writing it here
  }
  cb();
};
  • You may want to consider using `mongoose-object-stream` or creating similar functionality using `through2`. The `save()` method is asynchronous, so you should tie it to the stream; this will prevent the stream from ending until all saves have been flushed to the database. Using through2, that would look like this (expanded below): `.pipe(through.obj((row, enc, cb) => new aModel(row).save(cb)))` – Jake Holzinger Aug 10 '18 at 16:30
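Expanded into a standalone transform, that suggestion might look like this (a sketch; saveToMongo is a hypothetical name, and aModel stands for whichever Mongoose model is in use):

var through = require('through2');

// tie each save() to the stream: the callback only fires once the document
// has been persisted, so 'finish' is not emitted until every row is in MongoDB
function saveToMongo( aModel ){
  return through.obj( function( row, enc, cb ){
    new aModel( row ).save( function( err ){
      cb( err ); // propagate save errors into the stream
    });
  });
}

// usage:
// request.get( url ).pipe( pipeline ).pipe( saveToMongo( models.books ) )
//   .on( 'finish', function(){ mongoose.disconnect(); } );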