1

I'm writing a batch process to read an RSS feed and store the contents in MongoDB via Mongoose. I would run the script, and it would process the contents just fine... but the script wouldn't return to the console. My hypothesis was that my database connection was still open, and that's why it wasn't returning.

I couldn't just track when my parsing was complete, because I still might have some mongoose save operations going on.

So, I wrote a function to track my open database connections, and the status of my RSS parsing. But my code ended up being heinously verbose. I'm wondering if there is a better model/pattern for doing something like this.

var FeedParser = require('feedparser')
    , mongoose = require('mongoose');

// Feed passed to parser.parseFile() in main(). Despite the _URL suffix, the
// value is a relative path to a local fixture file.
var TEST_RSS_URL = "./test/data/20120303-seattle.rss";
// Number of per-article database operations that have been started but have
// not yet reported completion; maintained by track_and_close_database().
var OPEN_DB_CONNECTIONS = 0;
// True once the parser has emitted 'end'. The connection is only closed when
// this is true AND OPEN_DB_CONNECTIONS has dropped back to zero.
var PARSING_DONE = false;

/*
 * Single bookkeeping entry point for the batch run.
 *
 * mode is one of:
 *   'open'       - a database operation has started
 *   'close'      - a database operation has finished
 *   'parseStart' - feed parsing is about to begin
 *   'parseEnd'   - the feed has been fully parsed
 *
 * After a 'close' or a 'parseEnd' the connection is closed if parsing is done
 * and no operations remain outstanding. Any other mode value is ignored.
 */
function track_and_close_database(mode) {
    // These two modes only record state; they can never trigger a close.
    if (mode === 'open') {
        OPEN_DB_CONNECTIONS++;
        return;
    }
    if (mode === 'parseStart') {
        PARSING_DONE = false;
        return;
    }

    // These two modes record state and then fall through to the close check.
    if (mode === 'close') {
        OPEN_DB_CONNECTIONS--;
    } else if (mode === 'parseEnd') {
        PARSING_DONE = true;
    } else {
        return;
    }

    if (PARSING_DONE && OPEN_DB_CONNECTIONS === 0) {
        conn.close();
    }
}

/*
 * Placeholder for the real article parser.
 *
 * Schedules a log line 20 ms in the future to stand in for asynchronous work.
 * The original call had its closing parenthesis in the wrong place, so
 * console.log ran immediately (printing the stray 20) and setTimeout was
 * handed undefined instead of a callback.
 *
 * stuff - the article to parse (currently unused).
 *
 * Returns nothing yet; main() treats a null/undefined result as a failed parse.
 */
function parse_stuff(stuff) {
    // do some stuff
    setTimeout(function () {
        console.log("parsed some stuff");
    }, 20);
}

/*
 * Parses the feed at TEST_RSS_URL and stores every article that is not
 * already in the 'stuff' collection.
 *
 * Each article brackets its database work with
 * track_and_close_database('open') / ('close'), so the connection can be shut
 * down once parsing has ended and every lookup/save has completed. Every path
 * through the findOne callback reports 'close' exactly once.
 */
function main() {
    // Declared locally. The original assigned to undeclared names, which
    // creates implicit globals in sloppy mode and throws a ReferenceError in
    // strict mode / ES modules.
    var parser = new FeedParser();

    parser.on('article', function (article) {
        track_and_close_database('open');

        // check to see if we already have this listing
        var stuff_model = conn.model('stuff');
        stuff_model.findOne({'href': article.link}, function (error, doc) {
            if (error) {
                // Report the failure instead of dropping it silently.
                console.error("lookup failed for " + article.link + ":", error);
                track_and_close_database('close');
                return;
            }

            // nothing to do, already in the database
            if (null != doc) {
                track_and_close_database('close');
                return;
            }

            // this one doesn't exist yet, parse and save
            var listing = parse_stuff(article);

            // parsing failed
            if (null == listing) {
                track_and_close_database('close');
                return;
            }

            // listing is valid, save it!
            listing.save(function (saveError) {
                if (saveError) {
                    console.error("save failed for " + article.link + ":", saveError);
                }
                // Success or failure, this operation is finished.
                track_and_close_database('close');
            });
        });
    });

    // Completed parsing the RSS file
    parser.on('end', function () {
        track_and_close_database('parseEnd');
    });

    track_and_close_database('parseStart');
    parser.parseFile(TEST_RSS_URL);
}

// run this thing
main();
adamb0mb
  • 1,401
  • 1
  • 12
  • 15

1 Answer

1

I too ran into this issue. I would think the preferred way to handle this would be with events; however, upon looking into the source code, there didn't seem to be anything in there that kept some type of operation count. I ended up hooking it up to an EventEmitter. What would be better is if Mongoose emitted events before and after saves, so I didn't have to plug this into all of my models.

Here is a sample of how I did it:

/* lib/render_log.js */
/* Model for managing RenderLog */

var common = require("./common");
common.initialize_locals(global);

var mongoose = require("mongoose"),
    Schema = mongoose.Schema;

// Fields stored for each render-log entry.
var RenderLogSchema = new Schema({
    renderer: String,
    template: String,
    content: {}
});

// Emits eventName on the RenderLog model registered with the document's
// connection, passing the document as the event payload.
function emit_on_render_log_model(doc, eventName) {
  doc.db.model('RenderLog').emit(eventName, doc);
}

// Announce that a save is about to start, then let the save proceed.
RenderLogSchema.pre('save', function (next) {
  emit_on_render_log_model(this, 'open_db_op');
  next();
});

// Announce that the save has finished.
RenderLogSchema.post('save', function () {
  emit_on_render_log_model(this, 'close_db_op');
});

// Connecting is a side effect of loading this module.
mongoose.connect('mongodb://localhost/time-jr-development');

// The module's export is the RenderLog model itself.
var RenderLog = mongoose.model("RenderLog", RenderLogSchema);
module.exports = RenderLog;
exports = RenderLog;

Followed by my test executable:

/* bin/test_mongoose.js */

var async = require('async');
var RenderLog = require("../lib/render_log");

/*
 * Counts in-flight database operations and closes the RenderLog connection
 * once none remain.
 */
var ConnectionManager = {
  // Operations reported started but not yet reported finished.
  open_db_ops: 0,

  // Record that an operation has started.
  new_db_op: function () {
    this.open_db_ops++;
  },

  // Record that an operation has finished.
  close_db_op: function () {
    this.open_db_ops--;
  },

  /*
   * Closes RenderLog's connection as soon as no operations are outstanding,
   * re-checking once per event-loop iteration until then.
   *
   * self - optional manager to operate on; defaults to `this`, so the method
   *        also works when invoked detached with the manager as its argument.
   */
  close: function (self) {
    var manager = self || this;
    if (manager.open_db_ops > 0) {
      console.log("Waiting...");
      // Must be setImmediate, not process.nextTick: nextTick callbacks run
      // before the event loop reaches its I/O phase, so re-queueing on
      // nextTick would starve the very callbacks that finish the saves and
      // this loop would never end.
      setImmediate(function () {
        manager.close(manager);
      });
    } else {
      RenderLog.db.close();
    }
  }
};


// Mirror the model's save lifecycle events into ConnectionManager's counter.
RenderLog.on("open_db_op", function () {
  ConnectionManager.new_db_op();
});
RenderLog.on("close_db_op", function () {
  ConnectionManager.close_db_op();
});

// Fire off five saves (content.bar = 1..5) without waiting on any of them.
[1, 2, 3, 4, 5].forEach(function (bar) {
  new RenderLog({renderer: "foo", template: "foo.html", content: {"bar": bar}}).save();
});

// Deferred by one tick: calling close right away would run before the save
// events have been emitted, so the counter would still read zero.
async.nextTick(async.apply(ConnectionManager.close, ConnectionManager));
Orion
  • 61
  • 1
  • 6