-1

I wrote a script in Node that iterates over a large MongoDB collection, returning a certain number of documents at a time.

The collection has this simple format:

{
    name: 'One',
    data: '...'
},
{
    name: 'Two',
    data: '...'
},
...

I'm doing this job with the Q library, using a sequence of promises that get run one after the other:

'use strict';

var Q    = require('q');
var monk = require('monk');

// Number of documents requested per query; also the distance between two successive skip offsets.
var CHUNK_SIZE = 100;
// Largest skip offset scheduled up front; hardcoded stand-in for the number of documents in the collection.
var LIMIT = 1000;

// monk handle for the 'entries' collection, obtained from the 'localhost/dictionary' connection string.
var collection = monk('localhost/dictionary').get('entries');

/**
 * Builds one step of the sequential scan over the collection.
 *
 * The returned step receives the chunk produced by the previous step,
 * handles its documents, and then requests the chunk that starts at
 * offset `j`.
 *
 * @param {number} j - Skip offset of the chunk this step will request.
 * @returns {function(Array=): (*|undefined)} Step function. It returns
 *     whatever `collection.find` returns, or undefined when the chunk
 *     it was given is empty.
 */
var promiseFactory = function (j) {
    return function (result) {
        // A step called without a previous chunk (result === undefined)
        // has nothing to process and goes straight to the query.
        if (result !== undefined) {
            // An empty chunk means the previous query ran past the last
            // document: stop here without issuing another query.
            if (!result.length) {
                return undefined;
            }

            result.forEach(function (doc) {
                console.log(doc.name); // print name
                // ... do something with the document here...
            });
        }

        // Request CHUNK_SIZE documents, starting from the j-th document.
        var queryOptions = { limit: CHUNK_SIZE, skip: j, sort: { name: 1 } };
        return collection.find({}, queryOptions);
    };
};

// Pre-build one step per chunk offset: CHUNK_SIZE, 2 * CHUNK_SIZE, ..., up to LIMIT.
var funcs = [];
for (var i = CHUNK_SIZE; i <= LIMIT; i += CHUNK_SIZE) {
    funcs.push(promiseFactory(i));
}

// Start the chain with the step for offset 0; it is called without a
// previous chunk, so it only issues the first query.
var loop = Q.fcall(promiseFactory(0));

// Append the remaining steps so each one runs only after the previous
// step's query has resolved.
funcs.forEach(function (f) {
    loop = loop.then(f);
});

// Terminate the chain: without this, a rejected query or an exception
// thrown while processing a chunk would be swallowed silently. done()
// rethrows such an error so it surfaces.
loop.done();

The script works well and does achieve what it was designed to do.

However, I would like to improve it:

  • I'm hardcoding the number of documents in the collection (LIMIT). I would like to get rid of this variable and let the script detect when to stop.
  • I have a feeling that this approach may not be the most memory-efficient one. In my code, funcs.forEach() chains a lot of copies of the same function in one shot (to be exact LIMIT/CHUNK_SIZE copies). Since I'm working on a very large collection, I was wondering if there's a way to chain a new function only if there are still documents left, while running through the collection.
davejagoda
  • 2,420
  • 1
  • 20
  • 27
kYuZz
  • 1,572
  • 4
  • 14
  • 25

1 Answer

0

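I think I found the solution to both problems. It is just a simple addition in promiseFactory(), which I have highlighted below: each step now chains the next step itself, and the chain stops as soon as a query returns no documents. With this change, the LIMIT constant, the funcs array and the funcs.forEach() loop are no longer needed and should be removed; the iteration is started with Q.fcall(promiseFactory(0)) alone. Adding it here in the hope that it is useful to someone: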

/**
 * Builds one step of a self-extending scan over the collection.
 *
 * The returned step processes the chunk it is given, requests the chunk
 * that starts at offset `j`, and chains the step for the following chunk
 * onto that request. No steps have to be created in advance, and the
 * scan ends by itself when a query returns no documents.
 *
 * @param {number} j - Skip offset of the chunk this step will request.
 * @returns {function(Array=): (Object|undefined)} Step function. It
 *     returns the promise for the rest of the scan, or undefined when
 *     the chunk it was given is empty.
 */
var promiseFactory = function (j) {
    return function (result) {

        if (undefined !== result) {  // result is undefined when there is no previous chunk (e.g. the initial call).

            if (result.length) {
                for (var k = 0, max = result.length; k < max; k++) {
                    console.log(result[k].name); // print name
                }
            } else { // no more documents, end of the iteration
                return; // implicitly returns undefined
            }
        }

        ///////////////// CHANGE HERE ////////////////////////
        // Fetch CHUNK_SIZE documents starting from the j-th one and chain the
        // step for the next chunk; that step is only created once this query
        // has been issued.
        return collection.find({}, { limit: CHUNK_SIZE, skip: j, sort: { name: 1 }}).then(promiseFactory(j + CHUNK_SIZE));
        ///////////////////// END ////////////////////////////

    };
};
kYuZz
  • 1,572
  • 4
  • 14
  • 25