With the algorithm you described, the grouping decision for each document always depends on another document, so I don't see a way to do this with map-reduce, the aggregation framework, or a single MongoDB query. The only solution I see is to follow your algorithm strictly: read each document and decide whether it belongs to the current group or should start a new one.
Loading all documents into memory is not recommended, since the collection could be very large, so I used a stream to process one document at a time.
Create a cursor that finds all documents and sorts them by date, then use cursor.on('data', function(doc) { ... }); to read each document individually.
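The code below assumes db is an already-open database handle. As a minimal sketch, obtaining one with the Node.js MongoDB driver could look like this (the connection URL and database name are just placeholders):
var MongoClient = require('mongodb').MongoClient;

// placeholder URL and database name; adjust to your setup
MongoClient.connect('mongodb://localhost:27017/test', function(err, db) {
    if (err) throw err;
    // run the grouping code below with this db handle
});
With that handle in place, the grouping itself looks like this: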
var groups = {}; // init group object
var currentGroupKey;
var groupInterval = 5 * 60 * 1000; // five minutes in milliseconds
var cursor = db.collection("documents").find({}).sort({date: 1});

cursor.on('data', function(doc) {
    var timestamp = doc.date.getTime();
    if (currentGroupKey != null && currentGroupKey + groupInterval >= timestamp) {
        // document is within five minutes of the current group's first document
        groups[currentGroupKey].push(doc);
    } else {
        // start a new group keyed by this document's timestamp
        groups[timestamp] = [doc];
        currentGroupKey = timestamp;
    }
});

cursor.once('end', function() {
    // this is called after the last document is read
    console.log(groups); // print your grouped documents
    db.close();
});
For these documents:
[ { _id: 57f59acb8e73d9634ac8c7b0,
index: 3,
date: Wed Oct 05 2016 21:02:29 GMT-0300 (BRT) },
{ _id: 57f59acb8e73d9634ac8c7ae,
index: 1,
date: Wed Oct 05 2016 21:04:02 GMT-0300 (BRT) },
{ _id: 57f59acb8e73d9634ac8c7b3,
index: 6,
date: Wed Oct 05 2016 21:07:43 GMT-0300 (BRT) },
{ _id: 57f59acb8e73d9634ac8c7b4,
index: 7,
date: Wed Oct 05 2016 21:10:26 GMT-0300 (BRT) },
{ _id: 57f59acb8e73d9634ac8c7b2,
index: 5,
date: Wed Oct 05 2016 21:14:23 GMT-0300 (BRT) },
{ _id: 57f59acb8e73d9634ac8c7b5,
index: 8,
date: Wed Oct 05 2016 21:17:39 GMT-0300 (BRT) },
{ _id: 57f59acb8e73d9634ac8c7b6,
index: 9,
date: Wed Oct 05 2016 21:21:07 GMT-0300 (BRT) },
{ _id: 57f59acb8e73d9634ac8c7ad,
index: 0,
date: Wed Oct 05 2016 21:24:19 GMT-0300 (BRT) },
{ _id: 57f59acb8e73d9634ac8c7af,
index: 2,
date: Wed Oct 05 2016 21:25:50 GMT-0300 (BRT) },
{ _id: 57f59acb8e73d9634ac8c7b1,
index: 4,
date: Wed Oct 05 2016 21:28:13 GMT-0300 (BRT) } ]
The final groups object is:
{ '1475712149573':
[ { _id: 57f59acb8e73d9634ac8c7b0,
index: 3,
date: Wed Oct 05 2016 21:02:29 GMT-0300 (BRT) },
{ _id: 57f59acb8e73d9634ac8c7ae,
index: 1,
date: Wed Oct 05 2016 21:04:02 GMT-0300 (BRT) } ],
'1475712463238':
[ { _id: 57f59acb8e73d9634ac8c7b3,
index: 6,
date: Wed Oct 05 2016 21:07:43 GMT-0300 (BRT) },
{ _id: 57f59acb8e73d9634ac8c7b4,
index: 7,
date: Wed Oct 05 2016 21:10:26 GMT-0300 (BRT) } ],
'1475712863890':
[ { _id: 57f59acb8e73d9634ac8c7b2,
index: 5,
date: Wed Oct 05 2016 21:14:23 GMT-0300 (BRT) },
{ _id: 57f59acb8e73d9634ac8c7b5,
index: 8,
date: Wed Oct 05 2016 21:17:39 GMT-0300 (BRT) } ],
'1475713267412':
[ { _id: 57f59acb8e73d9634ac8c7b6,
index: 9,
date: Wed Oct 05 2016 21:21:07 GMT-0300 (BRT) },
{ _id: 57f59acb8e73d9634ac8c7ad,
index: 0,
date: Wed Oct 05 2016 21:24:19 GMT-0300 (BRT) },
{ _id: 57f59acb8e73d9634ac8c7af,
index: 2,
date: Wed Oct 05 2016 21:25:50 GMT-0300 (BRT) } ],
'1475713693672':
[ { _id: 57f59acb8e73d9634ac8c7b1,
index: 4,
date: Wed Oct 05 2016 21:28:13 GMT-0300 (BRT) } ] }
EDIT
Since the grouping logic always depends on the last read document, I modified the algorithm to fit that. It now also stores a group key on each document, so it doesn't keep all documents in memory.
var lastDocumentTimestamp;
var groupIndex = 0;
var groupInterval = 5 * 60 * 1000; // five minutes in milliseconds
var cursor = db.collection("documents").find({}).sort({date: 1});

cursor.on('data', function(doc) {
    var timestamp = doc.date.getTime();
    // start a new group when this document is more than five minutes after the previous one;
    // for the first document lastDocumentTimestamp is undefined, so the test is false and it stays in group 0
    if (lastDocumentTimestamp + groupInterval < timestamp) {
        groupIndex++;
    }
    lastDocumentTimestamp = timestamp;
    // tag the document with its group index
    db.collection("documents").update({ _id: doc._id }, { $set: { group: groupIndex } });
});

cursor.once('end', function() {
    // this is called after the last document is read
    db.close();
});
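One thing to watch for: the update calls run asynchronously, so the 'end' handler may close the connection before every update has finished. A possible variation, reusing the variables declared above and assuming a driver version where update returns a promise when no callback is passed, is to collect those promises and wait for them before closing:
var pendingUpdates = [];

cursor.on('data', function(doc) {
    var timestamp = doc.date.getTime();
    if (lastDocumentTimestamp + groupInterval < timestamp) {
        groupIndex++;
    }
    lastDocumentTimestamp = timestamp;
    // keep each update promise so we can wait for all of them before closing
    pendingUpdates.push(
        db.collection("documents").update({ _id: doc._id }, { $set: { group: groupIndex } })
    );
});

cursor.once('end', function() {
    Promise.all(pendingUpdates).then(function() {
        db.close();
    });
});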
After that, you can use the aggregation framework to group the documents by their group key:
db.collection("documents").aggregate([{
    $group: {
        _id: "$group",
        count: { $sum: 1 },
        docs: { $push: "$date" }
    }
}])
This produces a result like:
[ { _id: 0,
count: 1,
docs: [ Thu Oct 06 2016 22:00:20 GMT-0300 (BRT) ] },
{ _id: 1,
count: 4,
docs:
[ Thu Oct 06 2016 22:20:31 GMT-0300 (BRT),
Thu Oct 06 2016 22:22:52 GMT-0300 (BRT),
Thu Oct 06 2016 22:25:34 GMT-0300 (BRT),
Thu Oct 06 2016 22:27:15 GMT-0300 (BRT) ] },
{ _id: 2,
count: 5,
docs:
[ Thu Oct 06 2016 22:33:27 GMT-0300 (BRT),
Thu Oct 06 2016 22:35:45 GMT-0300 (BRT),
Thu Oct 06 2016 22:38:45 GMT-0300 (BRT),
Thu Oct 06 2016 22:40:02 GMT-0300 (BRT),
Thu Oct 06 2016 22:44:20 GMT-0300 (BRT) ] } ]
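If you run that aggregation from Node.js rather than the mongo shell, aggregate without a callback returns a cursor, so a small usage sketch (reusing the same db handle) would be:
db.collection("documents").aggregate([{
    $group: {
        _id: "$group",
        count: { $sum: 1 },
        docs: { $push: "$date" }
    }
}]).toArray(function(err, results) {
    if (err) throw err;
    console.log(results); // grouped output like the one above
    db.close();
});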