3

I have documents like this:

{
    "_id" : ObjectId("565e906bc2209d91c4357b59"),
    "userEmail" : "abc@example.com",
    "subscription" : {
        "project1" : {
            "subscribed" : false
        },
        "project2" : {
            "subscribed" : true
        }
    }
}

{
    "_id" : ObjectId("565e906bc2209d91c4357b59"),
    "userEmail" : "mno@example.com",
    "subscription" : {
        "project1" : {
            "subscribed" : true
        },
        "project2" : {
            "subscribed" : true
        },
        "project3" : {
            "subscribed" : true
        }
    }
}

I would like to get, for each user, the list of projects whose subscribed flag is set to true.

For instance, what I'm expecting is:

abc@example.com - project2
mno@example.com - project1,project2,project3

I'll have a cron job which will send the respective details on the subscribed projects to the corresponding mail ids.

I tried aggregation, but aggregation needs the key to be specified. In my case the key (project1, project2 and so on) is dynamic. I read somewhere that mapReduce is the correct option to go for, but I don't have any experience using mapReduce.

Kindly help me solve this problem, and please explain in some detail how to handle it so that I can understand the solution.

styvane
  • 59,869
  • 19
  • 150
  • 156
Vimalraj Selvam
  • 2,155
  • 3
  • 23
  • 52
  • Maybe [$redact](https://docs.mongodb.org/manual/reference/operator/aggregation/redact/) could help here to remove the array entries with `subscribed: false`. – Philipp Dec 03 '15 at 12:33
  • 1
    If you change *subscription* to an array [{project:"project1", subscribed:true},...] it can easily be done with the aggregation framework. Otherwise, you have to go with map-reduce or some client-side code. – joao Dec 03 '15 at 13:07

1 Answers1

1

With your documents' current structure you need to use mapReduce.

// For every document, emit the user's e-mail together with the names of the
// projects whose `subscribed` flag is truthy. Results are returned inline.
db.subscription.mapReduce(
    // Map: `this` is the document currently being processed.
    function() {
        var subscription = this.subscription;
        var project = [];
        // `key` is declared with `var` so it stays local to the map function
        // instead of leaking into the global scope.
        for (var key in subscription) {
            if (Object.prototype.hasOwnProperty.call(subscription, key) && subscription[key]['subscribed']) {
                project.push(key);
            }
        }
        emit(this.userEmail, project);
    },
    // Reduce: intentionally empty. The sample output for this data reports
    // zero reduce calls because each userEmail is emitted only once.
    // NOTE(review): this assumes userEmail is unique per document; a key emitted
    // more than once would be reduced to `undefined` here.
    function(key, values) {},
    { out: {'inline': 1 } }
)

Which returns:

{
        "results" : [
                {
                        "_id" : "abc@example.com",
                        "value" : [
                                "project2"
                        ]
                },
                {
                        "_id" : "mno@example.com",
                        "value" : [
                                "project1",
                                "project2",
                                "project3"
                        ]
                }
        ],
        "timeMillis" : 28,
        "counts" : {
                "input" : 2,
                "emit" : 2,
                "reduce" : 0,
                "output" : 2
        },
        "ok" : 1
}

You should consider changing your document structure. To do that, you need to update your documents and change "subscription" to an array of sub-documents, using "bulk" operations for maximum efficiency.

// Convert each document's "subscription" sub-document into an array of
// { project, subscribed } sub-documents. Updates are queued on an ordered
// bulk operation and sent to the server in batches of 500.
var bulk = db.subscription.initializeOrderedBulkOp();
var count = 0;

db.subscription.find().forEach(function(doc) {
    var subscription = doc.subscription;

    // Skip documents that are already in array form so the script can be
    // re-run without turning array indexes into project names.
    if (Array.isArray(subscription)) {
        return;
    }

    var newSubscriptions = [];
    // `key` is declared with `var` to avoid creating an implicit global.
    for (var key in subscription) {
        if (Object.prototype.hasOwnProperty.call(subscription, key)) {
            newSubscriptions.push({ 'project': key, 'subscribed': subscription[key]['subscribed'] });
        }
    }

    bulk.find( { '_id': doc._id } ).updateOne( {
        '$set': { 'subscription': newSubscriptions }
    });
    count++;

    if (count % 500 === 0) {
        bulk.execute();
        // An executed bulk operation cannot be reused: start a new batch and
        // keep the reference so later updates are queued on it.
        bulk = db.subscription.initializeOrderedBulkOp();
    }
});

// Flush the updates still queued in the last, partially filled batch.
// When count is an exact multiple of 500 the current batch is empty and
// must not be executed.
if (count % 500 !== 0) {
    bulk.execute();
}

After this operation your documents look like this:

{
        "_id" : ObjectId("566041212729b51edb5871d4"),
        "userEmail" : "abc@example.com",
        "subscription" : [
                {
                        "project" : "project1",
                        "subscribed" : false
                },
                {
                        "project" : "project2",
                        "subscribed" : true
                }
        ]
}
{
        "_id" : ObjectId("565e906bc2209d91c4357b59"),
        "userEmail" : "mno@example.com",
        "subscription" : [
                {
                        "project" : "project1",
                        "subscribed" : true
                },
                {
                        "project" : "project2",
                        "subscribed" : true
                },
                {
                        "project" : "project3",
                        "subscribed" : true
                }
        ]
}

You can then use the .aggregate() method, which provides access to the aggregation pipeline:

// Build, for every user, a `projects` array holding the names of the
// projects whose `subscribed` flag is true.
db.subscription.aggregate([
    {
        $project: {
            userEmail: 1,
            projects: {
                // Remove the `false` placeholders produced by the $map below.
                $setDifference: [
                    {
                        // Replace each subscription entry with its project
                        // name when subscribed, or with `false` otherwise.
                        $map: {
                            input: "$subscription",
                            as: "srpt",
                            in: {
                                $cond: [ "$$srpt.subscribed", "$$srpt.project", false ]
                            }
                        }
                    },
                    [ false ]
                ]
            }
        }
    }
])

Which yields:

{
        "_id" : ObjectId("566041212729b51edb5871d4"),
        "userEmail" : "abc@example.com",
        "projects" : [
                "project2"
        ]
}
{
        "_id" : ObjectId("565e906bc2209d91c4357b59"),
        "userEmail" : "mno@example.com",
        "projects" : [
                "project1",
                "project2",
                "project3"
        ]
}
styvane
  • 59,869
  • 19
  • 150
  • 156