
I have the following aggregation pipeline:

db.getCollection('yourCollection').aggregate([
    {
        $unwind: {
            path: "$dates",
            includeArrayIndex: "idx"
        }
    },
    {
        $project: {
            _id: 0,
            dates: 1,
            numbers: { $arrayElemAt: ["$numbers", "$idx"] },
            goals: { $arrayElemAt: ["$goals", "$idx"] },
            durations: { $arrayElemAt: ["$durations", "$idx"] }
        }
    }
])

which runs against the following sample documents:

{
    "_id" : ObjectId("52d017d4b60fb046cdaf4851"),
    "dates" : [
        1399518702000,
        1399126333000,
        1399209192000,
        1399027545000
    ],
    "dress_number" : "4",
    "name" : "J. Evans",
    "numbers" : [
        "5982",
        "5983",
        "5984",
        "5985"
    ],
    "goals": [
        "1",
        "0",
        "4",
        "2"
    ],
   "durations": [
       "78",
       "45",
       "90",
       "90"
   ]
}

{
    "_id" : ObjectId("57e250c1b60fb0213d06737c"),
    "dates" : [
        "1399027545000",
        "1399101432000",
        "1399026850000",
        "1399904504000"
    ],
    "dress_number" : "6",
    "name" : K. Mitnick,
    "numbers" : [
        "0982",
        "0981",
        "0958",
        "0982"
    ],
    "durations" : [
        98,
        110,
        66,
        92
    ],
    "goals" : [
        "2",
        "3",
        "0",
        "1"
    ]
}

The query works well, but there are duplicate records, so I'm trying to use the $addToSet operator to avoid duplicates:

db.getCollection('yourCollection').aggregate([
    {
        $match: {
            "number": number
        }
    },
    {
        $unwind: {
            path: "$dates",
            includeArrayIndex: "idx"
        }
    },
    {
        $group: {
            _id: '$_id',
            dates: { $addToSet: '$dates' }
        }
    },
    {
        $project: {
            _id: 0,
            dates: 1,
            numbers: { $arrayElemAt: ["$numbers", "$idx"] },
            goals: { $arrayElemAt: ["$goals", "$idx"] },
            durations: { $arrayElemAt: ["$durations", "$idx"] }
        }
    }
])

but I get only the dates (the other fields are null):

{ dates: 
     [ '1399026850000',
       '1399101432000',
       '1399027545000',
       '1399904504000',
       '1399024474000',
       '1399126333000' ],
    numbers: null,
    goals: null,
    durations: null },
  { dates: 
     [ '1399027545000',
       '1399024474000',
       '1399518702000',
       '1399126333000',
       '1399209192000',
       '1399356651000' ],
    numbers: null,
    goals: null,
    durations: null },
  { dates: 
     [ '1399026850000',
       '1399101432000',
       '1399027545000',
       '1399904504000',
       '1399024474000' ],
    numbers: null,
    goals: null,
    durations: null } 

Does anybody know where the problem is?

corry
    When you do the $group, you are essentially excluding all of the other variables. You can't re-project them back in after that point. If all you are trying to do is remove duplicates from your arrays, your best bet is to either do this in your javascript / client code, or use a map-reduce. See here: http://stackoverflow.com/questions/9862255/how-to-remove-duplicate-entries-from-an-array You can also modify your $group pipeline stage to add the other fields in there (See chridam's answer). – dyouberg Sep 22 '16 at 15:05
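
For reference, a minimal client-side sketch of the deduplication approach suggested in the comment above. It only dedupes the dates array, assumes the field names from the sample documents, and uses plain shell JavaScript (printjson is the mongo shell helper):

db.getCollection('yourCollection').find().forEach(function (doc) {
    // keep only the first occurrence of each value in the dates array
    doc.dates = doc.dates.filter(function (value, index) {
        return doc.dates.indexOf(value) === index;
    });
    printjson(doc);
});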

1 Answer


You need to include the other fields within the $group pipeline stage, using the $first operator, as follows:

db.getCollection('yourCollection').aggregate([
    { "$unwind": "$dates" },
    {
        "$group": {
            "_id": "$_id",
            "dates": { "$addToSet": "$dates" },
            "numbers": { "$first": "$numbers" },
            "goals": { "$first": "$goals" },
            "durations": { "$first": "$durations" }
        }
    },
    { "$unwind": {
            "path": "$dates",
            "includeArrayIndex": "idx"
    } },
    {
        "$project": {
            "_id": 0,
            "dates": 1,
            "numbers": { "$arrayElemAt": ["$numbers", "$idx"] },
            "goals": { "$arrayElemAt": ["$goals", "$idx"] },
            "durations": { "$arrayElemAt": ["$durations", "$idx"] }
        }
    }
])

Alternatively, use $setUnion to eliminate the duplicates:

db.getCollection('yourCollection').aggregate([
    {
        "$project": {
            "_id": 0,
            "dates": { "$setUnion": ["$dates", "$dates"] },
            "numbers": 1,
            "goals": 1,
            "durations": 1
        }
    },
    { "$unwind": {
            "path": "$dates",
            "includeArrayIndex": "idx"
    } },
    {
        "$project": {
            "_id": 0,
            "dates": 1,
            "dateIndex": "$idx",
            "numbers": { "$arrayElemAt": ["$numbers", "$idx"] },
            "goals": { "$arrayElemAt": ["$goals", "$idx"] },
            "durations": { "$arrayElemAt": ["$durations", "$idx"] }
        }
    }
])
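
One caveat with the $setUnion variant: $setUnion treats its inputs as sets, so while it filters out the duplicates, the order of the elements in the output array is unspecified. That means the idx produced by the later $unwind may no longer line up with the original positions in numbers, goals and durations. To inspect just the deduplicated array on its own, here is a quick shell sketch (dedupedDates is only an illustrative output field name; the other names come from the sample documents):

db.getCollection('yourCollection').aggregate([
    // $setUnion of an array with itself keeps only its distinct values
    { "$project": { "_id": 0, "name": 1, "dedupedDates": { "$setUnion": ["$dates", "$dates"] } } }
]).forEach(printjson)
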
chridam
  • Thanks, I've tried with both solutions, but there are still duplicates :/ – corry Sep 26 '16 at 07:42
  • Can you update your question with the sample documents that are producing the duplicates, as well as showing the expected output with those docs? – chridam Sep 26 '16 at 07:46
  • Please take a look at the question http://stackoverflow.com/questions/39426022/mongodb-show-children-items-in-one-to-many-relationship/39426859#39426859 – corry Sep 26 '16 at 08:38
  • Isn't that question resolved already? Can you please update your question with the sample documents that are producing the duplicates and your expected output so that I can do a test and confirm? – chridam Sep 26 '16 at 08:42
  • Yes, but it's similar. Also, before $unwind operator, I'm using $match. Can this be a problem? Take a look at second sample document. I've got `{ dates: '1399027545000', numbers: '0982', goals: '2', durations: 92 }, { dates: '1399101432000', numbers: '0982', goals: '2', durations: 92 }, { dates: '1399026850000', numbers: '0982', goals: '2', durations: 92 }, { dates: '1399027545000', numbers: '0982', goals: '2', durations: 92 }`. As you can see, the last document is duplicate of first document. – corry Sep 26 '16 at 09:07
  • Let us [continue this discussion in chat](http://chat.stackoverflow.com/rooms/124190/discussion-between-corry-and-chridam). – corry Sep 26 '16 at 09:16