0

There are documents in 'account' collection. Some fields may not be declared. Look at this.

{ _id: ObjectId(1), price: 5400, product: ObjectId(2), count: 1            }
{ _id: ObjectId(1),              product: ObjectId(2), count: 0, sale: 0.2 }
{ _id: ObjectId(1),              product: ObjectId(2), count: 1            }

Using aggregate framework, I want to have last price, last sale and count for a product. Currently I use this query

db.collection('account').aggregate([
{
    $group: {
            _id: '$product',
            count: { $sum: '$count' },
            price: { $last: '$price' }, 
            sale: { $last: '$sale' }
        }
    }
], { allowDiskUse: true }).toArray()

But sale and price fields becomes null. I think, I should use $project before $group, how do I retain last document's value if current field is null?

DAXaholic
  • 33,312
  • 6
  • 76
  • 74
GRiMe2D
  • 654
  • 1
  • 10
  • 22
  • I think this is task that calls for reading the data into your language of choice. Any data-frame library (like `pandas` in python) would handle this trivially – michael_j_ward Jun 18 '16 at 12:24

2 Answers2

2

Solution
The following pipeline should give you the desired result

db.getCollection('account').aggregate(
[
    {
        $project: {
            _id: '$product',
            fields: [
                { name: { $literal: 'price' }, value: '$price',        count: { $literal: 0 } },
                { name: { $literal: 'sale' },  value: '$sale',         count: { $literal: 0 } },
                { name: { $literal: 'count' }, value: { $literal: 0 }, count: '$count' }
            ]
        }
    },
    {
        $unwind: {
            path: '$fields'
        }
    },
    {
        $match: {
            'fields.value': {
                $exists: true
            }
        }
    },
    {
        $group: {
            _id: {
                product: '$_id',
                field: '$fields.name'
            },
            value: {
                $last: '$fields.value'
            },
            count: {
                $sum: '$fields.count'
            }
        }
    },
    {
        $project: {
            _id: '$_id.product',
            price: {
                $cond: { if: { $eq: [ '$_id.field', 'price' ] }, then: '$value', else: null }
            },
            sale: {
                $cond: { if: { $eq: [ '$_id.field', 'sale' ] }, then: '$value', else: null }
            },
            count: {
                $cond: { if: { $eq: [ '$_id.field', 'count' ] }, then: '$count', else: 0 }
            }
        }
    },
    {
        $group: {
            _id: '$_id',
            price: {
                $max: '$price'
            },
            sale: {
                $max: '$sale'
            },
            count: {
                $sum: '$count'
            }
        }
    }
])

Explanation
It first creates a new array with elements per field which contain the field name, field value and count value. Note that the field count is treated special as it should be accumulated instead of getting the last value of it. So after the first stage documents look like this:

/* 1 */
{
    "_id" : "2",
    "fields" : [ 
        {
            "name" : "price",
            "value" : 5400,
            "count" : 0.0
        }, 
        {
            "name" : "sale",
            "count" : 0.0
        }, 
        {
            "name" : "count",
            "value" : 0.0,
            "count" : 1
        }
    ]
}

/* 2 */
{
    "_id" : "2",
    "fields" : [ 
        {
            "name" : "price",
            "count" : 0.0
        }, 
        {
            "name" : "sale",
            "value" : 0.2,
            "count" : 0.0
        }, 
        {
            "name" : "count",
            "value" : 0.0,
            "count" : 0
        }
    ]
}

/* 3 */
{
    "_id" : "2",
    "fields" : [ 
        {
            "name" : "price",
            "count" : 0.0
        }, 
        {
            "name" : "sale",
            "count" : 0.0
        }, 
        {
            "name" : "count",
            "value" : 0.0,
            "count" : 1
        }
    ]
}

It then unwinds the array and filters it to get rid of null values, so after stage 2 & 3 documents look like this:

/* 1 */
{
    "_id" : "2",
    "fields" : {
        "name" : "price",
        "value" : 5400,
        "count" : 0.0
    }
}

/* 2 */
{
    "_id" : "2",
    "fields" : {
        "name" : "count",
        "value" : 0.0,
        "count" : 1
    }
}

/* 3 */
{
    "_id" : "2",
    "fields" : {
        "name" : "sale",
        "value" : 0.2,
        "count" : 0.0
    }
}

/* 4 */
{
    "_id" : "2",
    "fields" : {
        "name" : "count",
        "value" : 0.0,
        "count" : 0
    }
}

/* 5 */
{
    "_id" : "2",
    "fields" : {
        "name" : "count",
        "value" : 0.0,
        "count" : 1
    }
}

In the fourth stage the fields' last values and sum of count are built. Result looks like this:

/* 1 */
{
    "_id" : {
        "product" : "2",
        "field" : "sale"
    },
    "value" : 0.2,
    "count" : 0.0
}

/* 2 */
{
    "_id" : {
        "product" : "2",
        "field" : "count"
    },
    "value" : 0.0,
    "count" : 2
}

/* 3 */
{
    "_id" : {
        "product" : "2",
        "field" : "price"
    },
    "value" : 5400,
    "count" : 0.0
}

As the values are now in separate documents with a different shape than our desired result should be, we need to project them back into something we can eventually group. So in after the fifth stage documents are like so:

/* 1 */
{
    "_id" : "2",
    "count" : 0.0,
    "price" : null,
    "sale" : 0.2
}

/* 2 */
{
    "_id" : "2",
    "count" : 2,
    "price" : null,
    "sale" : null
}

/* 3 */
{
    "_id" : "2",
    "count" : 0.0,
    "price" : 5400,
    "sale" : null
}

The final stage then just aggregates those documents per product.

DAXaholic
  • 33,312
  • 6
  • 76
  • 74
0

My current solution does in three queries.

1. - Group by `product` and sum `count` field
   - Remove any entries, where `count` is 0 and lower
   - Hold `_ids` array as `product_ids`
   - Hold result array as `firstArray`

2. - Match any entries, where 
            - `sale` is 0 and greater
            - `_id` is in `product_ids`
   - Group by `product` and get last `sale` field
   - Hold result array as `secondArray`

3. - Match any entries, where
            - `price` is 0 and greater
            - `_id` is in `product_ids`
   - Group by `product` and get last `price` field
   - Enumerate `firstArray` and add `count` field to entries
   - Enumerate `secondArray` and add `sale` field to entries

And the code

let firstArray
let secondArray
let product_ids
Promise.resolve().then(function () {
    return db.collection(ACCOUNT).aggregate([
        {
            $group: {
                _id: '$product',
                count: { $sum: '$count' }
            }
        }, {
            $match: {
                count: { $gt: 0 }
            }
        }
    ], { allowDiskUse: true }).toArray()
}).then(function (array) {
    firstArray = array
    product_ids = array.map(function (item) {
        return item._id
    })
    return db.collection(ACCOUNT).aggregate([
        {
            $match: {
                product: { $in: product_ids },
                sale: { $gte: 0 }
            }
        }, {
            $group: {
                _id: '$product',
                sale: { $last: '$sale' }
            }
        }
    ], { allowDiskUse: true }).toArray()
}).then(function (array) {
    secondArray = array

    return db.collection(ACCOUNT).aggregate([
        {
            $match: {
                product: { $in: product_ids },
                price: { $gte: 0 }
            }
        }, {
            $group: {
                _id: '$product',
                price: { $last: '$price' }
            }
        }
    ], { allowDiskUse: true }).toArray()
}).then(function (array) {
    req.data = array.map(function (item) {
        let count = 0
        let sale = 0
        firstArray.some(function (_item) {
            if (item._id.toHexString() == _item._id.toHexString()) {
                count = _item.count
                return true
            }

            return false
        })
        secondArray.some(function (_item) {
            if (item._id.toHexString() == _item._id.toHexString()) {
                sale = _item.sale
                return
            }
            return false
        })

        item.count = count
        item.sale = sale
        return item
    })
    next()
})
GRiMe2D
  • 654
  • 1
  • 10
  • 22