3

I have around 60 thousand document in users collection, and have the following query:

db.getCollection('users').aggregate([
    {"$match":{"userType":"employer"}},
    {"$lookup":{"from":"companies","localField":"_id","foreignField":"owner.id","as":"company"}},
    {"$unwind":"$company"},
    {"$lookup":{"from":"companytypes","localField":"company.type.id","foreignField":"_id","as":"companyType"}},
    {"$unwind":"$companyType"},
    { $group: { _id: null, count: { $sum: 1 } } }
])

It takes around 12 seconds to count, even I call count function before list function, but my list function with limit: 10 response faster than count.

And following is explain result:

{
    "stages" : [ 
        {
            "$cursor" : {
                "query" : {
                    "userType" : "employer"
                },
                "fields" : {
                    "company" : 1,
                    "_id" : 1
                },
                "queryPlanner" : {
                    "plannerVersion" : 1,
                    "namespace" : "jobs.users",
                    "indexFilterSet" : false,
                    "parsedQuery" : {
                        "userType" : {
                            "$eq" : "employer"
                        }
                    },
                    "winningPlan" : {
                        "stage" : "COLLSCAN",
                        "filter" : {
                            "userType" : {
                                "$eq" : "employer"
                            }
                        },
                        "direction" : "forward"
                    },
                    "rejectedPlans" : []
                }
            }
        }, 
        {
            "$lookup" : {
                "from" : "companies",
                "as" : "company",
                "localField" : "_id",
                "foreignField" : "owner.id",
                "unwinding" : {
                    "preserveNullAndEmptyArrays" : false
                }
            }
        }, 
        {
            "$match" : {
                "$nor" : [ 
                    {
                        "company" : {
                            "$eq" : []
                        }
                    }
                ]
            }
        }, 
        {
            "$group" : {
                "_id" : {
                    "$const" : null
                },
                "total" : {
                    "$sum" : {
                        "$const" : 1
                    }
                }
            }
        }, 
        {
            "$project" : {
                "_id" : false,
                "total" : true
            }
        }
    ],
    "ok" : 1.0
}
jones
  • 1,423
  • 3
  • 35
  • 76

1 Answers1

4

$lookup operations are slow since they mimic the left join behavior, from the DOCS:

$lookup performs an equality match on the localField to the foreignField from the documents of the from collection

Hence if there are no indexes in the fields used for joining the collections Mongodb is force to do a collection scan.

Adding an index for the foreignField attributes should prevent a collection scan and increase the performance even of a magnitude

Paizo
  • 3,986
  • 30
  • 45