0

I have a MongoDB aggregation pipeline, which I have written in C#.

$geoNear{
  near: {
    type: "Point",
    coordinates: [-110.29665, 31.535699],
  },
  distanceField: "distance",
  maxDistance: 100,
  query: {
    $and: [
      {
        IsResidential: true,
      },
      {
        DaysSinceLastSale: {
          $gt: 10,
        },
      },
    ],
  },
  spherical: true,
},

$lookup:
{
  from: "tax_assessor",
  let: {
    propertyCity: "Chicago",
    propertyState: "IL"
    ownerName: "$OwnerName",
  },
  pipeline: [
    {
      $match: {
        $expr: {
          $and: [
            {$eq: ["$PropertyCity", "$$propertyCity"]},
            {$eq: ["$OwnerName", "$$ownerName"]},
            {$eq: ["$PropertyState", "$$propertyState"]},
          ],
        },
      },
    },
  ],
  as: "NumberOfProperties",
},

$project:
{
  _id: 0,
  FullAddress: 1,
  OwnerName: 1,
  distance: 1,
  YearBuilt: 1,
  NumberOfProperties: {
    $size: "$NumberOfProperties"
  }
}

Here what I need is something similar to this SQL:

select res1.owner_name, res1.full_address, res1.distance, res1.year_built, count(res2.owner_name) as property_count from 
(select * from properties where geolocation is <within a given range> and <some filters>) res1
left join 
(select * from properties where city=<given city> and state=<given state>) res2
on res1.owner_name = res2.owner_name
group by res1.owner_name
order by res1.distance

I could get the correct result but this aggregation is very slow.

When checking the execution plan, I saw the first stage - GeoNear has used an index. But in the second stage - lookup, it has not used any of the indexes.

"stages" : [
    {
      "$geoNearCursor" : {
        "queryPlanner" : {
          "plannerVersion" : 1,
          "namespace" : "tax_assessor",
          "indexFilterSet" : false,
          "parsedQuery" : {..},
          "queryHash" : "4B38534E",
          "planCacheKey" : "2328FDE9",
          "winningPlan" : {
            "stage" : "FETCH",
            "filter" : {..},
            "inputStage" : {
              "stage" : "GEO_NEAR_2DSPHERE",
              "keyPattern" : {
                "PropertyGeoPoint" : "2dsphere",
                "DaysSinceLastSale" : 1,
                "IsResidential" : 1
              },
              "indexName" : "sta_geo_idx",
              "indexVersion" : 2,
              "inputStages" : [..]
            }
          },
          "rejectedPlans" : [ ]
        },
        "executionStats" : {
          "executionSuccess" : true,
          "nReturned" : 2,
          "executionTimeMillis" : 1911,
          "totalKeysExamined" : 450,
          "totalDocsExamined" : 552,
          "executionStages" : {..},
                  }
      },
      "nReturned" : NumberLong(2),
      "executionTimeMillisEstimate" : NumberLong(0)
    },
    {
      "$lookup" : {
        "from" : "tax_assessor",
        "as" : "NumberOfProperties",
        "let" : {
          "propertyCity" : {
            "$const" : "COLUMBUS"
          },
          "propertyState" : {
            "$const" : "OH"
          },
          "ownerName" : "$OwnerName"
        },
        "pipeline" : [
          {
            "$match" : {
              "$expr" : {
                "$and" : [
                  {
                    "$eq" : [
                      "$PropertyCity",
                      "$$propertyCity"
                    ]
                  },
                  {
                    "$eq" : [
                      "$PropertyState",
                      "$$propertyState"
                    ]
                  },
                  {
                    "$eq" : [
                      "$OwnerName",
                      "$$ownerName"
                    ]
                  }
                ]
              }
            }
          }
        ]
      },
      "nReturned" : NumberLong(2),
      "executionTimeMillisEstimate" : NumberLong(1910)
    },
    {
      "$project" : {
        "OwnerName" : true,
        "FullAddress" : true,
        "distance" : true,
        "YearBuilt" : true,
        "NumberOfProperties" : {
          "$size" : [
            "$NumberOfProperties"
          ]
        },
        "_id" : false
      },
      "nReturned" : NumberLong(2),
      "executionTimeMillisEstimate" : NumberLong(1910)
    },
    {
      "$sort" : {
        "sortKey" : {
          "Distance" : 1
        }
      },
      "nReturned" : NumberLong(2),
      "executionTimeMillisEstimate": NumberLong(1910)
    }
  ]

Based on the above stats, $lookup is why it doesn't use any indexes. Does it give me an optimized result by rearranging the stages or applying a proper index? Or is there a better way to get the NumberOfProperties without using $lookup.

Shehan V
  • 164
  • 1
  • 14

1 Answers1

0

Maybe try this one:

$lookup:
{
  from: "tax_assessor",
  localField: "ownerName",
  foreignField: "OwnerName",
  pipeline: [
    {
      $match: {
        PropertyCity: "Chicago",
        propertyState: "IL",
      }
    }
  ],
  as: "NumberOfProperties",
}
Wernfried Domscheit
  • 54,457
  • 9
  • 76
  • 110