I have a collection with approximately 500 million documents in which the uniqueness constraint does not appear to have been enforced for a specific subset of the documents. The uniqueness constraint is defined by a compound index.
The indices on this collection:
db.elements.getIndexes()
[
{
"v" : 1,
"key" : {
"_id" : 1
},
"name" : "_id_",
"ns" : "test.elements"
},
{
"v" : 1,
"key" : {
"sessionId" : 1
},
"name" : "sessionId_1",
"ns" : "test.elements"
},
{
"v" : 1,
"key" : {
"sessionId" : 1,
"modelFolder" : 1,
"modelName" : 1
},
"name" : "sessionId_1_modelFolder_1_modelName_1",
"ns" : "test.elements",
"options" : {
"unique" : true
}
},
{
"v" : 1,
"key" : {
"id" : 1
},
"name" : "id_1",
"ns" : "test.elements",
"options" : {
"unique" : false
}
},
{
"v" : 1,
"key" : {
"uniqueId" : 1
},
"name" : "uniqueId_1",
"ns" : "test.elements"
}
]
When I run the following query, I get duplicates even though the query matches on every field of the unique compound index 'sessionId_1_modelFolder_1_modelName_1' (specific field values redacted due to IP concerns):
var gs = (
db
.elements
.aggregate(
[
{
$match : {
"sessionId" : (specific sessionId value),
"modelName" : (specific modelName value),
"modelFolder" : (specific modelFolder value)
}
},
{
$group : {
_id : "$id",
total : { $sum : 1 }
}
}
]
)
);
gs.forEach(
function(g) { printjson(g); }
);
A subset of the output:
{ "_id" : 1394912, "total" : 2 }
{ "_id" : 1394916, "total" : 2 }
{ "_id" : 1394914, "total" : 2 }
{ "_id" : 1394909, "total" : 2 }
{ "_id" : 1394877, "total" : 2 }
{ "_id" : 1394908, "total" : 2 }
{ "_id" : 1394900, "total" : 2 }
{ "_id" : 1394906, "total" : 2 }
{ "_id" : 1394907, "total" : 2 }
{ "_id" : 1394876, "total" : 2 }
{ "_id" : 1394904, "total" : 2 }
{ "_id" : 1394902, "total" : 2 }
{ "_id" : 1394903, "total" : 2 }
{ "_id" : 1394881, "total" : 2 }
{ "_id" : 1394859, "total" : 2 }
{ "_id" : 1394901, "total" : 2 }
{ "_id" : 1394878, "total" : 2 }
{ "_id" : 1394880, "total" : 2 }
{ "_id" : 1394857, "total" : 2 }
{ "_id" : 1394875, "total" : 2 }
I had killed a batch insert of this subset of documents and then bulk-inserted them again later on, but I'm surprised that this could somehow allow duplicates. Am I going crazy, or is this possible under certain conditions?