I'm using MongoDB and need to remove duplicate records. I have a listing collection that looks like so: (simplified)
[
{ "MlsId": "12345" },
{ "MlsId": "12345" },
{ "MlsId": "23456" },
{ "MlsId": "23456" },
{ "MlsId": "0" },
{ "MlsId": "0" },
{ "MlsId": "" },
{ "MlsId": "" }
]
A listing is a duplicate if its MlsId is neither "" nor "0" and another listing has the same MlsId. In the example above, the 2nd and 4th records would therefore need to be removed.
How would I find all duplicate listings and remove them? I started looking at MapReduce but couldn't find an example that fit my case.
Here is what I have so far, but it doesn't check if the MlsId is "0" or "":
/**
 * Map step for the duplicate-listing count.
 *
 * Called by mapReduce with `this` bound to the current Listing document.
 * Emits (MlsId, 1) so the reduce step can count listings per MlsId.
 *
 * Listings whose MlsId is "" or "0" are never duplicates by definition, and
 * listings with no MlsId at all cannot be matched to one another, so neither
 * kind is emitted.
 */
const m = function () {
  // `== null` deliberately matches both null and undefined (missing field).
  if (this.MlsId == null || this.MlsId === "" || this.MlsId === "0") {
    return;
  }
  emit(this.MlsId, 1);
};
/**
 * Reduce step for the duplicate-listing count.
 *
 * Adds up the partial counts emitted for one key. Because the result has the
 * same shape as each input value (a number), the function stays correct when
 * it is fed its own earlier output for the same key.
 *
 * @param {*} k - The key being reduced (unused; the sum does not depend on it).
 * @param {number[]} vals - Counts emitted or previously reduced for that key.
 * @returns {number} The total of all values in `vals`.
 */
const r = function (k, vals) {
  // Standard reduce instead of the shell-specific Array.sum helper.
  return vals.reduce(function (total, count) {
    return total + count;
  }, 0);
};
// Count listings per MlsId. mapReduce needs an explicit output target, so the
// counts are written to a scratch collection (any existing collection with
// this name is replaced).
const res = db.Listing.mapReduce(m, r, { out: "listing_mlsid_counts" });
// Keys with a count above 1 are MlsIds shared by more than one listing.
db[res.result].find({ value: { $gt: 1 } });
// The scratch collection is only needed for the lookup above; remove it.
db[res.result].drop();