The basic procedure here is to use .findAndModify():
Forgive that this is not Python code, but the structure is the same and it's a reasonably universal example. Take three documents:
{ "_id": 1 }
{ "_id": 2 }
{ "_id": 3 }
So, using the core method, you just call it with the "remove" argument for each _id. No other process can do this at the same time.
// Find the document with _id 1 and remove it in one operation.
// The call returns the removed document, or nothing if no document matched.
db.collection.findAndModify({
"query": { "_id": 1 },
"remove": true
})
That will either return the document that was removed or nothing at all.
For a bit more "concurrency" proof, again excuse the node.js code here, but I'm not in the frame of mind to write some brilliant "Twisted" type code as a quick example. It serves the purpose of a concurrency test though:
// Concurrency test script: seeds three documents, then issues two removal
// attempts per document in parallel and logs what each attempt got back.
var async = require('async'),
    mongoose = require('mongoose'),
    Schema = mongoose.Schema;

// Minimal schema: documents carry only a numeric _id.
var testSchema = new Schema({
  "_id": Number,
});

// Model bound explicitly to the 'test' collection.
var Test = mongoose.model( 'Test', testSchema, 'test' );

mongoose.connect('mongodb://localhost/async');

async.series(
  [
    // Clear test collection
    function(callback) {
      Test.remove({},callback);
    },

    // Insert some data
    function(callback) {
      async.each([1,2,3],function(num,callback) {
        Test.create({ "_id": num },callback);
      },callback);
    },

    // Now run test in parallel: every _id appears twice, so two attempts
    // compete for each document.
    function(callback) {
      async.each([1,1,2,2,3,3],function(num,callback) {
        Test.findOneAndRemove(
          { "_id": num },
          function(err,doc) {
            // Return on error so the callback is invoked exactly once and
            // no misleading "Removing" line is logged for a failed attempt.
            if (err) return callback(err);
            // doc is the removed document, or null when nothing was removed.
            console.log( "Removing: %s, %s", num, doc );
            callback();
          }
        );
      },callback);
    }
  ],
  function(err) {
    // Report a failure from any step instead of exiting silently, and
    // reflect it in the process exit status.
    if (err) {
      console.error(err);
    }
    process.exit(err ? 1 : 0);
  }
);
And the results (in possibly varying order):
Removing: 3, { _id: 3, __v: 0 }
Removing: 1, { _id: 1, __v: 0 }
Removing: 3, null
Removing: 1, null
Removing: 2, { _id: 2, __v: 0 }
Removing: 2, null
So out of the six attempts run here with two attempts per document, only 3 of the attempts actually succeeded and returned the result pulled off of the stack.
That's the principle behind ensuring the result you want.