0

I have a MongoDB collection `matchedpairs` with a data structure as follows:

Each document defines a pairwise connection between two values, i.e. 1 is in union with 2, 2 is in union with 10, etc. There are a large number of relationships defined.

{
    x:1,
    y:2
},
{
    x:2,
    y:10
},
{
    x:9,
    y:10
},
{
    x:8,
    y:4
}

I would like to query the documents and retrieve the unique disjoint sets for the pairs, i.e. return a result like this

{
    set:[1,2,9,10]
},
{
    set:[8,4]
}

I am familiar with the aggregation framework, but cannot see how to create the correct accumulator in the $group stage to create the disjoint sets. The attempt below simply gives just one grouping of similar pairs. As I see it I would have to create a whole string of $group stages (depending upon my set of data) to get the result I am looking for. Any clever ideas here?

// Attempted pipeline: group the pairs by `y`, collecting the distinct `x`
// values ('like') and the `y` value itself ('from'), then merge both arrays
// into a single `set` per group. This only unions pairs that share the same
// `y`; it does not follow chains of pairs.
db.matchedpairs.aggregate([
    {
        '$group': {
            '_id': '$y', 
            'like': {
                '$addToSet': '$x'
            }, 
            'from': {
                '$addToSet': '$y'
            }
        }
    }, {
        '$project': {
            '_id': 0, 
            'set': {
                '$setUnion': [
                    '$like', '$from'
                ]
            }
        }
    }
]);

gives:

{
 set:[4,8]
},
{
 set:[10,2,9]
},
{
 set:[1,2]
}
Neal Shail
  • 61
  • 5
  • Looks like a recursive search problem. I think [`$graphLookup`](https://docs.mongodb.com/manual/reference/operator/aggregation/graphLookup/) could be used to solve this. – styvane Nov 17 '19 at 21:56

1 Answer

0

Maybe it would be beneficial to convert the pairs into a single array first; then mapReduce or a custom script can be used to build the sets.

// Turn every {x, y} document into a two-element array, gather all of those
// arrays into the `list` field of a single document (constant _id "1"), and
// write that document to the `matchedpairs2` collection.
db.matchedpairs.aggregate([
  { $project: { set: ["$x", "$y"] } },
  { $group: { _id: "1", list: { $addToSet: "$set" } } },
  { $out: "matchedpairs2" }
]);


//gives => matchedpairs2

{
    "_id" : "1",
    "list" : [ 
        [ 
            1, 
            2
        ], 
        [ 
            9, 
            10
        ], 
        [ 
            2, 
            10
        ], 
        [ 
            8, 
            4
        ]
    ]
}
/**
 * Forwards the `list` field of the document bound as `this` to `emit`
 * under the fixed key "list". Call it with the document as the receiver,
 * e.g. `map.apply(doc)`.
 */
function map() {
  const { list } = this;
  emit("list", list);
}

/**
 * Computes the disjoint sets (connected components) described by a list of
 * groups and prints each set with `print`.
 *
 * Two values belong to the same set when they appear together in a group, or
 * are linked through a chain of groups: [1, 2] and [2, 10] put 1, 2 and 10 in
 * one set. Sets are reported in order of their first-seen member, and members
 * inside a set keep their first-seen order.
 *
 * The previous implementation only separated "values that share a member with
 * some other pair" from "everything else", so two independent chains such as
 * [1,2],[2,3] and [4,5],[5,6] were merged into one set.
 *
 * @param {string} key - Unused; kept so existing `emit(key, value)` calls work.
 * @param {Array<Array<*>>} value - Groups (typically pairs) of linked values.
 *   A missing list is treated as empty.
 * @returns {Array<Array<*>>} The disjoint sets, one array per set.
 */
function emit(key, value) {
  const indexByMember = new Map();
  const members = [];
  // Union-find forest over member indices; parent[i] === i marks a root.
  const parent = [];

  // Returns the index of a member, registering it as its own set when new.
  const indexOf = (member) => {
    if (!indexByMember.has(member)) {
      indexByMember.set(member, members.length);
      parent.push(members.length);
      members.push(member);
    }
    return indexByMember.get(member);
  };

  // Finds the root of an index and compresses the path walked to reach it.
  const find = (index) => {
    let root = index;
    while (parent[root] !== root) {
      root = parent[root];
    }
    let current = index;
    while (parent[current] !== root) {
      const next = parent[current];
      parent[current] = root;
      current = next;
    }
    return root;
  };

  for (const group of value || []) {
    const indices = group.map((member) => indexOf(member));
    // Merge every member of the group into the set of the first member.
    for (let i = 1; i < indices.length; i += 1) {
      const keep = find(indices[0]);
      const merge = find(indices[i]);
      if (keep !== merge) {
        parent[merge] = keep;
      }
    }
  }

  // Collect members by root; Map iteration keeps first-seen order.
  const setsByRoot = new Map();
  members.forEach((member, index) => {
    const root = find(index);
    if (!setsByRoot.has(root)) {
      setsByRoot.set(root, []);
    }
    setsByRoot.get(root).push(member);
  });

  const sets = [...setsByRoot.values()];
  sets.forEach((set) => print(set));
  return sets;
}

// Run `map` once per document of `matchedpairs2`, with the document bound
// as `this`.
db.matchedpairs2.find({}).forEach(function (doc) {
  map.apply(doc);
});

Result:


/* 1 */
[
    9.0,
    10.0,
    1.0,
    2.0
]

/* 2 */
[
    8.0,
    4.0
]
  • Thanks I can see how this would flatten the data structure but how do you see the $map and $reduce functions being used from this point to create the disjoint sub arrays? – Neal Shail Nov 19 '19 at 22:08