5

Let's say I have a set of... burgers...

For each burger, I have a set of images relating to each component of the burger.

Unfortunately, there isn't any consistency in the structure of these components (I didn't write it).

Here is an example of two documents:

{
    "bunsResource": {
        "image": {
            "url": "./buns_1.png",
            "who": "Sam"
        },
        "buns": [
            {
                "image": {
                    "url": "./top-bun_1.png",
                    "who": "Jim"
                }
            },
            {
                "image": {
                    "url": "./bottom-bun_1.png",
                    "who": "Sarah"
                }
            }
        ]
    },
    "pattyResource": {
        "image": {
            "url": "./patties_1.png",
            "who": "Kathy"
        },
        "patties": [
            {
                "image": {
                    "url": "./patty_1.jpg",
                    "who": "Kathy"
                }
            }
        ]
    }
},
{
    "bunsResource": {
        "image": {
            "url": "./buns_2.png",
            "who": "Jim"
        },
        "buns": [
            {
                "image": {
                    "url": "./top-bun_2.png",
                    "who": "Jim"
                }
            },
            {
                "image": {
                    "url": "./bottom-bun_2.png",
                    "who": "Kathy"
                }
            }
        ]
    },
    "pattyResource": {
        "image": {
            "url": "./patties_1.png",
            "who": "Kathy"
        },
        "patties": [
            {
                "image": {
                    "url": "./patty_1.jpg",
                    "who": "Kathy"
                }
            }
        ]
    }
}

What I need is a list of photographers, each with their image count:

{
    "who": "Sam",
    "count": 1
},
{
    "who": "Jim",
    "count": 3
},
{
    "who": "Sarah",
    "count": 1
},
{
    "who": "Kathy",
    "count": 3
}

That is a UNIQUE image count, mind you!

I haven't been able to figure out how to achieve this...

I assume that I need to first resolve each burger to a unique set of url / who, then aggregate from there, but I can't figure out how to get the flattened list of url / who per burger.

WebWanderer
  • 10,380
  • 3
  • 32
  • 51

1 Answer

5

It depends on whether the patties and buns arrays are mapped as nested fields or not. If they are not, then it's easy: you can simply run a terms aggregation using a script that gathers all the who fields from everywhere in the document:

POST not-nested/_search 
{
  "size": 0,
  "aggs": {
    "script": {
      "terms": {
        "script": {
          "source": """
          def list = new ArrayList();
          list.addAll(doc['pattyResource.image.who.keyword'].values);
          list.addAll(doc['bunsResource.image.who.keyword'].values);
          list.addAll(doc['bunsResource.buns.image.who.keyword'].values);
          list.addAll(doc['pattyResource.patties.image.who.keyword'].values);
          return list;
          """
        }
      }
    }
  }
}

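That will return this (note that each doc_count is the number of documents in which the photographer appears, not the number of images):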

  "aggregations" : {
    "script" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "Jim",
          "doc_count" : 2
        },
        {
          "key" : "Kathy",
          "doc_count" : 2
        },
        {
          "key" : "Sam",
          "doc_count" : 1
        },
        {
          "key" : "Sarah",
          "doc_count" : 1
        }
      ]
    }
  }

However, if they are mapped as nested fields, things get more complicated: you'll need some client-side work to figure out the final counts, but we can simplify that client-side work with a few aggregations:

POST nested/_search 
{
  "size": 0,
  "aggs": {
    "bunsWho": {
      "terms": {
        "field": "bunsResource.image.who.keyword"
      }
    },
    "bunsWhoNested": {
      "nested": {
        "path": "bunsResource.buns"
      },
      "aggs": {
        "who": {
          "terms": {
            "field": "bunsResource.buns.image.who.keyword"
          }
        }
      }
    },
    "pattiesWho": {
      "terms": {
        "field": "pattyResource.image.who.keyword"
      }
    },
    "pattiesWhoNested": {
      "nested": {
        "path": "pattyResource.patties"
      },
      "aggs": {
        "who": {
          "terms": {
            "field": "pattyResource.patties.image.who.keyword"
          }
        }
      }
    }
  }
}

That will return this:

  "aggregations" : {
    "pattiesWho" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "Kathy",
          "doc_count" : 2
        }
      ]
    },
    "bunsWhoNested" : {
      "doc_count" : 4,
      "who" : {
        "doc_count_error_upper_bound" : 0,
        "sum_other_doc_count" : 0,
        "buckets" : [
          {
            "key" : "Jim",
            "doc_count" : 2
          },
          {
            "key" : "Kathy",
            "doc_count" : 1
          },
          {
            "key" : "Sarah",
            "doc_count" : 1
          }
        ]
      }
    },
    "pattiesWhoNested" : {
      "doc_count" : 2,
      "who" : {
        "doc_count_error_upper_bound" : 0,
        "sum_other_doc_count" : 0,
        "buckets" : [
          {
            "key" : "Kathy",
            "doc_count" : 2
          }
        ]
      }
    },
    "bunsWho" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "Jim",
          "doc_count" : 1
        },
        {
          "key" : "Sam",
          "doc_count" : 1
        }
      ]
    }
  }

And then you can simply write some client-side logic (here is some sample code in Node.js) that adds the numbers up:

// Running total per photographer, keyed by the aggregation bucket key.
var whos = {};

/**
 * Adds `count` to the running total stored in `whos` for `who`.
 *
 * @param {string} who - Photographer name (the bucket `key`).
 * @param {number} count - Amount to add (the bucket `doc_count`).
 */
var recordWho = function(who, count) {
    // Only trust own properties: `whos[who] || 0` would pick up members
    // inherited from Object.prototype (e.g. "constructor", "toString") and
    // turn the total into a string instead of starting from 0.
    var current = Object.prototype.hasOwnProperty.call(whos, who) ? whos[who] : 0;
    whos[who] = current + count;
};

// Feeds every bucket of one terms aggregation into the running totals.
var recordBuckets = function(buckets) {
    buckets.forEach(function(bucket) {
        recordWho(bucket.key, bucket.doc_count);
    });
};

// Walk the four aggregations in a fixed order: patties (top-level, then
// nested), followed by buns (top-level, then nested).
var aggs = resp.aggregations;
recordBuckets(aggs.pattiesWho.buckets);
recordBuckets(aggs.pattiesWhoNested.who.buckets);
recordBuckets(aggs.bunsWho.buckets);
recordBuckets(aggs.bunsWhoNested.who.buckets);

console.log(whos);

=>

{ Kathy: 5, Jim: 3, Sam: 1, Sarah: 1 }
Val
  • 207,596
  • 13
  • 358
  • 360