0

Here structure of my index:

[
    {
        "Id":"1",
        "Path":"/Series/Current/SerieA/foo/foo",
        "PlayCount":100
    },
    {
        "Id":"2",
        "Path":"/Series/Current/SerieA/bar/foo",
        "PlayCount":1000
    },
    {
        "Id":"3",
        "Path":"/Series/Current/SerieA/bar/bar",
        "PlayCount":50
    },
    {
        "Id":"4",
        "Path":"/Series/Current/SerieB/bla/bla",
        "PlayCount":300
    },
    {
        "Id":"5",
        "Path":"/Series/Current/SerieB/goo/boo",
        "PlayCount":200
    },
    {
        "Id":"6",
        "Path":"/Series/Current/SerieC/foo/zoo",
        "PlayCount":100
    }
]

I'd like to execute an aggregation that bring me sum of "PlayCount" for each Series like:

[
    {
        "key":"serieA",
        "TotalPlayCount":1150
    },
    {
        "key":"serieB",
        "TotalPlayCount":500
    },
    {
        "key":"serieC",
        "TotalPlayCount":100
    }
]

This is how I try to do it but obviously query fails since this is not the proper way:

{
    "size": 0,
    "query":{
        "filtered":{
            "query":{
                "regexp":{
                    "Path":"/Series/Current/.*"
                }
            }
        }
    },
    "aggs":{
        "play_count_for_current_series":{
            "terms": {
                "field": "Path", 
                "regexp": "/Series/Current/([^/]+)"
            },
            "aggs":{
                "Total_play": { "sum": { "field": "PlayCount" } }
            }
        }
    }
}

Is there a way to do it?

gunererd
  • 651
  • 9
  • 19

1 Answers1

0

My suggestion is as follows:

DELETE test
PUT /test
{
  "settings": {
    "analysis": {
      "filter": {
        "my_special_filter": {
          "type": "pattern_capture",
          "preserve_original": 0,
          "patterns": [
            "/Series/Current/([^/]+)"
          ]
        }
      },
      "analyzer": {
        "my_special_analyzer": {
          "tokenizer": "whitespace",
          "filter": [
            "my_special_filter"
          ]
        }
      }
    }
  },
  "mappings": {
    "test": {
      "properties": {
        "Path": {
          "type": "string",
          "fields": {
            "for_aggregations": {
              "type": "string",
              "analyzer": "my_special_analyzer"
            },
            "raw": {
              "type": "string",
              "index": "not_analyzed"
            }
          }
        }
      }
    }
  }
}

Create a special analyzer that uses a pattern_capture filter to catch only those terms that you are interested. Because I didn't want to change your current mapping for that field I added a fields section with a sub-field that will use this special analyzer. I also added a raw field which is not_analyzed which will help with the query itself.

POST test/test/_bulk
{"index":{}}
{"Id":"1","Path":"/Series/Current/SerieA/foo/foo","PlayCount":100}
{"index":{}}
{"Id":"2","Path":"/Series/Current/SerieA/bar/foo","PlayCount":1000}
{"index":{}}
{"Id":"3","Path":"/Series/Current/SerieA/bar/bar","PlayCount":50}
{"index":{}}
{"Id":"4","Path":"/Series/Current/SerieB/bla/bla","PlayCount":300}
{"index":{}}
{"Id":"5","Path":"/Series/Current/SerieB/goo/boo","PlayCount":200}
{"index":{}}
{"Id":"6","Path":"/Series/Current/SerieC/foo/zoo","PlayCount":100}
{"index":{}}
{"Id":"7","Path":"/Sersdasdies/Curradent/SerieC/foo/zoo","PlayCount":100}

For the query, you don't need the regular expression in the query because your aggregation will use that sub-field which only has your needed SerieX terms.

GET /test/test/_search
{
  "size": 0,
  "query": {
    "filtered": {
      "query": {
        "regexp": {
          "Path.raw": "/Series/Current/.*"
        }
      }
    }
  },
  "aggs": {
    "play_count_for_current_series": {
      "terms": {
        "field": "Path.for_aggregations"
      },
      "aggs": {
        "Total_play": {
          "sum": {
            "field": "PlayCount"
          }
        }
      }
    }
  }
}

And the result is

  "play_count_for_current_series": {
     "doc_count_error_upper_bound": 0,
     "sum_other_doc_count": 0,
     "buckets": [
        {
           "key": "SerieA",
           "doc_count": 3,
           "Total_play": {
              "value": 1150
           }
        },
        {
           "key": "SerieB",
           "doc_count": 2,
           "Total_play": {
              "value": 500
           }
        },
        {
           "key": "SerieC",
           "doc_count": 1,
           "Total_play": {
              "value": 100
           }
        }
     ]
  }
Andrei Stefan
  • 51,654
  • 6
  • 98
  • 89