I have a raw aggregation query like the one below, but I am having a hard time converting it into elasticsearch-dsl.
I've read the documentation, which says that the .bucket(), .metric() and .pipeline() methods can be used to nest aggregations, but it does not illustrate how to use those three methods for more complex aggregations, such as ones with several levels of nesting.
{
  "aggs": {
    "statistics": {
      "terms": {
        "field": "id"
      },
      "aggs": {
        "date": {
          "date_histogram": {
            "min_doc_count": 0,
            "field": "date",
            "interval": "1d",
            "format": "yyyy-MM-dd"
          },
          "aggs": {
            "column_a": {
              "avg": {
                "field": "column_a"
              }
            },
            "column_b": {
              "avg": {
                "field": "column_b"
              }
            },
            "column_c": {
              "avg": {
                "field": "column_c"
              }
            },
            "a_gap": {
              "serial_diff": {
                "buckets_path": "column_a"
              }
            },
            "b_gap": {
              "serial_diff": {
                "buckets_path": "column_b"
              }
            },
            "c_gap": {
              "serial_diff": {
                "buckets_path": "column_c"
              }
            }
          }
        },
        "sum_a_gap": {
          "sum_bucket": {
            "buckets_path": "date>a_gap"
          }
        },
        "sum_b_gap": {
          "sum_bucket": {
            "buckets_path": "date>b_gap"
          }
        },
        "sum_c_gap": {
          "sum_bucket": {
            "buckets_path": "date>c_gap"
          }
        }
      }
    }
  }
}
My elasticsearch-dsl query below puts 'sum_a_gap' at the same level as 'column_a' and 'a_gap' (inside 'date'), instead of next to 'date' directly under 'statistics' as in the raw query above:
# Attempted elasticsearch-dsl translation of the raw aggregation body.
# NOTE(review): per the elasticsearch-dsl docs, .bucket() returns the newly
# created child aggregation, whereas .metric() and .pipeline() return the
# object they were called on. Every call after .bucket('date', ...) is
# therefore attached to 'date' -- confirm against the installed version.
self._search.aggs
.bucket('statistics', 'terms', field='id')
.bucket('date', 'date_histogram', field='date',
interval='1d', min_doc_count=0, format='yyyy-MM-dd')
# Per-day averages: children of 'date', matching the raw query.
.metric('column_a', 'avg', field='column_a')
.metric('column_b', 'avg', field='column_b')
.metric('column_c', 'avg', field='column_c')
# Day-over-day differences of the averages: also children of 'date', matching the raw query.
.pipeline('a_gap', 'serial_diff', buckets_path='column_a')
.pipeline('b_gap', 'serial_diff', buckets_path='column_b')
.pipeline('c_gap', 'serial_diff', buckets_path='column_c')
# These three are still chained on 'date', so they end up beside 'a_gap' etc.;
# in the raw query they are siblings of 'date' under 'statistics', which is
# what the 'date>..._gap' buckets_path expects.
.pipeline('sum_a_gap', 'sum_bucket', buckets_path='date>a_gap')
.pipeline('sum_b_gap', 'sum_bucket', buckets_path='date>b_gap')
.pipeline('sum_c_gap', 'sum_bucket', buckets_path='date>c_gap')
Many thanks in advance!