0

I want to make a function that takes a list of hostnames and a list of fields as arguments and dynamically prepares an efficient Elasticsearch query that retrieves the last values of those fields for every host passed — the same way I can do it using Kibana.

In Kibana this looks like this (screenshot omitted):

Query that Kibana generates:

{
  "aggs": {
    "2": {
      "terms": {
        "field": "host.hostname",
        "order": {
          "_key": "desc"
        },
        "size": 5
      },
      "aggs": {
        "1": {
          "top_hits": {
            "fields": [
              {
                "field": "host.ip"
              }
            ],
            "_source": false,
            "size": 1,
            "sort": [
              {
                "@timestamp": {
                  "order": "desc"
                }
              }
            ]
          }
        },
        "3": {
          "top_hits": {
            "fields": [
              {
                "field": "source.ip"
              }
            ],
            "_source": false,
            "size": 1,
            "sort": [
              {
                "@timestamp": {
                  "order": "desc"
                }
              }
            ]
          }
        }
      }
    }
  },
  "size": 0,
  "script_fields": {},
  "stored_fields": [
    "*"
  ],
  "runtime_mappings": {},
  "query": {
    "bool": {
      "must": [],
      "filter": [
        {
          "bool": {
            "should": [
              {
                "bool": {
                  "should": [
                    {
                      "match_phrase": {
                        "host.hostname": "p-hostname-a"
                      }
                    }
                  ],
                  "minimum_should_match": 1
                }
              },
              {
                "bool": {
                  "should": [
                    {
                      "match_phrase": {
                        "host.hostname": "P-hostname-H"
                      }
                    }
                  ],
                  "minimum_should_match": 1
                }
              }
            ],
            "minimum_should_match": 1
          }
        },
        {
          "range": {
            "@timestamp": {
              "format": "strict_date_optional_time",
              "gte": "2022-09-20T07:57:26.189Z",
              "lte": "2022-09-21T07:57:26.189Z"
            }
          }
        }
      ],
      "should": [],
      "must_not": []
    }
  }
}

My approach in python:

 def get_data_from_elastic(self, fields_to_get, set_of_host_hostname):
        # Build a query that groups documents per host (terms bucket) and,
        # for each requested field, fetches values via a top_hits metric.
        s = Search().using(client=self.es_con).index(self.METRICBEAT_INDEX_NAME)
        # Restrict to the last 3600 minutes, rounded down to the minute.
        s = s.filter("range", ** {'@timestamp': {'gte': 'now-3600m/m'}})
        set_of_host_hostname = list(set_of_host_hostname)

        # NOTE(review): debugging aid — only the first two hosts are kept.
        set_of_host_hostname = set_of_host_hostname[0:2]

        # NOTE(review): size=1 limits the terms aggregation to a single host
        # bucket; to get one bucket per host this should be the number of
        # hostnames queried.
        s.aggs.bucket(name=f"main_bucket", agg_type="terms", field="host.hostname", size=1)
        for field in fields_to_get:
            # NOTE(review): unlike the Kibana-generated query above, these
            # top_hits metrics have no _source=False, size=1, or sort on
            # @timestamp, so they do not return only the latest value.
            s.aggs["main_bucket"].metric(name=f"name_{field}", agg_type="top_hits", fields=[{"field":field}])



        # NOTE(review): Search objects are immutable — the return value of
        # .source() is discarded here, so this line has no effect.
        s.source(fields=fields_to_get)
        # Build a bool query OR-ing a match clause per hostname.
        main_q = Q("bool", minimum_should_match=1)
        for host_hostname in set_of_host_hostname:
            q = Q("match", **{"host.hostname":host_hostname})
            main_q.should.append(q)
        # NOTE(review): .query() places the clauses in scoring query context
        # (see the printed output below); Kibana wraps them in "filter".
        # No size=0 is set either, so raw hits are returned.
        s = s.query(main_q)
        # print("\n\n\n")
        pp(s.to_dict())
        print("\n\n\n")
        res = s.execute()
        # Iterating the response yields the raw hits, not the aggregation
        # buckets — which is why every printed hit shows the same host data.
        for hit in res:
            print(hit)
            print(hit.host.ip)

Results:

{    "aggs":{
      "main_bucket":{
         "aggs":{
            "name_host.ip":{
               "top_hits":{
                  "fields":[
                     {
                        "field":"host.ip"
                     }
                  ]
               }
            },
            "name_host.os.type":{
               "top_hits":{
                  "fields":[
                     {
                        "field":"host.os.type"
                     }
                  ]
               }
            }
         },
         "terms":{
            "field":"host.hostname",
            "size":1
         }
      }    },    "query":{
      "bool":{
         "filter":[
            {
               "range":{
                  "@timestamp":{
                     "gte":"now-3600m/m"
                  }
               }
            }
         ],
         "minimum_should_match":1,
         "should":[
            {
               "match":{
                  "host.hostname":"P-hostname-2"
               }
            },
            {
               "match":{
                  "host.hostname":"P-hostname-1"
               }
            }
         ]
      }    } }

<Hit(metricbeat-7.17.1-system-2022.38/AEESYIMB2-liSoZOlAYu): {'agent': {'version': '7.17.1'}, '@timestamp': '2022-09-21T1...}>
['1thesameip0']
<Hit(metricbeat-7.17.1-system-2022.38/AUESYIMB2-liSoZOlAYu): {'agent': {'version': '7.17.1'}, '@timestamp': '2022-09-21T1...}>
['1thesameip0']
<Hit(metricbeat-7.17.1-system-2022.38/AkESYIMB2-liSoZOlAYu): {'agent': {'version': '7.17.1'}, '@timestamp': '2022-09-21T1...}>
['1thesameip0']
<Hit(metricbeat-7.17.1-system-2022.38/A0ESYIMB2-liSoZOlAYu): {'agent': {'version': '7.17.1'}, '@timestamp': '2022-09-21T1...}>
['1thesameip0']
<Hit(metricbeat-7.17.1-system-2022.38/BEESYIMB2-liSoZOlAYu): {'agent': {'version': '7.17.1'}, '@timestamp': '2022-09-21T1...}>
['1thesameip0']
<Hit(metricbeat-7.17.1-system-2022.38/BUESYIMB2-liSoZOlAYu): {'agent': {'version': '7.17.1'}, '@timestamp': '2022-09-21T1...}>
['1thesameip0']
<Hit(metricbeat-7.17.1-system-2022.38/BkESYIMB2-liSoZOlAYu): {'agent': {'version': '7.17.1'}, '@timestamp': '2022-09-21T1...}>
['1thesameip0']
<Hit(metricbeat-7.17.1-system-2022.38/B0ESYIMB2-liSoZOlAYu): {'agent': {'version': '7.17.1'}, '@timestamp': '2022-09-21T1...}>
['1thesameip0']
<Hit(metricbeat-7.17.1-system-2022.38/CEESYIMB2-liSoZOlAYu): {'agent': {'version': '7.17.1'}, '@timestamp': '2022-09-21T1...}>
['1thesameip0']
<Hit(metricbeat-7.17.1-system-2022.38/CUESYIMB2-liSoZOlAYu): {'agent': {'version': '7.17.1'}, '@timestamp': '2022-09-21T1...}>
['1thesameip0']

I have tried a few variations but could not replicate the Kibana query. Can someone guide me on how to replicate the Kibana query?

The version of elasticsearch-dsl is 7.4.

Adrian
  • 149
  • 3
  • 16

1 Answer

0

I have got a working solution:

The main_q query should have been attached with .filter() instead of .query().

The general "size=0" option should have been added to not return any documents

The aggregation buckets should be read explicitly from res.aggregations in the results, rather than iterating the (now empty) hits.

The parameter "size" for the bucket should have been of the size of number of hosts

The parameters "_source=False, size=1" should have been added to the metric to return only one record of required fields.

def get_data_from_elastic(self, fields_to_get, set_of_host_hostname):
    """Fetch the latest value of each field in *fields_to_get* for every
    host in *set_of_host_hostname*, replicating the query Kibana generates.

    A ``terms`` bucket groups documents by ``host.hostname``; inside it,
    one ``top_hits`` metric per requested field returns that field from the
    single newest document in the bucket.
    """
    fields_to_get = list(fields_to_get)
    s = Search().using(client=self.es_con).index(self.METRICBEAT_INDEX_NAME)
    # Only consider documents from the last 24 hours (rounded to the hour).
    s = s.filter("range", **{'@timestamp': {'gte': 'now-24h/h'}})
    set_of_host_hostname = list(set_of_host_hostname)
    set_of_host_hostname = set_of_host_hostname[0:2]
    # One terms bucket per host: size must cover every hostname queried.
    s.aggs.bucket(name="main_bucket", agg_type="terms",
                  field="host.hostname", size=len(set_of_host_hostname))
    # size=0: we only need the aggregation results, not the raw hits.
    s.update_from_dict({"size": 0})
    for field in fields_to_get:
        # _source=False + size=1: return only the requested field from a
        # single document.  The sort on @timestamp desc (present in the
        # Kibana query) guarantees that document is the *newest* one —
        # without it, top_hits would return an arbitrary matching document.
        s.aggs["main_bucket"].metric(
            name=f"name_{field}", agg_type="top_hits",
            fields=[{"field": field}], _source=False, size=1,
            sort=[{"@timestamp": {"order": "desc"}}])

    s = s.source(False)
    # Replicate Kibana's nested bool: an outer "should" holding one inner
    # bool-with-match_phrase per hostname, attached as a (non-scoring)
    # filter clause.
    main_q = Q("bool", minimum_should_match=1)
    for host_hostname in set_of_host_hostname:
        bool_q = Q("bool", minimum_should_match=1)
        bool_q.should.append(Q("match_phrase", **{"host.hostname": host_hostname}))
        main_q.should.append(bool_q)
    s = s.filter(main_q)

    res = s.execute()
    for agg in res.aggregations:
        for bucket in agg["buckets"]:
            pp(bucket.to_dict())
Adrian
  • 149
  • 3
  • 16