I am a beginner in Elasticsearch and I am working on a project where the requirement is to look for field values that ought to start with the pattern that has been provided.
I'm using a wildcard for the search pattern in this case. (Eg: Ste.*
).
Additionally, I am using the _msearch
option to search across multiple fields.
I want the matching records and also the count of matching records. I am using aggregate to get the count.
However, there are no issues when searching for numerical values. When I search for String, then there is a case-sensitive issue. When I search for Ste
, I can see results such as Stephen, Steven, and Stella.
When I search for ste
in Lowercase, nothing returns.
Below is the script that I tried:
import logging
import os
import json
import boto3
import requests
import elasticsearch
from elasticsearch import Elasticsearch
ES_Index_Name = 'emp_details_1_1'
ES_Cluster_IP = 'localhost'
def lambda_handlet(event):
ES_URL = f'http://{ES_Cluster_IP}:9200/{ES_Index_Name}/_msearch'
print("URL - ", ES_URL)
query_input = event
query_input_star = '.*'
query_input_re = query_input + query_input_star
searchString = [ {"index": "emp_details_1_1"},
{"_source":[],"size":0,"min_score":1,"query":{"prefix":{"firstname.keyword":query_input}},"aggs":{"firstname":{"terms":{"field":"firstname.keyword","include":query_input_re}},"firstname_count":{"value_count":{"field":"firstname.keyword"}}}},
{"index": "emp_details_1_1"},
{"_source":[],"size":0,"min_score":1,"query":{"prefix":{"phone.keyword":query_input}},"aggs":{"phone":{"terms":{"field":"phone.keyword","include":query_input_re}},"phone_count":{"value_count":{"field":"phone.keyword"}}}}]
searchQuery = ""
for d in searchString:
searchQuery += json.dumps(d) + "," + "\n"
print("API Global search fn: ",d)
print("searchQuery: ", searchQuery)
headers = {"Content-Type":"application/json","Accept":"test/plain"}
searchResponse = requests.get(url=ES_URL, headers=headers, data=searchQuery)
print("API Global response: ", json.loads(searchResponse.text))
lambda_handlet('M')
Also below is the mapping for the index:
{
"emp_details_1_1" : {
"mappings" : {
"properties" : {
"firstname" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"phone" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
}
}
}
}
Below is the sample output:
{'took': 190, 'responses': [{'took': 190, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 3, 'relation': 'eq'}, 'max_score': None, 'hits': []}, 'aggregations': {'firstname': {'doc_count_error_upper_bound': 0, 'sum_other_doc_count': 0, 'buckets': [{'key': 'Masha', 'doc_count': 1}, {'key': 'Millard', 'doc_count': 1}, {'key': 'Monte', 'doc_count': 1}]}, 'firstname_count': {'value': 3}}, 'status': 200}, {'took': 20, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 0, 'relation': 'eq'}, 'max_score': None, 'hits': []}, 'aggregations': {'phone_count': {'value': 0}, 'phone': {'doc_count_error_upper_bound': 0, 'sum_other_doc_count': 0, 'buckets': []}}, 'status': 200}]}
How can I modify my script so that the search would perform case-insensitive searches regardless of whether I pass lowercase or uppercase strings?