I have an autocomplete analyzer on a field ("keywords"). This field is an array of strings. When I query with a search string, I want the documents in which a single element of the "keywords" array matches best to be shown first. The problem is that if part of the search string matches several elements of the "keywords" array, that document is ranked above another document that has fewer but better matches. For example, if I query for "gas station", the keywords of the returned documents are these:
"hits": [
{
"_index": "locali_v3",
"_type": "categories",
"_id": "5810767ddc536a03b4761acd",
"_score": 3.1974547,
"_source": {
"keywords": [
"Radio Station",
"Radio Station"
]
}
},
{
"_index": "locali_v3",
"_type": "categories",
"_id": "581076d8dc536a03b4761cc3",
"_score": 3.0407648,
"_source": {
"keywords": [
"Stationery Store",
"Stationery Store"
]
}
},
{
"_index": "locali_v3",
"_type": "categories",
"_id": "5810767ddc536a03b4761ace",
"_score": 2.903595,
"_source": {
"keywords": [
"TV Station",
"TV Station"
]
}
},
{
"_index": "locali_v3",
"_type": "categories",
"_id": "581076cddc536a03b4761c87",
"_score": 2.517158,
"_source": {
"keywords": [
"Praktoreio Ugrwn Kausimwn/Gkaraz",
"Praktoreio Ygrwn Kaysimwn/Gkaraz",
"Praktoreio Ugron Kausimon/Gkaraz",
"Praktoreio Ygron Kaysimon/Gkaraz",
"Πρακτορείο Υγρών Καυσίμων/Γκαράζ",
"Gas Station"
]
}
}
The "Gas Station" document is ranked fourth, although it has the best single-element match. Is there a way to tell Elasticsearch that I do not care how many times "gas" or "station" appears in the keywords? I want the score of the best-matching element of the "keywords" array to be used as the document's score.
My settings are:
{
"locali": {
"settings": {
"index": {
"creation_date": "1480937810266",
"analysis": {
"filter": {
"autocomplete_filter": {
"type": "edge_ngram",
"min_gram": "1",
"max_gram": "20"
}
},
"analyzer": {
"keywords": {
"filter": [
"lowercase",
"autocomplete_filter"
],
"char_filter": [
"my_char_filter"
],
"type": "custom",
"tokenizer": "standard"
}
},
"char_filter": {
"my_char_filter": {
"type": "mapping",
"mappings": [
"ί => ι",
"Ί => Ι",
"ή => η",
"Ή => Η",
"ύ => υ",
"Ύ => Υ",
"ά => α",
"Ά => Α",
"έ => ε",
"Έ => Ε",
"ό => ο",
"Ό => Ο",
"ώ => ω",
"Ώ => Ω",
"ϊ => ι",
"ϋ => υ",
"ΐ => ι",
"ΰ => υ"
]
}
}
},
"number_of_shards": "1",
"number_of_replicas": "1",
"uuid": "TJjOt9L9QE2HrsUFHM6zJg",
"version": {
"created": "2040099"
}
}
}
}
}
And the mappings:
{
"locali": {
"mappings": {
"places": {
"properties": {
"formattedCategories": {
"properties": {
"english": {
"type": "string"
},
"greek": {
"type": "string"
}
}
},
"keywords": {
"type": "string",
"analyzer": "keywords"
},
"loc": {
"properties": {
"coordinates": {
"type": "geo_point"
}
}
},
"location": {
"properties": {
"formattedAddress": {
"properties": {
"english": {
"type": "string"
},
"greek": {
"type": "string"
}
}
},
"locality": {
"properties": {
"english": {
"type": "string"
},
"greek": {
"type": "string"
}
}
},
"neighbourhood": {
"properties": {
"english": {
"type": "string"
},
"greek": {
"type": "string"
}
}
}
}
},
"name": {
"properties": {
"english": {
"type": "string"
},
"greek": {
"type": "string"
}
}
},
"rating": {
"properties": {
"rating": {
"type": "long"
}
}
},
"seenDetails": {
"type": "long"
},
"verified": {
"type": "long"
}
}
},
"regions": {
"properties": {
"keywords": {
"type": "string",
"analyzer": "keywords"
},
"loc": {
"properties": {
"coordinates": {
"type": "geo_point"
}
}
},
"name": {
"properties": {
"english": {
"type": "string"
},
"greek": {
"type": "string"
}
}
},
"type": {
"type": "long"
},
"weight": {
"type": "long"
}
}
},
"categories": {
"properties": {
"keywords": {
"type": "string",
"analyzer": "keywords"
},
"name": {
"properties": {
"english": {
"type": "string"
},
"greek": {
"type": "string"
}
}
},
"weight": {
"type": "long"
}
}
}
}
}
}