I'm using Django Haystack with Elasticsearch as the backend for a real-time flight mapping service.
I have all my search indexes set up correctly, however, I'm having trouble returning results for searches that aren't full words (such as aviation callsigns, some of which take the style N346IF
, others include full words such as in Speedbird 500
). The N346IF
style of query doesn't yield any results, whereas I can easily return results for the latter example.
I make my query as below:
queryResults = SearchQuerySet().filter(content=q) # where q is the query in string format
(note that in the past I used the AutoQuery
queryset, but the documentation lists that this only tracks words, so I'm passing a raw string now).
I have my search index fields setup as EdgeNgramField
with search templates.
I have a custom backend with the following index settings (as well as both the snowball
analyzer and the pattern
analyzer):
ELASTICSEARCH_INDEX_SETTINGS = {
'settings': {
"analysis": {
"analyzer": {
"ngram_analyzer": {
"type": "custom",
"tokenizer": "lowercase",
"filter": ["haystack_ngram"]
},
"edgengram_analyzer": {
"type": "custom",
"tokenizer": "lowercase",
"filter": ["haystack_edgengram"]
}
},
"tokenizer": {
"haystack_ngram_tokenizer": {
"type": "nGram",
"min_gram": 4,
"max_gram": 15,
},
"haystack_edgengram_tokenizer": {
"type": "edgeNGram",
"min_gram": 4,
"max_gram": 15,
"side": "front"
}
},
"filter": {
"haystack_ngram": {
"type": "nGram",
"min_gram": 4,
"max_gram": 15
},
"haystack_edgengram": {
"type": "edgeNGram",
"min_gram": 4,
"max_gram": 15
}
}
}
}
}
ELASTICSEARCH_DEFAULT_ANALYZER = "pattern"
My backend is configured as:
class ConfigurableElasticBackend(ElasticsearchSearchBackend):
def __init__(self, connection_alias, **connection_options):
super(ConfigurableElasticBackend, self).__init__(
connection_alias, **connection_options)
user_settings = getattr(settings, 'ELASTICSEARCH_INDEX_SETTINGS')
if user_settings:
setattr(self, 'DEFAULT_SETTINGS', user_settings)
class ConfigurableElasticBackend(ElasticsearchSearchBackend):
DEFAULT_ANALYZER = "pattern"
def __init__(self, connection_alias, **connection_options):
super(ConfigurableElasticBackend, self).__init__(
connection_alias, **connection_options)
user_settings = getattr(settings, 'ELASTICSEARCH_INDEX_SETTINGS')
user_analyzer = getattr(settings, 'ELASTICSEARCH_DEFAULT_ANALYZER')
if user_settings:
setattr(self, 'DEFAULT_SETTINGS', user_settings)
if user_analyzer:
setattr(self, 'DEFAULT_ANALYZER', user_analyzer)
def build_schema(self, fields):
content_field_name, mapping = super(ConfigurableElasticBackend,
self).build_schema(fields)
for field_name, field_class in fields.items():
field_mapping = mapping[field_class.index_fieldname]
if field_mapping['type'] == 'string' and field_class.indexed:
if not hasattr(field_class, 'facet_for') and not \
field_class.field_type in('ngram', 'edge_ngram'):
field_mapping['analyzer'] = self.DEFAULT_ANALYZER
mapping.update({field_class.index_fieldname: field_mapping})
return (content_field_name, mapping)
class ConfigurableElasticSearchEngine(ElasticsearchSearchEngine):
backend = ConfigurableElasticBackend
What would be the correct setup in order to successfully yield results for search patterns that are both and/or N346IF
-style strings?
Appreciate any input, apologies if this is similar to another question (could not find anything related to it).
edit: requested by solarissmoke, the schema for this model:
class FlightIndex(indexes.SearchIndex, indexes.Indexable):
text = indexes.EdgeNgramField(document=True, use_template=True)
flight = indexes.CharField(model_attr='flightID')
callsign = indexes.CharField(model_attr='callsign')
displayName = indexes.CharField(model_attr='displayName')
session = indexes.CharField(model_attr='session')
def prepare_session(self, obj):
return obj.session.serverId
def get_model(self):
return Flight
Text is indexed as:
flight___{{ object.callsign }}___{{ object.displayName }}