This question is about analyzing German company numbers.
I want users to be able to search for numbers with auto-complete (search-as-you-type).
Cases:
- When a user enters "HRA 123", the suggestion "HRA 12345" should appear.
- Also for "HRA123" => "HRA 12345"
- "12345" => "HRA 12345"
- The suggested numbers should also be restricted to companies matching the other active filters; for example, if a name is being searched, the suggested numbers should belong to companies with that name.
The format is: HRA 123456 SL
The Prefix can be HRA, HRB, GnR, VR, PR.
The number can be any length 123 up to 678.
In some cases there can be a suffix like "HL".
I created a custom analyzer that removes whitespace and makes everything uppercase.
"number_analyzer": {
  "tokenizer": "keyword",
  "filter": [
    "trim",
    "whitespace_remove",
    "uppercase",
    "asciifolding"
  ]
}
I mapped the company number field as follows:
"company_number": {
  "type": "text",
  "analyzer": "number_analyzer",
  "fields": {
    "search-as-you-type": {
      "type": "search_as_you_type"
    }
  }
},
Now I want to be able to run a query like this:
{
  "query": {
    "multi_match": {
      "query": "VR1",
      "fields": [
        "company_number",
        "company_number.search-as-you-type",
        "company_number.search-as-you-type._2gram",
        "company_number.search-as-you-type._3gram"
      ]
    }
  }
}
But it's not working; I'm not getting any autocomplete results.
Here is a list of companies you could import:
{ "index":{} }
{ "company_number" : "HRB 10474", "full_address" : null, "name" : "Stolberger Feinmetall Bransch GmbH", "registrar" : "Aachen", "native_company_number" : "Aachen HRB 10474", "office" : "Aachen" }
{ "index":{} }
{ "company_number" : "VR 10474", "full_address" : null, "name" : "Strudel e.V.", "registrar" : "Berlin (Charlottenburg)", "native_company_number" : "Berlin (Charlottenburg) VR 10474", "office" : "Berlin (Charlottenburg)" }
{ "index":{} }
{ "company_number" : "VR 10474", "full_address" : null, "name" : "SV Rengsdorf 1926 e.V.", "registrar" : "Montabaur", "native_company_number" : "Montabaur VR 10474", "office" : "Montabaur" }
{ "index":{} }
{ "company_number" : "HRA 10474", "full_address" : null, "name" : "Synergie Diffusion OHG", "registrar" : "Saarbrücken", "native_company_number" : "Saarbrücken HRA 10474", "office" : "Saarbrücken" }
{ "index":{} }
{ "company_number" : "HRB 10474", "full_address" : null, "name" : "T U R E K GmbH", "registrar" : "Nürnberg", "native_company_number" : "Nürnberg HRB 10474", "office" : "Nürnberg" }
{ "index":{} }
{ "company_number" : "HRA 10474", "full_address" : "Friedrichstraße 25, 65185 Wiesbaden.", "name" : "T-Rex Hotel GmbH & Co. KG", "registrar" : "Wiesbaden", "native_company_number" : "Wiesbaden HRA 10474", "office" : "Wiesbaden" }
{ "index":{} }
{ "company_number" : "HRB 10474", "full_address" : null, "name" : "team work Filmproduktion GmbH", "registrar" : "Dortmund", "native_company_number" : "Dortmund HRB 10474", "office" : "Dortmund" }
{ "index":{} }
{ "company_number" : "VR 10474", "full_address" : null, "name" : "Tennis-Club Osann-Monzel e.V.", "registrar" : "Wittlich", "native_company_number" : "Wittlich VR 10474", "office" : "Wittlich" }
{ "index":{} }
{ "company_number" : "HRB 10474", "full_address" : "Behrensallee 20, 25421 Pinneberg", "name" : "The Tradehouse GmbH", "registrar" : "Pinneberg", "native_company_number" : "Pinneberg HRB 10474 PI", "office" : "Pinneberg PI" }
Full settings file:
{
  "settings": {
    "index": {
      "number_of_shards": 1,
      "number_of_replicas": 0
    },
    "analysis": {
      "filter": {
        "german_stop": {
          "type": "stop",
          "stopwords": "_german_"
        },
        "german_stemmer": {
          "type": "stemmer",
          "language": "light_german"
        },
        "snowball": {
          "type": "snowball",
          "language": "German2"
        },
        "german_phonetic": {
          "type": "phonetic",
          "encoder": "koelnerphonetik",
          "replace": false
        },
        "address_synonyms": {
          "type": "synonym",
          "synonyms": ["str, strasse, straße => strass"]
        },
        "whitespace_remove": {
          "type": "pattern_replace",
          "pattern": " ",
          "replacement": ""
        }
      },
      "analyzer": {
        "names_analyzer": {
          "type": "custom",
          "tokenizer": "standard",
          "char_filter": ["html_strip"],
          "filter": [
            "lowercase",
            "word_delimiter",
            "german_normalization",
            "german_phonetic",
            "asciifolding",
            "apostrophe"
          ]
        },
        "address_analyzer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "german_normalization",
            "german_stop",
            "snowball",
            "german_stemmer",
            "address_synonyms",
            "german_phonetic",
            "asciifolding",
            "apostrophe",
            "word_delimiter"
          ]
        },
        "number_analyzer": {
          "tokenizer": "keyword",
          "filter": [
            "trim",
            "whitespace_remove",
            "uppercase",
            "asciifolding"
          ]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "all_names": {
        "type": "text",
        "analyzer": "names_analyzer",
        "fields": {
          "search-as-you-type": {
            "type": "search_as_you_type"
          }
        }
      },
      "name": {
        "type": "text",
        "analyzer": "names_analyzer",
        "copy_to": "all_names"
      },
      "alt_names": {
        "type": "nested",
        "include_in_root": true,
        "properties": {
          "name": {
            "type": "text",
            "analyzer": "names_analyzer",
            "copy_to": "all_names"
          },
          "office": {
            "type": "keyword",
            "copy_to": "all_offices",
            "fields": {
              "text": {
                "type": "text"
              }
            }
          }
        }
      },
      "branches": {
        "type": "nested",
        "include_in_root": true,
        "properties": {
          "name": {
            "type": "text",
            "analyzer": "names_analyzer",
            "copy_to": "all_names"
          },
          "office": {
            "type": "keyword",
            "copy_to": "all_offices",
            "fields": {
              "text": {
                "type": "text"
              }
            }
          }
        }
      },
      "full_address": {
        "type": "text",
        "analyzer": "address_analyzer",
        "fields": {
          "search-as-you-type": {
            "type": "search_as_you_type"
          }
        }
      },
      "all_address": {
        "type": "text",
        "analyzer": "address_analyzer",
        "fields": {
          "search-as-you-type": {
            "type": "search_as_you_type"
          }
        }
      },
      "street": {
        "type": "text",
        "analyzer": "address_analyzer",
        "copy_to": "all_address",
        "fields": {
          "search-as-you-type": {
            "type": "search_as_you_type"
          }
        }
      },
      "zipcode": {
        "type": "keyword",
        "copy_to": "all_address",
        "fields": {
          "search-as-you-type": {
            "type": "search_as_you_type"
          },
          "text": {
            "type": "text"
          }
        }
      },
      "city": {
        "type": "keyword",
        "copy_to": ["all_offices", "all_address"],
        "fields": {
          "search-as-you-type": {
            "type": "search_as_you_type"
          },
          "text": {
            "type": "text",
            "analyzer": "address_analyzer"
          }
        }
      },
      "legal_form": {
        "type": "keyword",
        "fields": {
          "search-as-you-type": {
            "type": "search_as_you_type"
          },
          "text": {
            "type": "text"
          }
        }
      },
      "all_offices": {
        "type": "keyword",
        "fields": {
          "search-as-you-type": {
            "type": "search_as_you_type"
          },
          "text": {
            "type": "text"
          }
        }
      },
      "office": {
        "type": "keyword",
        "copy_to": "all_offices"
      },
      "registrar": {
        "type": "keyword",
        "copy_to": "all_offices"
      },
      "former_registrar": {
        "type": "keyword",
        "copy_to": "all_offices"
      },
      "state": {
        "type": "keyword",
        "fields": {
          "text": {
            "type": "text"
          }
        }
      },
      "company_number": {
        "type": "text",
        "analyzer": "number_analyzer",
        "fields": {
          "search-as-you-type": {
            "type": "search_as_you_type"
          }
        }
      },
      "status": {
        "type": "keyword",
        "fields": {
          "text": {
            "type": "text"
          }
        }
      }
    }
  }
}