1

This question is about analyzing German company numbers.

I want users to be able to search for numbers with auto-complete (search-as-you-type).

Cases:

  1. When a user enters "HRA 123", the suggestion "HRA 12345" should appear.
  2. Also for "HRA123" => "HRA 12345"
  3. "12345" => "HRA 12345"
  4. The suggested numbers should also be restricted to companies matching the other active filters; for example, if a name is being searched, the suggested numbers should belong to companies with that name.

The format is: HRA 123456 SL

The Prefix can be HRA, HRB, GnR, VR, PR.

The number can be any length 123 up to 678.

In some cases there can be a suffix like "HL".

I created a custom analyzer that removes whitespace and makes everything uppercase.

                "number_analyzer": {
                    "tokenizer": "keyword",
                    "filter": ["trim", "whitespace_remove", "uppercase", "asciifolding"]
                }

I mapped the company number field as follows:

             "company_number":{
                "type":"text",
                "analyzer": "number_analyzer",
                "fields":{
                   "search-as-you-type":{
                      "type":"search_as_you_type",
                      "analyzer": "number_analyzer"
                   }
                }
             },

Now I want to be able to run the following query:

{
    "query": {
        "multi_match": {
            "query": "VR1",
            "type": "bool_prefix",
            "fields": [
                "company_number",
                "company_number.search-as-you-type",
                "company_number.search-as-you-type._2gram",
                "company_number.search-as-you-type._3gram"
            ]
        }
    }
}

But it's not working, I'm not getting autocomplete results.

Here is a list of companies you could import:

{ "index":{} }
 { "company_number" : "HRB 10474", "full_address" : null, "name" : "Stolberger Feinmetall Bransch GmbH", "registrar" : "Aachen", "native_company_number" : "Aachen HRB 10474", "office" : "Aachen" } 
{ "index":{} }
 { "company_number" : "VR 10474", "full_address" : null, "name" : "Strudel e.V.", "registrar" : "Berlin (Charlottenburg)", "native_company_number" : "Berlin (Charlottenburg) VR 10474", "office" : "Berlin (Charlottenburg)" } 
{ "index":{} }
 { "company_number" : "VR 10474", "full_address" : null, "name" : "SV Rengsdorf 1926 e.V.", "registrar" : "Montabaur", "native_company_number" : "Montabaur VR 10474", "office" : "Montabaur" } 
{ "index":{} }
 { "company_number" : "HRA 10474", "full_address" : null, "name" : "Synergie Diffusion OHG", "registrar" : "Saarbrücken", "native_company_number" : "Saarbrücken HRA 10474", "office" : "Saarbrücken" } 
{ "index":{} }
 { "company_number" : "HRB 10474", "full_address" : null, "name" : "T U R E K GmbH", "registrar" : "Nürnberg", "native_company_number" : "Nürnberg HRB 10474", "office" : "Nürnberg" } 
{ "index":{} }
 { "company_number" : "HRA 10474", "full_address" : "Friedrichstraße 25, 65185 Wiesbaden.", "name" : "T-Rex Hotel GmbH & Co. KG", "registrar" : "Wiesbaden", "native_company_number" : "Wiesbaden HRA 10474", "office" : "Wiesbaden" } 
{ "index":{} }
 { "company_number" : "HRB 10474", "full_address" : null, "name" : "team work Filmproduktion GmbH", "registrar" : "Dortmund", "native_company_number" : "Dortmund HRB 10474", "office" : "Dortmund" } 
{ "index":{} }
 { "company_number" : "VR 10474", "full_address" : null, "name" : "Tennis-Club Osann-Monzel e.V.", "registrar" : "Wittlich", "native_company_number" : "Wittlich VR 10474", "office" : "Wittlich" } 
{ "index":{} }
 { "company_number" : "HRB 10474", "full_address" : "Behrensallee 20, 25421 Pinneberg", "name" : "The Tradehouse GmbH", "registrar" : "Pinneberg", "native_company_number" : "Pinneberg HRB 10474 PI", "office" : "Pinneberg PI" } 

Full settings file:

{
    "settings": {
        "index": {
            "number_of_shards": 1,
            "number_of_replicas": 0
        },
        "analysis": {
            "filter": {
                "german_stop": {
                    "type": "stop",
                    "stopwords": "_german_"
                },
                "german_stemmer": {
                    "type": "stemmer",
                    "language": "light_german"
                },
                "snowball": {
                    "type": "snowball",
                    "language": "German2"
                },
                "german_phonetic": {
                    "type": "phonetic",
                    "encoder": "koelnerphonetik",
                    "replace": false
                },
                "address_synonyms": {
                    "type": "synonym",
                    "synonyms": [ "str, strasse, straße => strass" ]
                },
                "whitespace_remove": {
                    "type": "pattern_replace",
                    "pattern": " ",
                    "replacement": ""
                }
            },
            "analyzer": {
                "names_analyzer": {
                    "type": "custom",
                    "tokenizer": "standard",
                    "char_filter": [
                        "html_strip"
                    ],
                    "filter": [
                        "lowercase",
                        "word_delimiter",
                        "german_normalization",
                        "german_phonetic",
                        "asciifolding",
                        "apostrophe"
                    ]
                },
                "address_analyzer": {
                    "type": "custom",
                    "tokenizer": "standard",
                    "filter": [
                        "lowercase",
                        "german_normalization",
                        "german_stop",
                        "snowball",
                        "german_stemmer",
                        "address_synonyms",
                        "german_phonetic",
                        "asciifolding",
                        "apostrophe",
                        "word_delimiter"
                    ]
                },
                "number_analyzer": {
                    "tokenizer": "keyword",
                    "filter": [
                        "trim",
                        "whitespace_remove",
                        "uppercase",
                        "asciifolding"
                    ]
                }
            }
        }
    },
    "mappings": {
        "properties": {
            "all_names": {
                "type": "text",
                "analyzer": "names_analyzer",
                "fields": {
                    "search-as-you-type": {
                        "type": "search_as_you_type"
                    }
                }
            },
            "name": {
                "type": "text",
                "analyzer": "names_analyzer",
                "copy_to": "all_names"
            },
            "alt_names": {
                "type": "nested",
                "include_in_root": true,
                "properties": {
                    "name": {
                        "type": "text",
                        "analyzer": "names_analyzer",
                        "copy_to": "all_names"
                    },
                    "office": {
                        "type": "keyword",
                        "copy_to": "all_offices",
                        "fields": {
                            "text": {
                                "type": "text"
                            }
                        }
                    }
                }
            },
            "branches": {
                "type": "nested",
                "include_in_root": true,
                "properties": {
                    "name": {
                        "type": "text",
                        "analyzer": "names_analyzer",
                        "copy_to": "all_names"
                    },
                    "office": {
                        "type": "keyword",
                        "copy_to": "all_offices",
                        "fields": {
                            "text": {
                                "type": "text"
                            }
                        }
                    }
                }
            },
            "full_address": {
                "type": "text",
                "analyzer": "address_analyzer",
                "fields": {
                    "search-as-you-type": {
                        "type": "search_as_you_type"
                    }
                }
            },
            "all_address": {
                "type": "text",
                "analyzer": "address_analyzer",
                "fields": {
                    "search-as-you-type": {
                        "type": "search_as_you_type"
                    }
                }
            },
            "street": {
                "type": "text",
                "analyzer": "address_analyzer",
                "copy_to": "all_address",
                "fields": {
                    "search-as-you-type": {
                        "type": "search_as_you_type"
                    }
                }
            },
            "zipcode": {
                "type": "keyword",
                "copy_to": "all_address",
                "fields": {
                    "search-as-you-type": {
                        "type": "search_as_you_type"
                    },
                    "text": {
                        "type": "text"
                    }
                }
            },
            "city": {
                "type": "keyword",
                "copy_to": [ "all_offices", "all_address" ],
                "fields": {
                    "search-as-you-type": {
                        "type": "search_as_you_type"
                    },
                    "text": {
                        "type": "text",
                        "analyzer": "address_analyzer"
                    }
                }
            },
            "legal_form": {
                "type": "keyword",
                "fields": {
                    "search-as-you-type": {
                        "type": "search_as_you_type"
                    },
                    "text": {
                        "type": "text"
                    }
                }
            },
            "all_offices": {
                "type": "keyword",
                "fields": {
                    "search-as-you-type": {
                        "type": "search_as_you_type"
                    },
                    "text": {
                        "type": "text"
                    }
                }
            },
            "office": {
                "type": "keyword",
                "copy_to": "all_offices"
            },
            "registrar": {
                "type": "keyword",
                "copy_to": "all_offices"
            },
            "former_registrar": {
                "type": "keyword",
                "copy_to": "all_offices"
            },
            "state": {
                "type": "keyword",
                "fields": {
                    "text": {
                        "type": "text"
                    }
                }
            },
            "company_number": {
                "type": "text",
                "analyzer": "number_analyzer",
                "fields": {
                    "search-as-you-type": {
                        "type": "search_as_you_type",
                        "analyzer": "number_analyzer"
                    }
                }
            },
            "status": {
                "type": "keyword",
                "fields": {
                    "text": {
                        "type": "text"
                    }
                }
            }
        }
    }
}
I. T.
  • 21
  • 4
  • try the n-gram analyzer and check – Sagar Patel Dec 13 '21 at 09:34
  • Please add more details, such as which version of Elasticsearch you are using and all the analyzer definitions. I can see the field names "company_number.search-as-you-type._2gram" and "company_number.search-as-you-type._3gram", but the analyzer configuration is not included in your question. Please include all the details to get an accurate and fast response. – Sagar Patel Dec 13 '21 at 09:36
  • @SagarPatel added full mapping and settings – I. T. Dec 13 '21 at 11:01

0 Answers0