How do I add the following german_phonebook analyzer to Elasticsearch using elastic4s?
"index": {
"analysis": {
"analyzer": {
"german": {
"filter": [
"lowercase",
"german_stop",
"german_normalization",
"german_stemmer"
],
"tokenizer": "standard"
},
"german_phonebook": {
"filter": [
"german_phonebook"
],
"tokenizer": "keyword"
},
"mySynonyms": {
"filter": [
"lowercase",
"mySynonymFilter"
],
"tokenizer": "standard"
}
},
"filter": {
"german_phonebook": {
"country": "CH",
"language": "de",
"type": "icu_collation",
"variant": "@collation=phonebook"
},
"german_stemmer": {
"language": "light_german",
"type": "stemmer"
},
"german_stop": {
"stopwords": "_german",
"type": "stop"
},
"mySynonymFilter": {
"synonyms": [
"swisslift,lift"
],
"type": "synonym"
}
}
},
The core question is: which elastic4s token filter should I use to define the german_phonebook filter, which has type icu_collation?
...
Following the answer, I came up with this code:
/** Token filter definition for the `german_phonebook` ICU collation filter.
  *
  * Mirrors the `german_phonebook` entry of the target index settings:
  * type `icu_collation` with `country`, `language` and `variant` options.
  */
case class GPhonebook() extends TokenFilterDefinition {

  /** Elasticsearch filter type. This has to be the real type (`icu_collation`),
    * not a free-form label such as "phonebook".
    */
  val filterType: String = "icu_collation"

  /** Filter name, matching the key used in the index settings. */
  def name: String = "german_phonebook"

  /** Writes the filter-specific options into `source`.
    *
    * `tokenizer` is intentionally not written here: in the target settings it
    * is a property of the analyzer, not of the filter.
    *
    * NOTE(review): assumes the inherited JSON rendering already emits `"type"`
    * from `filterType`, so it is not repeated here — confirm against the
    * elastic4s version in use.
    *
    * @param source builder that receives the filter's option fields
    */
  override def build(source: XContentBuilder): Unit = {
    source.field("country", "CH")
    source.field("language", "de")
    source.field("variant", "@collation=phonebook")
  }
}
The analyzer definition looks like this now:
// Custom analyzer named "german_phonebook": a keyword tokenizer combined with
// the GPhonebook token filter, mirroring the "german_phonebook" analyzer entry
// of the target index settings.
CustomAnalyzerDefinition(
"german_phonebook",
// NOTE(review): "myKeywordTokenizer2" is presumably only the name given to
// this tokenizer instance — confirm it does not need to match another setting.
KeywordTokenizer("myKeywordTokenizer2"),
GPhonebook()
)