
I have a problem with an Elasticsearch mapping. For example, the mapping for the field `name` is:

{
    "name": {
        "type": "keyword",
        "fields": {
            "ngram": {
                "type": "text",
                "analyzer": "ngram_analyzer",
                "search_analyzer": "ngram_analyzer"
            },
            "word": {
                "type": "text",
                "analyzer": "word_analyzer",
                "search_analyzer": "word_analyzer"
            }
        }
    }
}

The whole mapping works except for `search_analyzer`, which Elasticsearch seems to ignore.

The analysis settings:

{
   "analysis":{
      "analyzer":{
         "ngram_analyzer":{
            "type":"custom",
            "char_filter":[
               "number_char_filter_map",
               "remove_duplicates"
            ],
            "tokenizer":"ngram_tokenizer_whitespace",
            "filter":[
               "lowercase",
               "english_stop"
            ]
         },
         "word_analyzer":{
            "type":"custom",
            "char_filter":[
               "number_char_filter_map",
               "remove_duplicates"
            ],
            "tokenizer":"word_tokenizer",
            "filter":[
               "lowercase",
               "english_stop"
            ]
         }
      },
      "char_filter":{
         "remove_duplicates":{
            "type":"pattern_replace",
            "pattern":"(.)(?=\\1)",
            "replacement":""
         },
         "remove_white_spaces":{
            "type":"pattern_replace",
            "pattern":"(\s)",
            "replacement":""
         }
      },
      "filter":{
         "english_stop":{
            "type":"stop",
            "ignore_case":true,
            "stopwords":"_english_"
         }
      },
      "tokenizer":{
         "ngram_tokenizer":{
            "type":"ngram",
            "min_gram":2,
            "max_gram":7
         },
         "ngram_tokenizer_whitespace":{
            "type":"ngram",
            "min_gram":2,
            "max_gram":7,
            "token_chars":[
               "letter",
               "digit",
               "punctuation",
               "symbol"
            ]
         },
         "word_tokenizer":{
            "type":"standard"
         }
      }
   }
}
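
For reference, these analyzers can be tested directly with the `_analyze` API to see which tokens they produce (the index name `suggestions` below is just an example; any index created with these settings works):

POST /suggestions/_analyze HTTP/1.1
Host: localhost:9200
Content-Type: application/json

{
   "analyzer":"ngram_analyzer",
   "text":"hello world"
}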

I couldn't find any mention of `search_analyzer` under `fields` in the Elasticsearch documentation. If this approach does not work, is there an alternative structure that allows specifying a search analyzer?

  • What about this? https://www.elastic.co/guide/en/elasticsearch/reference/current/search-analyzer.html What makes you think that `... ElasticSearch ignores that`? – Val Aug 31 '18 at 13:45
  • @Val Thank you, but that is a normal field with a single analyzer; **what I need is multiple fields with multiple analyzers for better search results**. That approach works for just **one** field, and I would have to clone the field multiple times, which affects the `_source` property of the result and does not look good. My problem is with **fields** inside a property, which does not accept `search_analyzer`. – Saeed K Aug 31 '18 at 17:04
  • @Val Do you have any idea for a solution? I would be very thankful if you shared it. – Saeed K Sep 02 '18 at 15:39

1 Answer


Problem solved after a couple of days... The problem was using the same analyzer as the `search_analyzer`. I just defined another analyzer with a different name in the settings (using the same analyzer for both `analyzer` and `search_analyzer` seems to cause Elasticsearch to ignore `search_analyzer`).

  • Cloned `ngram_analyzer` to `ngram_search_analyzer`
  • Cloned `word_analyzer` to `word_search_analyzer`

And the create-index request with the mapping:

PUT /suggestions HTTP/1.1
Host: localhost:9200
Content-Type: application/json

{
   "mappings":{
      "doc":{
         "properties":{
            "caption":{
               "type":"keyword",
               "fields":{
                  "ngram":{
                     "type":"text",
                     "analyzer":"ngram_analyzer",
                     "search_analyzer":"ngram_search_analyzer"
                  },
                  "word":{
                     "type":"text",
                     "analyzer":"word_analyzer",
                     "search_analyzer":"word_search_analyzer"
                  }
               }
            }
         }
      }
   },
   "settings":{
      "number_of_shards":1,
      "number_of_replicas":1,
      "routing_partition_size":1,
      "analysis":{
         "analyzer":{
            "ngram_analyzer":{
               "type":"custom",
               "char_filter":[
                  "number_char_filter_map",
                  "remove_duplicates"
               ],
               "tokenizer":"ngram_tokenizer_whitespace",
               "filter":[
                  "lowercase",
                  "english_stop"
               ]
            },
            "ngram_search_analyzer":{
               "type":"custom",
               "char_filter":[
                  "number_char_filter_map",
                  "remove_duplicates"
               ],
               "tokenizer":"ngram_tokenizer_whitespace",
               "filter":[
                  "lowercase",
                  "english_stop"
               ]
            },
            "word_analyzer":{
               "type":"custom",
               "char_filter":[
                  "number_char_filter_map",
                  "remove_duplicates"
               ],
               "tokenizer":"word_tokenizer",
               "filter":[
                  "lowercase",
                  "english_stop"
               ]
            },
            "word_search_analyzer":{
               "type":"custom",
               "char_filter":[
                  "number_char_filter_map",
                  "remove_duplicates"
               ],
               "tokenizer":"word_tokenizer",
               "filter":[
                  "lowercase",
                  "english_stop"
               ]
            }
         },
         "char_filter":{
            "number_char_filter_map":{
               "type":"mapping",
               "mappings":[
                  "\u0660 => 0",
                  "\u0661 => 1",
                  "\u0662 => 2",
                  "\u0663 => 3",
                  "\u0664 => 4",
                  "\u0665 => 5",
                  "\u0666 => 6",
                  "\u0667 => 7",
                  "\u0668 => 8",
                  "\u0669 => 9",
                  "\u06f0 => 0",
                  "\u06f1 => 1",
                  "\u06f2 => 2",
                  "\u06f3 => 3",
                  "\u06f4 => 4",
                  "\u06f5 => 5",
                  "\u06f6 => 6",
                  "\u06f7 => 7",
                  "\u06f8 => 8",
                  "\u06f9 => 9"
               ]
            },
            "remove_duplicates":{
               "type":"pattern_replace",
               "pattern":"(.)(?=\\1)",
               "replacement":""
            },
            "remove_white_spaces":{
               "type":"pattern_replace",
               "pattern":"(\\s)",
               "replacement":""
            }
         },
         "filter":{
            "english_stop":{
               "type":"stop",
               "ignore_case":true,
               "stopwords":"_english_"
            }
         },
         "tokenizer":{
            "ngram_tokenizer":{
               "type":"ngram",
               "min_gram":2,
               "max_gram":7
            },
            "ngram_tokenizer_whitespace":{
               "type":"ngram",
               "min_gram":2,
               "max_gram":7,
               "token_chars":[
                  "letter",
                  "digit",
                  "punctuation",
                  "symbol"
               ]
            },
            "word_tokenizer":{
               "type":"standard",
               "token_chars":[

               ]
            }
         }
      }
   }
}

Now I see the search analyzers in the mapping :]
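
A quick way to verify is to fetch the mapping back; as far as I can tell, Elasticsearch only includes `search_analyzer` in the mapping output when it differs from `analyzer`, which would explain why it was not visible before:

GET /suggestions/_mapping HTTP/1.1
Host: localhost:9200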

I also think keeping separate index and search analyzers is good for later customization.
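
For example, a search can then combine both sub-fields. This is just a hypothetical query sketch, and the `^2` boost on the word field is arbitrary:

POST /suggestions/_search HTTP/1.1
Host: localhost:9200
Content-Type: application/json

{
   "query":{
      "multi_match":{
         "query":"hello",
         "fields":[
            "caption.ngram",
            "caption.word^2"
         ]
      }
   }
}

Each sub-field analyzes the query text with its own search analyzer (`ngram_search_analyzer` and `word_search_analyzer`, respectively).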
