4

I added a synonyms analyzer and filter to my Elasticsearch index so that, for example, searching by state for "Massachusetts", "Ma", or "Mass" would return the same results. These are the settings that I have:

"analysis":{
    "analyzer":{
        "synonyms":{
            "filter":[
                "lowercase",
                "synonym_filter"
            ],
            "tokenizer": "standard"
        },
        "my_analyzer":{
            "filter":["standard", "lowercase", "my_soundex" ], 
            "tokenizer": "standard"}
        },
    "filter":{
        "my_soundex":{
            "replace": "false", 
            "type": "phonetic", 
            "encoder": "soundex"
        },
        "synonym_filter":{
            "type": "synonym",
            "synonyms":[
                "United States,US,USA,USA=>usa",
                "Alabama,Al,Ala,Ala",
                "Alaska,Ak,Alas,Alas",
                "Arizona,Az,Ariz",
                "Arkansas,Ar,Ark",
                "California,Ca,Calif,Cal",
                "Colorado,Co,Colo,Col",
                "Connecticut,Ct,Conn",
                "Deleware,De,Del",
                "District of Columbia,Dc,Wash Dc,Washington Dc=>Dc",
                "Florida,Fl,Fla,Flor",
                "Georgia,Ga",
                "Hawaii,Hi",
                "Idaho,Id,Ida",
                "Illinois,Il,Ill,Ills",
                "Indiana,In,Ind",
                "Iowa,Ia,Ioa",
                "Kansas,Kans,Kan,Ks",
                "Kentucky,Ky,Ken,Kent",
                "Louisiana,La",
                "Maine,Me",
                "Maryland,Md",
                "Massachusetts,Ma,Mass",
                "Michigan,Mi,Mich",
                "Minnesota,Mn,Minn",
                "Mississippi,Ms,Miss",
                "Missouri,Mo",
                "Montana,Mt,Mont",
                "Nebraska,Ne,Neb,Nebr",
                "Nevada,Nv,Nev"
                "New Hampshire,Nh=>Nh",
                "New Jersey,Nj=>Nj",
                "New Mexico,Nm,N Mex,New M=>Nm",
                "New York,Ny=>Ny",
                "North Carolina,Nc,N Car=>Nc",
                "North Dakota,Nd,N Dak, NoDak=>Nd",
                "Ohio,Oh,O",
                "Oklahoma,Ok,Okla",
                "Oregon,Or,Oreg,Ore",
                "Pennsylvania,Pa,Penn,Penna",
                "Rhode Island,Ri,Ri & PP,R Isl=>Ri",
                "South Carolina,Sc,S Car=>Sc",
                "South Dakota,Sd,S Dak,SoDak=>Sd",
                "Tennessee,Te,Tenn",
                "Texas,Tx,Tex",
                "Utah,Ut",
                "Vermont,Vt",
                "Virginia,Va,Virg",
                "Washington,Wa,Wash,Wn",
                "West Virginia,Wv,W Va, W Virg=>Wv",
                "Wisconsin,Wi,Wis,Wisc",
                "Wyomin,Wi,Wyo"
            ]
        }
    }
}

However, the synonyms filter doesn't seem to be working. Here are two queries that I tried:

"match": {
    "location.location_raw": {
        "type": "boolean",
        "operator": "AND",
        "query": "Massachusetts",
        "analyzer": "synonyms"
     }
}

"match": {
    "location.location_raw": {
        "type": "boolean",
        "operator": "AND",
        "query": "Mass",
        "analyzer": "synonyms"
     }
}

With the synonyms filter I should get the same number of results for both queries, but I get 6 results for "Massachusetts" and 2 results for "Mass". When I look at the results, all of the location_raw fields for the first query contain "Massachusetts", while all of the location_raw fields for the second query contain exactly "Mass". It seems like the synonyms analyzer is just being ignored.

What am I missing here?

Erica Stockwell-Alpert
  • 4,624
  • 10
  • 63
  • 130

0 Answers0