Environment: Java version "11.0.12" 2021-07-20 LTS, solr-8.9.0
I have the following field declaration for my Solr index:
<field name="Field1" type="string" multiValued="false" indexed="false" stored="true"/>
<field name="author" type="text_general" multiValued="false" indexed="true" stored="true"/>
<field name="Field2" type="string" multiValued="false" indexed="false" stored="true"/>
Field type:
<fieldType name="text_general" class="solr.TextField" positionIncrementGap="100" multiValued="true">
<analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
The Solr core was created using the command: ./solr create -c fuzzyCore
The .csv file used to index the data is https://drive.google.com/file/d/1z684x2GKsSQWGAdyi6O4uKit4a96iiuh/view
I understand that "Lucene supports fuzzy searches based on the Levenshtein Distance, or Edit Distance algorithm. To do a fuzzy search the tilde, "~", symbol at the end of a Single word Term is used.
The ~ operator is used to run fuzzy searches. We need to add the ~ operator after every single term, and can optionally specify an edit distance after it, as below."
{FIELD_NAME}:{TERM_1}~{Edit_Distance}
Since 'KeywordTokenizer' keeps the whole input as a single token and I want each word to be searchable, 'StandardTokenizer' is used.
The request looks like this:
curl "http://localhost:8983/solr/fuzzyCore/select" --data-urlencode "q=author:beaeb~' AND Field1:(w1 x)" --data-urlencode "rows=20"
{
"responseHeader":{
"status":0,
"QTime":14,
"params":{
"q":"author:beaeb~' AND Field1:(w1 x)",
"rows":"20"}},
"response":{"numFound":12,"start":0,"numFoundExact":true,"docs":[
{
"Field1":"x",
"author":"bbaeb",
"Field2":"o",
"id":"f8fbb58d-9e0d-47b2-aa3c-e3920e25a7d1",
"_version_":1746912583192936455},
{
"Field1":"x",
"author":"beabe",
"Field2":"p",
"id":"7d73e7ba-8455-4eb4-818f-1e19b1d35a22",
"_version_":1746912583244316680},
{
"Field1":"x",
"author":"baeeb",
"Field2":"n",
"id":"b4e86fc3-7ecc-407b-b638-88d167a66934",
"_version_":1746912583292551181},
{
"Field1":"x",
"author":"beaea",
"Field2":"o",
"id":"131ad4de-eaa2-47b8-b58b-e690316eed1c",
"_version_":1746912583314571267},
{
"Field1":"x",
"author":"bbaeb",
"Field2":"q",
"id":"d034e66c-a302-4b24-a186-5a2bafecab40",
"_version_":1746912583392165900},
{
"Field1":"x",
"author":"beacb",
"Field2":"n",
"id":"c0ab3e48-2b2d-438d-8cc2-1acfcf6efde8",
"_version_":1746912583490732036},
{
"Field1":"x",
"author":"aeabe",
"Field2":"m",
"id":"4472ec5d-eace-446f-b1d6-c8911be24368",
"_version_":1746912583266336776},
{
"Field1":"x",
"author":"baeab",
"Field2":"q",
"id":"b4c24da3-9199-4eba-a8a3-e30fc17d9167",
"_version_":1746912583274725377},
{
"Field1":"x",
"author":"aeaea",
"Field2":"n",
"id":"bb17bc26-e392-4fed-ae46-bbdd40af0ac0",
"_version_":1746912583294648329},
{
"Field1":"x",
"author":"aeceb",
"Field2":"p",
"id":"5e5cfe21-ff19-464f-8adf-8b5888c418e4",
"_version_":1746912583296745472},
{
"Field1":"x",
"author":"baeab",
"Field2":"p",
"id":"54a3c8e6-137d-47c3-9192-a5ed1904dc55",
"_version_":1746912583357562889},
{
"Field1":"x",
"author":"aeeeb",
"Field2":"m",
"id":"200694a0-6248-49fd-8182-dac79657e045",
"_version_":1746912583385874444}]
}}
The above request does not return the document with author:'bebbeb', although a document with author:'bebbeb' and Field1:w1 is present in the data. This can be verified with the following two commands:
curl "http://localhost:8983/solr/fuzzyCore/select" --data-urlencode "q=author:beaeb~' AND Field1:w1"
{
"responseHeader":{
"status":0,
"QTime":4,
"params":{
"q":"author:beaeb~' AND Field1:w1"}},
"response":{"numFound":0,"start":0,"numFoundExact":true,"docs":[]
}}
However, the output of the following command is:
curl "http://localhost:8983/solr/fuzzyCore/select" --data-urlencode "q=Field1:w1"
{
"responseHeader":{
"status":0,
"QTime":1,
"params":{
"q":"Field1:w1"}},
"response":{"numFound":1,"start":0,"numFoundExact":true,"docs":[
{
"Field1":"w1",
"author":"bebbeb",
"Field2":"p",
"id":"4356dff2-ab93-4bab-a4dc-1797db38240c",
"_version_":1746912583504363523}]
}}
I have tried to post everything needed to understand my problem. Any ideas? Why is author:'bebbeb' not returned as a result for the input beaeb~?