I'm trying to build a fuzzy search query in pylucene, but even using the same term that was already uploaded it doesn't return anything. I've tried upload the fields as StringField or TextField and even using a customized one and change the maxEdits option. And with small words it works for example setting in the bellow code fuzzy_query = 'fox'
, but neither fuzzy_query = 'brown fox'
or fuzzy_query = 'The brown fox'
return anything.
import lucene
from org.apache.lucene.store import NIOFSDirectory
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.document import Document, Field, FieldType
from org.apache.lucene.index import IndexWriter, IndexWriterConfig
from org.apache.lucene.search import IndexSearcher, FuzzyQuery
from java.nio.file import Paths
from org.apache.lucene.index import IndexOptions
from org.apache.lucene.index import DirectoryReader
from org.apache.lucene.index import Term
from org.apache.lucene.search import IndexSearcher, TermQuery
lucene.initVM(vmargs=['-Djava.awt.headless=true'])
my_path = "../index"
# create index writer
analyzer = StandardAnalyzer()
config = IndexWriterConfig(analyzer)
index_dir = NIOFSDirectory(Paths.get(my_path))
writer = IndexWriter(index_dir, config)
# define fuzzy field
field_type = FieldType()
field_type.setStored(True)
field_type.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
field_type.setTokenized(True)
field_type.setStoreTermVectors(True)
field_type.setStoreTermVectorPositions(True)
field_type.setStoreTermVectorOffsets(True)
field_type.setStoreTermVectorPayloads(True)
# add documents to index with fuzzy field
doc = Document()
doc.add(Field("title_fuzzy", "The brown fox", field_type))
writer.addDocument(doc)
doc = Document()
doc.add(Field("title_fuzzy", "jumps over the lazy dog", field_type))
writer.addDocument(doc)
# commit changes
writer.commit()
writer.close()
directory = NIOFSDirectory(Paths.get(my_path))
# create an IndexReader and IndexSearcher
reader = DirectoryReader.open(directory)
searcher = IndexSearcher(reader)
# search for documents with fuzzy field
fuzzy_term = "The brown fox"
fuzzy_query = FuzzyQuery(Term("title_fuzzy", fuzzy_term), maxEdits=2)
hits = searcher.search(fuzzy_query, 1).scoreDocs
for hit in hits:
doc = searcher.doc(hit.doc)
print("Document: ", doc)
Thanks in advance!