1

I am using the Whoosh package to do fuzzy searching in Python. Is there any way to get the edit distance between the search term and the text it matched?

My code is as follows:

import codecs
import whoosh
import os, os.path



from whoosh.index import create_in
from whoosh.fields import *
from whoosh.query import FuzzyTerm


class MyFuzzyTerm(FuzzyTerm):
    """A ``FuzzyTerm`` whose ``maxdist`` defaults to 5.

    All arguments are forwarded unchanged to ``FuzzyTerm.__init__``; the
    subclass exists only so it can be handed to a query parser as its
    ``termclass`` with these defaults.
    """

    def __init__(self, fieldname, text, boost=1.0, maxdist=5, prefixlength=1, constantscore=True):
        super(MyFuzzyTerm, self).__init__(
            fieldname,
            text,
            boost=boost,
            maxdist=maxdist,
            prefixlength=prefixlength,
            constantscore=constantscore,
        )


# Make sure the directory that will hold the on-disk index exists.
if not os.path.exists("indexdir"):
    os.mkdir("indexdir")

# Read the whole document to be indexed. The `with` block guarantees the
# file handle is closed even if reading fails, and `path` is the single
# source of truth for the file name (it is also stored as the document name).
path = u"MMM2.txt"
with open(path, 'r') as source_file:
    content = source_file.read()

# `name` is stored so it can be shown in search hits; `content` is indexed
# for searching but not stored.
schema = Schema(name=TEXT(stored=True), content=TEXT)
ix = create_in("indexdir", schema)

# Add the single document and commit the write.
writer = ix.writer()
writer.add_document(name=path, content=content)

writer.commit()

from whoosh.qparser import QueryParser, FuzzyTermPlugin, PhrasePlugin, SequencePlugin

# Build a parser over the "content" field that creates MyFuzzyTerm objects
# for terms. Plugins are registered in the same order as before:
# fuzzy-term syntax is added, the phrase plugin is removed, and the
# sequence plugin is added.
parser = QueryParser(u"content", ix.schema, termclass=MyFuzzyTerm)
parser.add_plugin(FuzzyTermPlugin())
parser.remove_plugin_class(PhrasePlugin)
parser.add_plugin(SequencePlugin())

# Quoted sequence followed by "~".
query = parser.parse(u"\"Tennessee Riverkeep Inc\"~")

# The searcher is only needed while running the query and reading its hits.
with ix.searcher() as searcher:
    results = searcher.search(query)
    print("nb of results =", len(results))
    for hit in results:
        print(hit)

The term I search for is "Tennessee Riverkeep Inc". The text that actually appears in the document is "Tennessee Riverkeeper Inc", so the distance is 2 in this case. I set the maximum distance to 5. Is there any way to return the number "2" in this case?

dara wong
  • 37
  • 5

0 Answers0