I can't figure out how to make phrase query to work. It returns exact mathes, but slop option doesn't seem to make a difference.
Here's my code:
static void Main(string[] args)
{
using (Directory directory = new RAMDirectory())
{
Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
using (IndexWriter writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED))
{
// index a few documents
writer.AddDocument(createDocument("1", "henry morgan"));
writer.AddDocument(createDocument("2", "henry junior morgan"));
writer.AddDocument(createDocument("3", "henry immortal jr morgan"));
writer.AddDocument(createDocument("4", "morgan henry"));
}
// search for documents that have "foo bar" in them
String sentence = "henry morgan";
IndexSearcher searcher = new IndexSearcher(directory, true);
PhraseQuery query = new PhraseQuery()
{
//allow inverse order
Slop = 3
};
query.Add(new Term("contents", sentence));
// display search results
List<string> results = new List<string>();
Console.WriteLine("Looking for \"{0}\"...", sentence);
TopDocs topDocs = searcher.Search(query, 100);
foreach (ScoreDoc scoreDoc in topDocs.ScoreDocs)
{
var matchedContents = searcher.Doc(scoreDoc.Doc).Get("contents");
results.Add(matchedContents);
Console.WriteLine("Found: {0}", matchedContents);
}
}
private static Document createDocument(string id, string content)
{
Document doc = new Document();
doc.Add(new Field("id", id, Field.Store.YES, Field.Index.NOT_ANALYZED));
doc.Add(new Field("contents", content, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
return doc;
}
I thought that all options except document with id=3 are supposed to match, but only the first one does. Did I miss something?