I've got a problem with sorting my Lucene.NET index in .NET. I have tried almost every solution on Stack Overflow and searched Google for answers. I'm using Lucene.NET 2.9.2 and ASP.NET 2.0. I want to sort on a string field, the way you can write 'order by Title desc [asc]' in SQL.
I will show you my code, and I hope someone can help me.
// Build the document: each listed product column is added under its own name,
// stored in the index and indexed with Field.Index.ANALYZED.
string[] productColumns = new string[]
{
    "prod_id",
    "prod_title",
    "prod_desc",
    "prod_author",
    "prod_publisher",
    "prod_price"
};
foreach (string column in productColumns)
{
    string text = row[column].ToString();
    doc.Add(new Field(column, text, Field.Store.YES, Field.Index.ANALYZED));
}
//Next, I try to run a search with a sort option.
//Method that returns the appropriate Sort object:
/// <summary>
/// Maps a sort key supplied by the caller to a Lucene <c>Sort</c> object.
/// </summary>
/// <param name="_sort">
/// Sort key: "UnitPriceGorss", "UnitPriceGorssDESC", "Title" or "TitleDESC".
/// Any other value (including "" and null) sorts by title, ascending.
/// </param>
/// <returns>A <c>Sort</c> on "prod_price" or "prod_title" in the requested direction.</returns>
private static Sort SetSortForLucene(string _sort)
{
    // The third SortField argument is "reverse": false = ascending, true = descending.
    // NOTE: SortField.STRING compares whole term values, so it needs a field that is
    // indexed as a single term. "prod_title" is indexed with Field.Index.ANALYZED in
    // this file, so the title sorts below will not order by the full title until a
    // non-tokenized field is used instead.
    switch (_sort)
    {
        case "UnitPriceGorss":
            return new Sort(new SortField("prod_price", SortField.DOUBLE, false));

        case "UnitPriceGorssDESC":
            return new Sort(new SortField("prod_price", SortField.DOUBLE, true));

        case "TitleDESC":
            return new Sort(new SortField("prod_title", SortField.STRING, true));

        // "Title", the empty key and every unknown key share the same ordering.
        case "Title":
        default:
            return new Sort(new SortField("prod_title", SortField.STRING, false));
    }
}
//Inside my Lucene query method:
// Open the index and prepare a searcher over it.
// NOTE(review): neither reader nor searcher is closed in the lines shown here;
// confirm that the enclosing method closes both once the results have been read.
StandardAnalyzer analizer = new StandardAnalyzer(Version.LUCENE_29);
IndexReader reader = IndexReader.Open(IndexPath);
Searcher searcher = new IndexSearcher(reader);

// Build the Sort for the requested key and a collector limited to pageSize hits.
Sort sort = SetSortForLucene(_sort);
TopFieldDocCollector collector = new TopFieldDocCollector(reader, sort, pageSize);

// Decide which document field the QueryParser should search by default.
// A name containing '_' is used verbatim as an index field name; otherwise the
// display name is mapped to its index field, falling back to the title field.
string _luceneField = "";
if (luceneField.Contains("_"))
{
    _luceneField = luceneField;
}
else
{
    switch (luceneField)
    {
        case "Description": _luceneField = "prod_desc"; break;
        case "Author": _luceneField = "prod_author"; break;
        case "Publisher": _luceneField = "prod_publisher"; break;
        case "Title":
        default: _luceneField = "prod_title"; break;
    }
}

QueryParser parser = new QueryParser(_luceneField, analizer);
Query query = parser.Parse(luceneQuery);
searcher.Search(query, collector);

// Take the top hits from the collector.
ScoreDoc[] hits = collector.TopDocs().scoreDocs;
foreach (ScoreDoc hit in hits)
{
    // Skip hits that fail the score threshold before loading the stored document.
    // Written as a negated '>' so a NaN score is rejected exactly as before.
    if (!(hit.score > score))
    {
        continue;
    }

    Document doc = searcher.Doc(hit.doc);
    int id;
    // Collect the product id only when the stored value parses as an integer.
    if (int.TryParse(doc.Get("prod_id"), out id))
    {
        tmpId.Add(id);
    }
}
// Custom stop words used for full-text searching.
// The author suspects (unconfirmed) that these are the real cause of the sorting problem.
string[] stopWords =
{
    "a", "że", "w", "przy", "o", "bo", "co",
    "z", "za", "ze", "ta", "i", "no", "do"
};
I followed a Stack Overflow answer and another helpful link to try to solve my problem, but sorting still fails and I don't know what is wrong with my code.
After a few days I finally found a solution. The field that you want to sort on shouldn't be tokenized when it represents a string value.
For example, when you want to sort products by Title (ASCENDING/DESCENDING), you should add something like this:
// Sort-only field: the title with spaces replaced by underscores, followed by "_"
// and the product id. It is indexed without analysis and is not stored.
string titleSortKey = row["prod_title"].ToString().Replace(" ", "_")
    + "_"
    + row[Product.PROD_ID].ToString();
doc.Add(new Field(Product.PROD_TITLE_SORT, titleSortKey, Field.Store.NO, Field.Index.NOT_ANALYZED));
What I don't get is why Lucene.NET can sort by this added field even though it is neither stored nor analyzed. This sort field doesn't even appear in the index! I checked with the lukeall-1.0.1.jar index browser.
Secondly, you need to create a proper sort method:
/// <summary>
/// Maps a sort key supplied by the caller to a Lucene <c>Sort</c> object.
/// </summary>
/// <param name="_sort">
/// Sort key: "UnitPriceGorss", "UnitPriceGorssDESC", "Title" or "TitleDESC".
/// Any other value (including "" and null) keeps the default ordering by search score.
/// </param>
/// <returns>
/// A <c>Sort</c> on PROD_PRICE or PROD_TITLE_SORT in the requested direction,
/// or a sort by <c>SortField.FIELD_SCORE</c> for unrecognized keys.
/// </returns>
private static Sort SetSortForLucene(string _sort)
{
    // The third SortField argument is "reverse": false = ascending, true = descending.
    switch (_sort)
    {
        case "UnitPriceGorss":
            return new Sort(new SortField(PROD_PRICE, SortField.DOUBLE, false));

        case "UnitPriceGorssDESC":
            return new Sort(new SortField(PROD_PRICE, SortField.DOUBLE, true));

        case "Title":
            // Sorts on the dedicated sort field rather than the analyzed title field.
            return new Sort(new SortField(PROD_TITLE_SORT, SortField.STRING, false));

        case "TitleDESC":
            return new Sort(new SortField(PROD_TITLE_SORT, SortField.STRING, true));

        // Default behavior: results are ordered by Lucene.NET search score.
        // A null or empty key lands here too, so no separate normalization is needed.
        default:
            return new Sort(SortField.FIELD_SCORE);
    }
}
What makes me really suspicious is that sorting works with SortField.DOUBLE even when the field is indexed in the Lucene full-text index.
I hope this post will help anyone who has a similar problem with sorting.