I am facing a problem while setting up the Lucene search engine in umbraco. I am trying to make a search on the data stored in default index created by Umbraco. The method for searching is below:
private DictionaryResult GetRowContent(
Lucene.Net.Highlight.Highlighter highlighter,
Lucene.Net.Analysis.Standard.StandardAnalyzer analyzer
,Lucene.Net.Documents.Document doc1, string criteria)
{
JavaScriptSerializer jsScriptSerializer = new JavaScriptSerializer();
DictionaryResult controls = new DictionaryResult();
Lucene.Net.Analysis.TokenStream stream = analyzer.TokenStream("", new StringReader(doc1.Get("bodyContent")));
dynamic rowContentHtmlDocument = JObject.Parse(((JValue)doc1.Get("bodyContent")).ToString(CultureInfo.CurrentCulture));
foreach (dynamic section in rowContentHtmlDocument.sections)
{
foreach (var row in section.rows)
{
foreach (var area in row.areas)
{
foreach (var control in area.controls)
{
if (control != null && control.editor != null) // && control.editor.view != null)
{
JObject rowContentHtml = null;
try
{
rowContentHtml = JObject.Parse(((JContainer)control)["value"].ToString());
}
catch (Exception e)
{
}
if (rowContentHtml != null)
{
try
{
var macroParamsDictionary = JObject.Parse(((JContainer)rowContentHtml)["macroParamsDictionary"].ToString());
var documentText = macroParamsDictionary.GetValue("dokument");
if (documentText != null)
{
var document = documentText.ToString().Replace(""", "\"");
dynamic documents = jsScriptSerializer.Deserialize<dynamic>(document);
foreach (Dictionary<string, object> doc in documents)
{
if (doc.ContainsKey("FileName") && doc.ContainsKey("DocumentId"))
{
if (doc["FileName"].ToString().Length > 0 &&
doc["FileName"].ToString().ToLower().Contains(criteria.ToLower()))
{
controls.Add(new RowResult()
{
Type = 0,
Object = new Document()
{
DocumentName = doc["FileName"].ToString(),//highlighter.GetBestFragments(stream, doc["FileName"].ToString(), 1, "..."),
DocId = Guid.Parse(doc["DocumentId"].ToString())
} // StringBuilder(@"<a href=" + Url.Action("DownloadDocument", "Document", new { DocumentId = doc["DocumentId"] }) + "> " + @doc["FileName"] + "</a>").ToString()
}
);
}
}
}
}
}
catch (Exception e)
{
}
}
else
{
var text = HtmlRemoval.StripTagsRegex(((JContainer)control)["value"].ToString()).Replace("ë", "e").Replace("ç", "c");
var textResultFiltered = highlighter.GetBestFragments(stream,doc1.Get("bodyContent"), 5, "...");
controls.Add(new RowResult()
{
Type = 1,
Object = textResultFiltered
});
}
}
}
}
}
}
return controls;
}
Here I am trying to filter macro documents from simple html content and render differently. But at the end to this part
var text = HtmlRemoval.StripTagsRegex(((JContainer)control)["value"].ToString()).Replace("ë", "e").Replace("ç", "c");
var textResultFiltered = highlighter.GetBestFragments(stream,doc1.Get("bodyContent"), 5, "...");
controls.Add(new RowResult()
{
Type = 1,
Object = textResultFiltered
});
it is including the macro in searching. As the result I get the documents property but the html content hightlighted has the macro content like below:
6th Edition V413HAV.pdf","FileContent"... Framework 6th Edition V413HAV.pdf","... with Java 8 - 1st Edition (2015) - Copy.pdf"... 4.5 Framework 6th Edition V413HAV.pdf","... And The NET 4.5 Framework 6th Edition V413HAV.pdf" which is coming from Json data of the macro. Any idea how to exclude the macros from searching or to customize the hmtl content not to search on specific macro ? Thanks in advance.
I am refering to this link to create the Hightlighter etc... Link to Lucene example
Any idea how to prevent searching on macros or exclude them from the highlighted content ?