2

I am trying to do a relational search with Lucene.NET 4.8 (actually I compiled it using the latest sources) by following this post. I reference Lucene.Net, Lucene.Net.Analysis.Common, Lucene.Net.Grouping, Lucene.Net.Join, and Lucene.Net.QueryParser.

The problem is: I do not get any results. In my example below I consider a blog the parent while its comments are the children. I want to find a blog which contains "first" and which has a comment containing "like" (which is the one with Id 1).

How to fix this sample code?

    /// <summary>
    /// Builds a fresh index under <paramref name="dbFolder"/>\blog_db containing two comment
    /// documents and two blog documents, dumps every stored document to the console, then
    /// runs a query and prints the groups returned by a <c>ToParentBlockJoinCollector</c>.
    /// </summary>
    /// <param name="dbFolder">Folder in which the "blog_db" index directory is (re)created.</param>
    static void BlockJoinQueryTest(string dbFolder)
    {
        var analyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48);
        var config = new IndexWriterConfig(LuceneVersion.LUCENE_48, analyzer);
        config.SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE_OR_APPEND);

        // Wipe any index left over from a previous run so the test starts from an empty directory.
        var indexPathBlog = dbFolder + "\\blog_db";
        if (System.IO.Directory.Exists(indexPathBlog))
        {
            System.IO.Directory.Delete(indexPathBlog, true);
        }
        System.IO.Directory.CreateDirectory(indexPathBlog);
        var indexDirectoryBlog = FSDirectory.Open(new System.IO.DirectoryInfo(indexPathBlog));
        var indexWriterBlog = new IndexWriter(indexDirectoryBlog, config);

        // NOTE(review): every document below is added with its own AddDocument call.
        // The block-join query is documented to require children and their parent to be
        // indexed as ONE block (AddDocuments), children first and the parent last, so
        // these documents are presumably not recognised as parent/child blocks - confirm.
        Document comment = new Document();
        comment.Add(new TextField("BlogId", "1", Field.Store.YES));
        comment.Add(new TextField("CommentContent", "I like your first blog!", Field.Store.YES));
        comment.Add(new TextField("Type", "comment", Field.Store.YES));
        comment.Add(new TextField("Note", "child", Field.Store.YES));
        indexWriterBlog.AddDocument(comment);

        comment = new Document();
        comment.Add(new TextField("BlogId", "1", Field.Store.YES));
        comment.Add(new TextField("CommentContent", "Not that great.", Field.Store.YES));
        comment.Add(new TextField("Type", "comment", Field.Store.YES));
        comment.Add(new TextField("Note", "child", Field.Store.YES));
        indexWriterBlog.AddDocument(comment);

        Document blog = new Document();
        blog.Add(new TextField("Id", "1", Field.Store.YES));
        blog.Add(new TextField("BlogContent", "Content of first blog", Field.Store.YES));
        blog.Add(new TextField("Type", "blog", Field.Store.YES));
        blog.Add(new TextField("Note", "parent", Field.Store.YES));
        indexWriterBlog.AddDocument(blog);

        blog = new Document();
        blog.Add(new TextField("Id", "2", Field.Store.YES));
        blog.Add(new TextField("BlogContent", "This is the second blog!", Field.Store.YES));
        blog.Add(new TextField("Type", "blog", Field.Store.YES));
        blog.Add(new TextField("Note", "parent", Field.Store.YES));
        indexWriterBlog.AddDocument(blog);

        indexWriterBlog.Commit();

        var searcher = new IndexSearcher(DirectoryReader.Open(indexDirectoryBlog));

        // Diagnostic dump: print every document in index order.
        Console.WriteLine("Begin content enumeration:");
        for (int i = 0; i < searcher.IndexReader.MaxDoc; i++)
        {
            var doc = searcher.IndexReader.Document(i);
            Console.WriteLine("Document " + i + ": " + doc.ToString());
        }
        Console.WriteLine("End content enumeration.");

        // Filter that identifies the parent documents (Type == "blog").
        // NOTE(review): the join is reported to throw "parentFilter must return FixedBitSet"
        // for a plain QueryWrapperFilter; a FixedBitSetCachingWrapperFilter may be needed
        // here instead of CachingWrapperFilter - confirm.
        Filter blogs = new CachingWrapperFilter(
                new QueryWrapperFilter(
                  new TermQuery(
                    new Term("Type", "blog"))));
        // Child-level query: comments whose content contains the term "like".
        BooleanQuery commentQuery = new BooleanQuery();
        commentQuery.Add(new TermQuery(new Term("CommentContent", "like")), BooleanClause.Occur.MUST);
        //commentQuery.Add(new TermQuery(new Term("BlogId", "1")), BooleanClause.Occur.MUST);

        // Lifts child matches to their parent blog document.
        var commentJoinQuery = new ToParentBlockJoinQuery(
            commentQuery,
            blogs,
            ScoreMode.None);

        BooleanQuery query = new BooleanQuery();
        query.Add(new TermQuery(new Term("BlogContent", "first")), BooleanClause.Occur.MUST);
        // NOTE(review): this adds the raw child query (commentQuery), not commentJoinQuery.
        // No single document has both a BlogContent and a CommentContent field, so the two
        // MUST clauses cannot match the same document; commentJoinQuery is also never part
        // of the searched query although it is passed to GetTopGroups below.
        query.Add(commentQuery, BooleanClause.Occur.MUST);
        var c = new ToParentBlockJoinCollector(
            Sort.RELEVANCE, // sort
            10,             // numHits
            true,           // trackScores
            false           // trackMaxScore
            );
        searcher.Search(query, c);
        int maxDocsPerGroup = 10;
        var hits = c.GetTopGroups(
            commentJoinQuery,
            Sort.INDEXORDER,
            0,   // offset
            maxDocsPerGroup,  // maxDocsPerGroup
            0,   // withinGroupOffset
            true // fillSortFields
          );
        // hits is checked for null before use; the else branch reports "No hits.".
        if (hits != null)
        {
            Console.WriteLine("Found " + hits.TotalGroupCount + " groups:");
            for (int i = 0; i < hits.TotalGroupCount; i++)
            {
                var group = hits.Groups[i];
                Console.WriteLine("Group " + i + ": " + group.ToString());

                // NOTE(review): the label below prints the group index i, not the hit index j.
                for (int j = 0; j < group.TotalHits && j < maxDocsPerGroup; j++)
                {
                    Document doc = searcher.Doc(group.ScoreDocs[j].Doc);
                    Console.WriteLine("Hit " + i + ": " + doc.ToString());
                }
            }
        }
        else
        {
            Console.WriteLine("No hits.");
        }

        Console.WriteLine("Done.");
Jack Miller
  • 6,843
  • 3
  • 48
  • 66

2 Answers

0

Don't you have to add the Documents in one go as an IEnumerable so they are 'blocked'?

http://blog.mikemccandless.com/2012/01/searching-relational-content-with.html explains more

EDIT:

I had a go at doing this with the following code which also doesn't seem to work, would appreciate if anyone could shed any light?

        // Second attempt: indexes each blog with its comment through AddDocuments (one call
        // per blog), then runs a ToParentBlockJoinQuery and prints the resulting groups.
        var analyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48);
        var config = new IndexWriterConfig(LuceneVersion.LUCENE_48, analyzer);
        config.SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE_OR_APPEND);

        // Start from an empty index directory on every run.
        var indexPathBlog = "D:\\Test";
        if (System.IO.Directory.Exists(indexPathBlog))
        {
            System.IO.Directory.Delete(indexPathBlog, true);
        }

        System.IO.Directory.CreateDirectory(indexPathBlog);
        var indexDirectoryBlog = FSDirectory.Open(new System.IO.DirectoryInfo(indexPathBlog));
        var indexWriterBlog = new IndexWriter(indexDirectoryBlog, config);

        // One list per block: "one" holds blog 1 and its comment, "two" holds blog 2 and its comment.
        var one = new List<Document>();
        var two = new List<Document>();

        // NOTE(review): the parent (blog) is put into the list BEFORE its child comment.
        // The block-join query is documented to require the children first and the
        // parent as the LAST document of each block.
        var blogOne = new Document();
        blogOne.Add(new TextField("Id", "1", Field.Store.YES));
        blogOne.Add(new TextField("BlogContent", "Content of first blog", Field.Store.YES));
        blogOne.Add(new TextField("Type", "blog", Field.Store.YES));
        blogOne.Add(new TextField("Note", "parent", Field.Store.YES));
        one.Add(blogOne);

        Document commentOne = new Document();
        commentOne.Add(new TextField("BlogId", "1", Field.Store.YES));
        commentOne.Add(new TextField("CommentContent", "I like your first blog!", Field.Store.YES));
        commentOne.Add(new TextField("Type", "comment", Field.Store.YES));
        commentOne.Add(new TextField("Note", "child", Field.Store.YES));
        one.Add(commentOne);

        Document blogTwo = new Document();
        blogTwo.Add(new TextField("Id", "2", Field.Store.YES));
        blogTwo.Add(new TextField("BlogContent", "This is the second blog!", Field.Store.YES));
        blogTwo.Add(new TextField("Type", "blog", Field.Store.YES));
        blogTwo.Add(new TextField("Note", "parent", Field.Store.YES));
        two.Add(blogTwo);

        var commentTwo = new Document();
        commentTwo.Add(new TextField("BlogId", "2", Field.Store.YES));
        commentTwo.Add(new TextField("CommentContent", "Not that great.", Field.Store.YES));
        commentTwo.Add(new TextField("Type", "comment", Field.Store.YES));
        commentTwo.Add(new TextField("Note", "child", Field.Store.YES));
        two.Add(commentTwo);

        // Each AddDocuments call indexes one list as a single block.
        indexWriterBlog.AddDocuments(one);
        indexWriterBlog.AddDocuments(two);

        indexWriterBlog.Commit();

        var searcher = new IndexSearcher(DirectoryReader.Open(indexDirectoryBlog));

        // NOTE(review): the term uses the field name "type", but the documents above are
        // indexed with the field name "Type", so this filter does not address that field.
        // NOTE(review): the join is reported to throw "parentFilter must return FixedBitSet"
        // for a plain QueryWrapperFilter; wrapping it in FixedBitSetCachingWrapperFilter
        // is the reported remedy - confirm.
        Filter parentQuery = 
                new QueryWrapperFilter(
                  new TermQuery(
                    new Term("type", "blog")));

        // NOTE(review): the term text is the whole sentence. Presumably the analyzer split
        // the field into separate terms at index time, so no single indexed term equals
        // this full sentence - confirm.
        BooleanQuery childQuery = new BooleanQuery();
        childQuery.Add(new TermQuery(new Term("CommentContent", "I like your first blog!")), BooleanClause.Occur.MUST);

        // Lifts matching child comments to their parent blog document.
        var commentJoinQuery = new ToParentBlockJoinQuery(
            childQuery,
            parentQuery,
            ScoreMode.None);

        // NOTE(review): "query" is built here but never searched; Search below is called
        // with commentJoinQuery directly.
        BooleanQuery query = new BooleanQuery();
        //query.Add(new TermQuery(new Term("Type", "blog")), BooleanClause.Occur.MUST);
        query.Add(commentJoinQuery, BooleanClause.Occur.MUST);

        var c = new ToParentBlockJoinCollector(
            Sort.RELEVANCE, // sort
            10,             // numHits
            false,           // trackScores
            false           // trackMaxScore
            );

        searcher.Search(commentJoinQuery, c);

        int maxDocsPerGroup = 10;
        var hits = c.GetTopGroups(
            commentJoinQuery,
            Sort.INDEXORDER,
            0,   // offset
            maxDocsPerGroup,  // maxDocsPerGroup
            0,   // withinGroupOffset
            true // fillSortFields
          );

        // hits is checked for null before use; the else branch reports "No hits.".
        if (hits != null)
        {
            Console.WriteLine("Found " + hits.TotalGroupCount + " groups:");
            for (int i = 0; i < hits.TotalGroupCount; i++)
            {
                var group = hits.Groups[i];
                Console.WriteLine("Group " + i + ": " + group.ToString());

                // NOTE(review): the label below prints the group index i, not the hit index j.
                for (int j = 0; j < group.TotalHits && j < maxDocsPerGroup; j++)
                {
                    Document doc = searcher.Doc(group.ScoreDocs[j].Doc);
                    Console.WriteLine("Hit " + i + ": " + doc.ToString());
                }
            }
        }
        else
        {
            Console.WriteLine("No hits.");
        }

        Console.WriteLine("Done.");
Ant
  • 336
  • 1
  • 3
  • 14
  • Take a look at the comments in the Lucene.NET code. The order of the documents within each block is wrong: add the comments first and the blog last. The doc comment on ToParentBlockJoinQuery says: "This query requires that you index children and parent docs as a single block, using the IndexWriter.AddDocuments() or IndexWriter.UpdateDocuments() API. In each block, the child documents must appear first, ending with the parent document." – André Mar 04 '18 at 10:00
0

I also stumbled across this and managed to fix it.

  • @Ant is right when stating that the parent documents MUST be the last in the block.

But there were two remaining problems with the code

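  1. For some reason - I am sorry for not being a Lucene expert - when the CommentContent is a sentence ("I like your first blog!") and you search for it using a TermQuery, you won't get any results. This has to do with the analysis of the field: at index time the StandardAnalyzer splits the sentence into separate lowercase terms, whereas a TermQuery is not analyzed and has to equal one indexed term exactly (so searching for the single term "like" would match, but the whole sentence never does). What I did was replace the field's content with "blog".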

  2. Now the IndexSearcher seemed to find a result, but threw an error: "System.InvalidOperationException: 'parentFilter must return FixedBitSet; got Lucene.Net.Search.QueryWrapperFilter+DocIdSetAnonymousInnerClassHelper'". Looking through the test cases of Lucene.NET (GitHub), I saw that I had to wrap the parentQuery in a FixedBitSetCachingWrapperFilter: Filter parentQuery = new FixedBitSetCachingWrapperFilter( new QueryWrapperFilter( new TermQuery( new Term("Type", "blog"))));

The full code is:

  // Block-join sample: indexes every blog together with its comments as ONE block
            // (children first, parent last) and then finds the blogs that own a matching comment.
            var indexPathBlog = Path.Combine(Environment.CurrentDirectory, "index");
            if (System.IO.Directory.Exists(indexPathBlog))
            {
                // Start from an empty index on every run.
                System.IO.Directory.Delete(indexPathBlog, true);
            }

            System.IO.Directory.CreateDirectory(indexPathBlog);

            // Analyzer, directory, writer and reader are all IDisposable and hold file
            // handles / the index write lock, so they are released deterministically.
            using (var analyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48))
            using (var indexDirectoryBlog = FSDirectory.Open(new System.IO.DirectoryInfo(indexPathBlog)))
            {
                var config = new IndexWriterConfig(LuceneVersion.LUCENE_48, analyzer);
                config.SetOpenMode(OpenMode.CREATE_OR_APPEND);

                using (var indexWriterBlog = new IndexWriter(indexDirectoryBlog, config))
                {
                    // One list per block. Within a block the child documents (comments)
                    // MUST come first and the parent document (blog) MUST be the last one.
                    var one = new List<Document>();
                    var two = new List<Document>();

                    Document commentOne = new Document();
                    commentOne.Add(new TextField("BlogId", "1", Field.Store.YES));
                    commentOne.Add(new TextField("CommentContent", "blog", Field.Store.YES));
                    commentOne.Add(new TextField("Type", "comment", Field.Store.YES));
                    commentOne.Add(new TextField("Note", "child", Field.Store.YES));
                    one.Add(commentOne);

                    var blogOne = new Document();
                    blogOne.Add(new TextField("Id", "1", Field.Store.YES));
                    blogOne.Add(new TextField("BlogContent", "Content of first blog!", Field.Store.YES));
                    blogOne.Add(new TextField("Type", "blog", Field.Store.NO));
                    blogOne.Add(new TextField("Note", "parent", Field.Store.YES));
                    one.Add(blogOne);

                    var commentTwo = new Document();
                    commentTwo.Add(new TextField("BlogId", "2", Field.Store.YES));
                    commentTwo.Add(new TextField("CommentContent", "Not that great.", Field.Store.YES));
                    commentTwo.Add(new TextField("Type", "comment", Field.Store.YES));
                    commentTwo.Add(new TextField("Note", "child", Field.Store.YES));
                    two.Add(commentTwo);

                    Document blogTwo = new Document();
                    blogTwo.Add(new TextField("Id", "2", Field.Store.YES));
                    blogTwo.Add(new TextField("BlogContent", "This is the second blog!", Field.Store.YES));
                    blogTwo.Add(new TextField("Type", "blog", Field.Store.NO));
                    blogTwo.Add(new TextField("Note", "parent", Field.Store.YES));
                    two.Add(blogTwo);

                    // AddDocuments (plural) writes each list as one contiguous block.
                    indexWriterBlog.AddDocuments(one);
                    indexWriterBlog.AddDocuments(two);

                    indexWriterBlog.Commit();
                }

                using (var reader = DirectoryReader.Open(indexDirectoryBlog))
                {
                    var searcher = new IndexSearcher(reader);

                    // Identifies the parent documents. ToParentBlockJoinQuery requires the
                    // filter to produce a FixedBitSet, hence FixedBitSetCachingWrapperFilter.
                    Filter parentQuery =
                        new FixedBitSetCachingWrapperFilter(
                            new QueryWrapperFilter(
                                new TermQuery(
                                    new Term("Type", "blog"))));

                    // A TermQuery is not analyzed: the term text has to equal a single
                    // token exactly as the analyzer wrote it to the index.
                    BooleanQuery childQuery = new BooleanQuery();
                    childQuery.Add(new TermQuery(new Term("CommentContent", "blog")), Occur.MUST);

                    // Lifts every matching comment to the blog that closes its block.
                    var commentJoinQuery = new ToParentBlockJoinQuery(
                        childQuery,
                        parentQuery,
                        ScoreMode.None);

                    var c = new ToParentBlockJoinCollector(
                        Sort.RELEVANCE, // sort
                        10,             // numHits
                        false,          // trackScores
                        false           // trackMaxScore
                    );

                    searcher.Search(commentJoinQuery, c);

                    int maxDocsPerGroup = 10;
                    var hits = c.GetTopGroups(
                        commentJoinQuery,
                        Sort.INDEXORDER,
                        0,                // offset
                        maxDocsPerGroup,  // maxDocsPerGroup
                        0,                // withinGroupOffset
                        true              // fillSortFields
                    );

                    // GetTopGroups returns null when no parent matched.
                    if (hits != null)
                    {
                        Console.WriteLine("Found " + hits.TotalGroupCount + " groups:");

                        // TotalGroupCount counts ALL matching parents, while Groups only holds
                        // the ones actually collected (bounded by numHits), so iterate the array.
                        for (int i = 0; i < hits.Groups.Length; i++)
                        {
                            var group = hits.Groups[i];
                            Console.WriteLine("Group " + i + ": " + group.ToString());

                            // ScoreDocs holds at most maxDocsPerGroup children of this parent.
                            for (int j = 0; j < group.ScoreDocs.Length; j++)
                            {
                                Document doc = searcher.Doc(group.ScoreDocs[j].Doc);
                                Console.WriteLine("Hit " + j + ": " + doc.ToString());
                            }
                        }
                    }
                    else
                    {
                        Console.WriteLine("No hits.");
                    }
                }
            }

            Console.WriteLine("Done.");
            Console.ReadKey();

Note that I used the following packages in a .NET Core console app:

<PackageReference Include="Lucene.Net" Version="4.8.0-beta00005" />
<PackageReference Include="Lucene.Net.Analysis.Common" Version="4.8.0-beta00005" />
<PackageReference Include="Lucene.Net.Join" Version="4.8.0-beta00005" />
Alexander Marek
  • 479
  • 1
  • 7
  • 22