3

Inspired by Ayende's article https://ayende.com/blog/89089/ravendb-multi-maps-reduce-indexes, I have the following index, that works as such:

/// <summary>
/// Multi-map/reduce index that combines <c>Post</c> and <c>PostView</c> documents into
/// <see cref="Result"/> entries keyed by post id and viewing user id.
/// </summary>
/// <remarks>
/// The reduce groups on the pair (Id, ViewedByUserId). Entries emitted by the Post map carry a
/// null ViewedByUserId, so they land in a group of their own and are never merged with the
/// entries emitted by the PostView map for the same post.
/// </remarks>
public class Posts_WithViewCountByUser : AbstractMultiMapIndexCreationTask<Posts_WithViewCountByUser.Result>
{
    public Posts_WithViewCountByUser()
    {
        // Map 1: one entry per Post, contributing the title and zero views, with no user id.
        AddMap<Post>(posts => from p in posts
            select new
            {
                ViewedByUserId = (string) null,
                ViewCount = 0,

                Id = p.Id,
                PostTitle = p.PostTitle,
            });

        // Map 2: one entry per PostView, contributing a single view for the viewing user and no title.
        AddMap<PostView>(postViews => from postView in postViews
            select new
            {
                ViewedByUserId = postView.ViewedByUserId,
                ViewCount = 1,

                Id = (string) postView.PostId,
                PostTitle = (string) null,
            });

        // Reduce: group by (Id, ViewedByUserId), sum the view counts, and take the first
        // non-null title found within the group (null when the group has none).
        Reduce = results => from result in results
            group result by new
            {
                result.Id,
                result.ViewedByUserId
            }
            into g
            select new Result
            {
                ViewCount = g.Sum(x => x.ViewCount),
                Id = g.Key.Id,
                ViewedByUserId = g.Key.ViewedByUserId,
                PostTitle = g.Select(x => x.PostTitle).Where(x => x != null).FirstOrDefault(),
            };

        // Keep the PostTitle value stored in the index.
        Store(x => x.PostTitle, FieldStorage.Yes);
    }

    /// <summary>
    /// Shape shared by both map outputs and by the reduce output.
    /// </summary>
    public class Result
    {
        public string Id { get; set; }
        public string ViewedByUserId { get; set; }
        public int ViewCount { get; set; }
        public string PostTitle { get; set; }
    }
}

I want to query this index like this:

Return all posts, including - for a given user - the number of times that user has viewed each post. The "views" are stored in a separate document type, PostView. Note, that my real document types have been renamed here to match the example from the article (I certainly would not implement "most-viewed" this way).

The result from the query I get is correct - i.e. I always get all the Post documents with the correct view-count for the user. But my problem is, the PostTitle field always is null in the result set (all Post documents have a non-null value in the dataset).

I'm grouping by the combination of userId and (post)Id as my "uniqueness". The way I understand it (and please correct me if I'm wrong), is, that at this point in the reduce, I have a bunch of pseudo-documents with identical userId /postId combination, some of which come from the Post map, others from the PostView map. Now I simply find any single pseudo-document of the ones, that actually have a value for PostTitle - i.e. one that originates from the Post map. These should all obviously have the same value, as it's the same post, just "outer-joined". The .Select(....).Where(....).FirstOrDefault() chain is taken from the very example I used as a base. I then set this ViewCount value for my final document, which I project into the Result.

My question is: how do I get the non-null value for the PostTitle field in the results?

Frederik Struck-Schøning
  • 12,981
  • 8
  • 59
  • 68

2 Answers

3

The problem is that you have:

       ViewedByUserId = (string) null,

And:

        group result by new
        {
            result.Id,
            result.ViewedByUserId
        }
        into g

In other words, you are actually grouping by null, which I'm assuming isn't your intent.

It would be much simpler to have a map/reduce index just on PostView and get the PostTitle from an include or via a transformer.

Your understanding of what is going on is correct, in the sense that you are creating index results with userId / postId on them.

But what you are actually doing is creating results from PostView with userId /postId and from Post with null /postId.

And that is why you don't have the matches that you want.

Ayende Rahien
  • 22,925
  • 1
  • 36
  • 41
3

The grouping in the index is incorrect. With the following sample data:

new Post { Id = "Post-1", PostTitle = "Post Title", AuthorId = "Author-1" }
new PostView { ViewedByUserId = "User-1", PostId = "Post-1" }
new PostView { ViewedByUserId = "User-1", PostId = "Post-1" }
new PostView { ViewedByUserId = "User-2", PostId = "Post-1" }

The index results are like this:

ViewCount | Id     | ViewedByUserId | PostTitle
--------- | ------ | -------------- | ----------
 0        | Post-1 | null           | Post Title
 2        | Post-1 | User-1         | null
 1        | Post-1 | User-2         | null

The map operation in the index simply creates a common document for all source documents. Thus, the Post-1 document produces one row, the two documents for Post-1 and User-1 produce two rows (which are later reduced to the single row with ViewCount == 2) and the document for Post-1 and User-2 produces the last row.

The reduce operation then groups all the mapped rows and produces the resulting documents in the index. In this case, the Post-sourced document is stored separately from the PostView-sourced documents because the null value in the ViewedByUserId is not grouped with any document from the PostView collection.

If you can change your way of storing data, you can solve this issue by storing the number of views directly in the PostView. It would greatly reduce duplicate data in your database while having almost the same cost when updating the view count.

Complete test (needs xunit and RavenDB.Tests.Helpers nugets):

using Raven.Abstractions.Indexing;
using Raven.Client;
using Raven.Client.Indexes;
using Raven.Tests.Helpers;
using System.Linq;
using Xunit;

namespace SO41559770Answer
{
    /// <summary>
    /// Test showing that <see cref="PostViewsIndex"/> returns, for a given post and user,
    /// both the stored view count and the title of the referenced post.
    /// </summary>
    public class SO41559770 : RavenTestBase
    {
        /// <summary>
        /// Stores one post and two per-user view documents, waits for indexing, and then checks
        /// the view count and post title projected from the index for each user.
        /// </summary>
        [Fact]
        public void SO41559770Test()
        {
            using (var server = GetNewServer())
            using (var store = NewRemoteDocumentStore(ravenDbServer: server))
            {
                new PostViewsIndex().Execute(store);

                using (IDocumentSession session = store.OpenSession())
                {
                    session.Store(new Post { Id = "Post-1", PostTitle = "Post Title", AuthorId = "Author-1" });
                    session.Store(new PostView { Id = "Views-1-1", ViewedByUserId = "User-1", PostId = "Post-1", ViewCount = 2 });
                    session.Store(new PostView { Id = "Views-1-2", ViewedByUserId = "User-2", PostId = "Post-1", ViewCount = 1 });
                    session.SaveChanges();
                }

                // Make sure the writes have reached the server and the index has caught up
                // before querying.
                WaitForAllRequestsToComplete(server);
                WaitForIndexing(store);

                using (IDocumentSession session = store.OpenSession())
                {
                    // First() is what executes the query. Call it once per query and assert on
                    // the materialized entry instead of re-running the query for every assertion.
                    var resultForUser1 = session
                        .Query<PostViewsIndex.Result, PostViewsIndex>()
                        .ProjectFromIndexFieldsInto<PostViewsIndex.Result>()
                        .Where(x => x.PostId == "Post-1" && x.UserId == "User-1")
                        .First();
                    Assert.Equal(2, resultForUser1.ViewCount);
                    Assert.Equal("Post Title", resultForUser1.PostTitle);

                    var resultForUser2 = session
                        .Query<PostViewsIndex.Result, PostViewsIndex>()
                        .ProjectFromIndexFieldsInto<PostViewsIndex.Result>()
                        .Where(x => x.PostId == "Post-1" && x.UserId == "User-2")
                        .First();
                    Assert.Equal(1, resultForUser2.ViewCount);
                    Assert.Equal("Post Title", resultForUser2.PostTitle);
                }
            }
        }
    }

    /// <summary>
    /// Map-only index over <see cref="PostView"/> documents that also pulls in the id and title
    /// of the post each view document refers to.
    /// </summary>
    public class PostViewsIndex : AbstractIndexCreationTask<PostView, PostViewsIndex.Result>
    {
        public PostViewsIndex()
        {
            // For every PostView, load the Post referenced by PostId and emit one entry that
            // combines fields from both documents.
            Map = postViews => from postView in postViews
                               let post = LoadDocument<Post>(postView.PostId)
                               select new
                               {
                                   Id = postView.Id,
                                   PostId = post.Id,
                                   PostTitle = post.PostTitle,
                                   UserId = postView.ViewedByUserId,
                                   ViewCount = postView.ViewCount,
                               };
            // Store every field in the index; the accompanying test reads them back with
            // ProjectFromIndexFieldsInto.
            StoreAllFields(FieldStorage.Yes);
        }


        /// <summary>
        /// Shape of an index entry as emitted by the map above.
        /// </summary>
        public class Result
        {
            public string Id { get; set; }
            public string PostId { get; set; }
            public string PostTitle { get; set; }
            public string UserId { get; set; }
            public int ViewCount { get; set; }
        }
    }

    /// <summary>
    /// Post document: an id, a title, and the id of its author.
    /// </summary>
    public class Post
    {
        public string Id { get; set; }
        public string PostTitle { get; set; }
        public string AuthorId { get; set; }
    }

    /// <summary>
    /// View document linking a user (<c>ViewedByUserId</c>) to a post (<c>PostId</c>) and
    /// holding the view count for that pair directly in <c>ViewCount</c>.
    /// </summary>
    public class PostView
    {
        public string Id { get; set; }
        public string ViewedByUserId { get; set; }
        public string PostId { get; set; }
        public int ViewCount { get; set; }
    }
}