1

I’m wondering whether there is any way to implement Distinct in Reactive Extensions for .NET so that it works only for a given period of time, and after that period it resets and allows previously seen values to come through again. I need this for a hot source in an application that will run for a whole year with no stops, so I’m worried about performance, and I need those values to be allowed again after some time. There is also DistinctUntilChanged, but in my case the values can be mixed – for example, for A A X A, DistinctUntilChanged will give me A X A, whereas I need the result A X, and after the given time the distinct state should be reset.

dnf
  • 1,659
  • 2
  • 16
  • 29

2 Answers2

5

The accepted answer is flawed; the flaw is demonstrated below. Here's a demonstration of a solution, with a test batch:

// Virtual-time scheduler: every timestamp below is simulated, so the test is deterministic.
var scheduler = new TestScheduler();

// Builds an OnNext notification for `value`, recorded `ms` milliseconds into virtual time.
Func<int, char, Recorded<Notification<char>>> at =
    (ms, value) => new Recorded<Notification<char>>(ms.MsTicks(), Notification.CreateOnNext(value));

// Input stream: repeated values arriving both inside and outside the 300 ms window.
var input = scheduler.CreateHotObservable<char>(
    at(200, 'A'),
    at(300, 'B'),
    at(400, 'A'),
    at(500, 'A'),
    at(510, 'C'),
    at(550, 'B'),
    at(610, 'B')
);

// Operator under test, driven by the same virtual-time scheduler.
var window = TimeSpan.FromMilliseconds(300);
var deduplicated = input.TimedDistinct(window, scheduler);

// Expected output: the 'A' at 400 and the 'B' at 550 are dropped because an equal value
// was emitted less than 300 ms earlier; the 'A' at 500 and the 'B' at 610 pass again.
var expected = scheduler.CreateHotObservable<char>(
    at(200, 'A'),
    at(300, 'B'),
    at(500, 'A'),
    at(510, 'C'),
    at(610, 'B')
);

// Record what the operator emits, then run virtual time.
var recorder = scheduler.CreateObserver<char>();
deduplicated.Subscribe(recorder);
scheduler.Start();

ReactiveAssert.AreElementsEqual(expected.Messages, recorder.Messages);

The solution includes a number of overloads for TimedDistinct, allowing for IScheduler injection as well as IEqualityComparer<T> injection, similar to Distinct. Ignoring all those overloads, the solution rests on a helper method, StateWhere, which is a kind of combination of Scan and Where: it filters like Where, but allows you to embed state in it like Scan.

/// <summary>
/// Observable operators that thread an explicit state value through the pipeline via Scan,
/// so that no state is captured in a mutable closure.
/// </summary>
public static class RxState
{
    /// <summary>
    /// Suppresses a value when an equal value was emitted less than <paramref name="expirationTime"/> ago.
    /// Uses the default equality comparer and <c>Scheduler.Default</c> for timestamps.
    /// </summary>
    public static IObservable<TSource> TimedDistinct<TSource>(this IObservable<TSource> source, TimeSpan expirationTime)
    {
        return source.TimedDistinct(expirationTime, EqualityComparer<TSource>.Default, Scheduler.Default);
    }

    /// <summary>
    /// Same as the two-argument overload, but timestamps are taken from <paramref name="scheduler"/>.
    /// </summary>
    public static IObservable<TSource> TimedDistinct<TSource>(this IObservable<TSource> source, TimeSpan expirationTime, IScheduler scheduler)
    {
        return source.TimedDistinct(expirationTime, EqualityComparer<TSource>.Default, scheduler);
    }

    /// <summary>
    /// Same as the two-argument overload, but values are compared with <paramref name="comparer"/>.
    /// </summary>
    public static IObservable<TSource> TimedDistinct<TSource>(this IObservable<TSource> source, TimeSpan expirationTime, IEqualityComparer<TSource> comparer)
    {
        return source.TimedDistinct(expirationTime, comparer, Scheduler.Default);
    }

    /// <summary>
    /// Emits a value only if no equal value (according to <paramref name="comparer"/>) is recorded,
    /// or if the recorded emission is at least <paramref name="expirationTime"/> old.
    /// </summary>
    /// <param name="source">Sequence to filter.</param>
    /// <param name="expirationTime">How long an emitted value keeps suppressing equal values.</param>
    /// <param name="comparer">Equality comparer used as the dictionary key comparer.</param>
    /// <param name="scheduler">Scheduler used to timestamp incoming values.</param>
    /// <returns>The filtered sequence.</returns>
    public static IObservable<TSource> TimedDistinct<TSource>(this IObservable<TSource> source, TimeSpan expirationTime, IEqualityComparer<TSource> comparer, IScheduler scheduler)
    {
        return source
            .Timestamp(scheduler)
            .StateWhere(
                // State: value -> timestamp at which that value was last emitted.
                new Dictionary<TSource, DateTimeOffset>(comparer),
                (seen, stamped) => stamped.Value,
                (seen, stamped) =>
                {
                    var admit = IsNewOrExpired(seen, stamped.Value, stamped.Timestamp, expirationTime);

                    // Build a fresh dictionary rather than mutating the previous state:
                    // keep only the entries that are still inside the window...
                    var next = new Dictionary<TSource, DateTimeOffset>(comparer);
                    foreach (var entry in seen)
                    {
                        if (stamped.Timestamp - entry.Value < expirationTime)
                        {
                            next.Add(entry.Key, entry.Value);
                        }
                    }

                    // ...and record the current value only when it is actually emitted.
                    if (admit)
                    {
                        next.Add(stamped.Value, stamped.Timestamp);
                    }

                    return next;
                },
                (seen, stamped) => IsNewOrExpired(seen, stamped.Value, stamped.Timestamp, expirationTime)
            );
    }

    /// <summary>
    /// Runs a Scan over <paramref name="source"/> that carries a state value, and flattens the
    /// observable produced for each item with SelectMany.
    /// </summary>
    /// <param name="source">Input sequence.</param>
    /// <param name="initialState">State passed to the selectors for the first item.</param>
    /// <param name="resultSelector">Produces the observable to emit for an item, given the state before that item.</param>
    /// <param name="stateSelector">Produces the next state, given the state before the item and the item.</param>
    public static IObservable<TResult> StateSelectMany<TSource, TState, TResult>(
            this IObservable<TSource> source,
            TState initialState,
            Func<TState, TSource, IObservable<TResult>> resultSelector,
            Func<TState, TSource, TState> stateSelector
        )
    {
        var seed = Tuple.Create(initialState, Observable.Empty<TResult>());

        return source
            .Scan(seed, (accumulated, item) =>
            {
                // Both selectors see the state as it was BEFORE this item.
                var previous = accumulated.Item1;
                var nextState = stateSelector(previous, item);
                var emission = resultSelector(previous, item);
                return Tuple.Create(nextState, emission);
            })
            .SelectMany(step => step.Item2);
    }

    /// <summary>
    /// Filters and projects <paramref name="source"/> while carrying a state value:
    /// an item is projected with <paramref name="resultSelector"/> when <paramref name="filter"/>
    /// accepts it, and is dropped otherwise.
    /// </summary>
    /// <param name="source">Input sequence.</param>
    /// <param name="initialState">State passed to the delegates for the first item.</param>
    /// <param name="resultSelector">Projects an accepted item, given the state before that item.</param>
    /// <param name="stateSelector">Produces the next state, given the state before the item and the item.</param>
    /// <param name="filter">Decides whether an item is emitted, given the state before that item.</param>
    public static IObservable<TResult> StateWhere<TSource, TState, TResult>(
            this IObservable<TSource> source,
            TState initialState,
            Func<TState, TSource, TResult> resultSelector,
            Func<TState, TSource, TState> stateSelector,
            Func<TState, TSource, bool> filter
        )
    {
        Func<TState, TSource, IObservable<TResult>> emitIfAccepted = (state, item) =>
        {
            if (!filter(state, item))
            {
                return Observable.Empty<TResult>();
            }

            return Observable.Return(resultSelector(state, item));
        };

        return source.StateSelectMany(initialState, emitIfAccepted, stateSelector);
    }

    // True when `value` has no recorded emission, or its recorded emission is at least
    // `expirationTime` older than `timestamp`.
    private static bool IsNewOrExpired<TKey>(Dictionary<TKey, DateTimeOffset> seen, TKey value, DateTimeOffset timestamp, TimeSpan expirationTime)
    {
        DateTimeOffset emittedAt;
        if (!seen.TryGetValue(value, out emittedAt))
        {
            return true;
        }

        return timestamp - emittedAt >= expirationTime;
    }
}

The accepted answer has two flaws:

  1. It doesn't accept IScheduler injection, meaning that it is hard to test within the Rx testing framework. This is easy to fix.
  2. It relies on mutable state, which doesn't work well in a multi-threaded framework like Rx.

Issue #2 is noticeable with multiple subscribers:

// Cold source of 0..4, passed through DistinctFor with a window that never expires.
var numbers = Observable.Range(0, 5);
var deduplicated = numbers.DistinctFor(TimeSpan.MaxValue);

// Two subscriptions to the same pipeline, to check whether operator state
// is shared between subscribers.
Action<int> print = i => Console.WriteLine(i);
deduplicated.Subscribe(print);
deduplicated.Subscribe(print);

Following regular Rx behavior, the output should be 0-4 printed twice. Instead, 0-4 is printed just once.

Here's another sample flaw:

// Hot source, passed through DistinctFor with a window that never expires.
var subject = new Subject<int>();
var deduplicated = subject.DistinctFor(TimeSpan.MaxValue);

// Both subscriptions are made before any value is pushed.
Action<int> print = i => Console.WriteLine(i);
deduplicated.Subscribe(print);
deduplicated.Subscribe(print);

foreach (var value in new[] { 1, 2, 3 })
{
    subject.OnNext(value);
}

This outputs 1 2 3 once, not twice.


Here's the code for MsTicks:

/// <summary>
/// Small conversions used when writing virtual-time tests.
/// </summary>
public static class RxTestingHelpers
{
    /// <summary>
    /// Converts a whole number of milliseconds to the equivalent number of <see cref="TimeSpan"/> ticks.
    /// </summary>
    /// <param name="ms">Duration in milliseconds.</param>
    /// <returns>The same duration expressed in ticks.</returns>
    public static long MsTicks(this int ms)
    {
        // int * long widens to long, so the product cannot overflow for any int input.
        return ms * TimeSpan.TicksPerMillisecond;
    }
}
Shlomo
  • 14,102
  • 3
  • 28
  • 43
  • 1
    Good answer. It's obvious that you've given this much greater consideration than I did. The hot/cold consideration is seriously nasty... when the [goto article](http://davesexton.com/blog/post/Hot-and-Cold-Observables.aspx) about it is long enough to fall asleep half-way through, it makes me wonder if there's a barrier that's a little too high for mere mortals. Maybe that's the attraction. – spender Feb 26 '17 at 21:20
  • 1
    Thanks. To be clear, while hot/cold is a nasty gotcha, that isn't the root of the flaw here: The problem is mutable state in an operator. The problem with mutable state in an operator manifests when you have multiple subscribers. See newly posted code using hot observables only. – Shlomo Feb 27 '17 at 00:15
  • tnx for adding your thoughts about my question - I will fix my code right away – dnf Mar 07 '17 at 19:54
  • I'm trying to run your code but I'm missing the MsTicks extension - without it (when passing only the numbers, without this extension) the test does not pass because the actual result is A B C. Can you attach the code for MsTicks? – dnf Mar 11 '17 at 12:45
3

With a wrapper class that timestamps items, but does not consider the timestamp (created field) for hashing or equality:

/// <summary>
/// Pairs a value with the UTC time at which the wrapper was constructed.
/// Equality and hashing look only at the wrapped value; the creation time is ignored.
/// </summary>
/// <typeparam name="T">Type of the wrapped value.</typeparam>
public class DistinctForItem<T> : IEquatable<DistinctForItem<T>>
{
    private readonly T _item;
    private readonly DateTime _created;

    /// <summary>Wraps <paramref name="item"/> and stamps it with the current UTC time.</summary>
    public DistinctForItem(T item)
    {
        _created = DateTime.UtcNow;
        _item = item;
    }

    /// <summary>The wrapped value.</summary>
    public T Item
    {
        get { return _item; }
    }

    /// <summary>UTC time captured in the constructor.</summary>
    public DateTime Created
    {
        get { return _created; }
    }

    /// <summary>
    /// Two wrappers are equal when their wrapped values are equal under the default comparer for <typeparamref name="T"/>.
    /// </summary>
    public bool Equals(DistinctForItem<T> other)
    {
        if (ReferenceEquals(this, other))
        {
            return true;
        }

        return !ReferenceEquals(null, other)
            && EqualityComparer<T>.Default.Equals(_item, other._item);
    }

    /// <summary>
    /// Equal only to an object of exactly the same runtime type whose wrapped value is equal.
    /// </summary>
    public override bool Equals(object obj)
    {
        if (ReferenceEquals(this, obj))
        {
            return true;
        }

        return !ReferenceEquals(null, obj)
            && obj.GetType() == GetType()
            && Equals((DistinctForItem<T>)obj);
    }

    /// <summary>Hash code of the wrapped value only, consistent with <see cref="Equals(DistinctForItem{T})"/>.</summary>
    public override int GetHashCode()
    {
        return EqualityComparer<T>.Default.GetHashCode(_item);
    }

    /// <summary>Value equality; two null references compare equal.</summary>
    public static bool operator ==(DistinctForItem<T> left, DistinctForItem<T> right)
    {
        return object.Equals(left, right);
    }

    /// <summary>Negation of <c>==</c>.</summary>
    public static bool operator !=(DistinctForItem<T> left, DistinctForItem<T> right)
    {
        return !(left == right);
    }
}

It is now possible to write a DistinctFor extension method:

/// <summary>
/// Suppresses a value when an equal value was emitted less than
/// <paramref name="validityPeriod"/> earlier; once that period has elapsed the value is
/// allowed through again.
/// </summary>
/// <remarks>
/// The bookkeeping is created inside <c>Observable.Defer</c>, so every subscription gets its
/// own dictionary. Creating it once per operator call would make all subscribers share (and
/// concurrently mutate) the same state, so a second subscriber would see values suppressed
/// because the first subscriber had already recorded them.
/// </remarks>
/// <typeparam name="T">Element type of the sequence.</typeparam>
/// <param name="src">Sequence to filter.</param>
/// <param name="validityPeriod">How long an emitted value keeps suppressing equal values.</param>
/// <returns>The filtered sequence.</returns>
/// <exception cref="ArgumentNullException"><paramref name="src"/> is null.</exception>
public static IObservable<T> DistinctFor<T>(this IObservable<T> src, 
                                            TimeSpan validityPeriod)
{
    // Fail at call time (as the non-deferred pipeline did) rather than at subscribe time.
    if (src == null)
    {
        throw new ArgumentNullException("src");
    }

    return Observable.Defer(() =>
    {
        // HashSet<DistinctForItem<T>> does not give access to the stored element (needed to
        // read its timestamp), so a dictionary mapping each item to itself is used instead.
        var lastEmitted = new Dictionary<DistinctForItem<T>, DistinctForItem<T>>();

        // Time of the last sweep of expired entries.
        var lastSweep = DateTime.MinValue;

        return src.Select(item => new DistinctForItem<T>(item)).Where(df =>
        {
            // Age is measured between the two items' own timestamps, so each item
            // has exactly one notion of "now".
            DistinctForItem<T> previous;
            if (lastEmitted.TryGetValue(df, out previous)
                && df.Created - previous.Created < validityPeriod)
            {
                return false;
            }

            // Drop entries that can no longer suppress anything, at most once per validity
            // period, so the dictionary does not grow without bound on a long-running source.
            // Removing them does not change the output: an expired entry is treated the same
            // as a missing one by the check above.
            if (df.Created - lastSweep >= validityPeriod)
            {
                var expired = new List<DistinctForItem<T>>();
                foreach (var entry in lastEmitted)
                {
                    if (df.Created - entry.Value.Created >= validityPeriod)
                    {
                        expired.Add(entry.Key);
                    }
                }

                foreach (var key in expired)
                {
                    lastEmitted.Remove(key);
                }

                lastSweep = df.Created;
            }

            lastEmitted[df] = df;
            return true;

        }).Select(df => df.Item);
    });
}

Which can be used:

// 1000 values cycling through 0, 1, 2.
var residues = Enumerable.Range(0, 1000).Select(i => i % 3);

residues
    .ToObservable()
    .Pace(TimeSpan.FromMilliseconds(500)) //drip feeds the observable
    .DistinctFor(TimeSpan.FromSeconds(5))
    .Subscribe(x => Console.WriteLine(x));

For reference, here is my implementation of Pace<T>:

/// <summary>
/// Pairs each element of <paramref name="src"/> with a tick of a timer that fires
/// immediately and then every <paramref name="delay"/>, and emits the element.
/// </summary>
/// <typeparam name="T">Element type of the sequence.</typeparam>
/// <param name="src">Sequence to pace.</param>
/// <param name="delay">Period of the timer.</param>
/// <returns>The elements of <paramref name="src"/>, zipped one-for-one with the timer ticks.</returns>
public static IObservable<T> Pace<T>(this IObservable<T> src, TimeSpan delay)
{
    // First tick is due at once; later ticks follow every `delay`.
    var ticks = Observable.Timer(TimeSpan.Zero, delay);

    // Zip pairs the n-th element with the n-th tick; the tick value itself is discarded.
    return src.Zip(ticks, (element, tick) => element);
}
spender
  • 117,338
  • 33
  • 229
  • 351