0

Disruptor is supposed to be much faster than BlockingCollection.

In my previous question, Why is my disruptor example so slow? I've written two tests. Disruptor spent about 1 microsecond (or less) while BlockingCollection spent about 14 microseconds.

So I decided to use Disruptor in my program, but after implementing it I found that Disruptor now spends about 50 microseconds while BlockingCollection still spends 14-18 microseconds.

I've modified my production code to be "standalone test" and Disruptor still spends 50 microseconds. Why?

A simplified test is below. In this test I have two options. The first option is to Sleep for 1 ms. Then Disruptor spends 30-50 microseconds to deliver. The Second option is to simulate activity. Then Disruptor spends 7 microseconds to deliver. The same test with BlockingCollection results in 14-18 microseconds. So why is Disruptor not faster than BlockingCollection?

In my real application Disruptor spends 50 microseconds to deliver, which is too much! I expect it should deliver messages much faster than 1 microsecond.

using System;
using System.Diagnostics;
using System.Threading;
using System.Threading.Tasks;
using Disruptor;

namespace DisruptorTest
{
    /// <summary>
    /// Event slot stored in the ring buffer.
    /// </summary>
    public sealed class ValueEntry
    {
        internal int Id { get; set; }

        /// <summary>
        /// <see cref="Stopwatch.GetTimestamp"/> value captured by the producer immediately before publishing.
        /// Carrying the timestamp inside the event avoids sharing one mutable Stopwatch between
        /// the producer and the consumer thread.
        /// </summary>
        internal long PublishTimestamp { get; set; }
    }

    /// <summary>
    /// Publishes single events into a Disruptor ring buffer and reports the average
    /// publish-to-handler latency in microseconds.
    /// </summary>
    class DisruptorTest
    {

        /// <summary>
        /// Consumer that measures how long each event took to arrive and prints a running
        /// average every 1000 accepted samples.
        /// </summary>
        public class MyHandler : IEventHandler<ValueEntry>
        {
            private readonly DisruptorTest _parent;

            public MyHandler(DisruptorTest parent)
            {
                _parent = parent;
            }

            /// <summary>
            /// Called for each published event; accumulates the delivery latency into the parent's counters.
            /// </summary>
            /// <param name="data">The event; its PublishTimestamp is set by <see cref="DisruptorTest.Do"/>.</param>
            /// <param name="sequence">Ring buffer sequence of the event (unused).</param>
            /// <param name="endOfBatch">Batch marker supplied by the Disruptor (unused).</param>
            public void OnNext(ValueEntry data, long sequence, bool endOfBatch)
            {
                // Take the arrival timestamp first so the bookkeeping below is not part of the measurement.
                long elapsedTicks = Stopwatch.GetTimestamp() - data.PublishTimestamp;
                long microseconds = ToMicroseconds(elapsedTicks);

                // Filter out abnormal delays > 1000
                if (microseconds < 1000)
                {
                    _parent.sum += (int)microseconds;
                    _parent.count++;
                    if (_parent.count % 1000 == 0)
                    {
                        Console.WriteLine("average disruptor delay (microseconds) = {0}", _parent.sum / _parent.count);
                    }
                }
            }
        }

        private RingBuffer<ValueEntry> _ringBuffer;
        private const int RingSize = 64;

        static void Main(string[] args)
        {
            new DisruptorTest().Run();
        }

        /// <summary>
        /// Starts the Disruptor with a single handler and publishes 10001 events,
        /// pausing between publishes so each event is delivered on its own.
        /// </summary>
        public void Run()
        {
            // NOTE(review): this overload relies on the library's default wait strategy. If that default
            // blocks the consumer, the wake-up cost after Thread.Sleep is part of the measured latency -
            // confirm against the Disruptor-net version in use.
            var disruptor = new Disruptor.Dsl.Disruptor<ValueEntry>(() => new ValueEntry(), RingSize, TaskScheduler.Default);
            disruptor.HandleEventsWith(new MyHandler(this));

            _ringBuffer = disruptor.Start();

            for (int i = 0; i < 10001; i++)
            {
                Do();

                // We need to simulate activity to allow event to deliver

                // Option1. just Sleep. Result 30-50 microseconds.
                Thread.Sleep(1);

                // Option2. Do something. Result ~7 microseconds.
                //factorial = 1;
                //for (int j = 1; j < 100000; j++)
                //{
                //    factorial *= j;
                //}
            }
        }

        public static int factorial;

        private int sum;
        private int count;

        /// <summary>
        /// Claims the next slot, stamps it with the current timestamp and publishes it.
        /// </summary>
        public void Do()
        {
            long sequenceNo = _ringBuffer.Next();
            ValueEntry entry = _ringBuffer[sequenceNo];
            entry.Id = 0;
            // Stamp as late as possible so only the publish/hand-off is measured.
            entry.PublishTimestamp = Stopwatch.GetTimestamp();
            _ringBuffer.Publish(sequenceNo);
        }

        /// <summary>
        /// Converts Stopwatch ticks to whole microseconds.
        /// Multiplies before dividing: dividing Stopwatch.Frequency by 1,000,000 first truncates the
        /// divisor (e.g. 3.125 MHz becomes 3) and throws a divide-by-zero when the frequency is below 1 MHz.
        /// </summary>
        private static long ToMicroseconds(long ticks)
        {
            return ticks * (1000L * 1000L) / Stopwatch.Frequency;
        }

    }
}

OLD code. Should be ignored now:

using System;
using System.Diagnostics;
using System.Threading;
using System.Threading.Tasks;
using Disruptor;

namespace DisruptorTest
{
    /// <summary>
    /// Event slot stored in the ring buffer; Id selects which stopwatch/counter the event belongs to.
    /// </summary>
    public sealed class ValueEntry
    {
        internal int Id { get; set; }
    }

    /// <summary>
    /// Publishes events with ids 0..MaxId-1 and reports, per id, the average
    /// publish-to-handler latency in microseconds.
    /// </summary>
    class DisruptorTest
    {

        /// <summary>
        /// Consumer that handles only the sequences assigned to its ordinal
        /// (sequence % consumers == ordinal) and records the latency for the event's id.
        /// </summary>
        public class MyHandler : IEventHandler<ValueEntry>
        {
            private readonly int _ordinal;
            private readonly int _consumers;
            private readonly DisruptorTest _parent;

            public MyHandler(int ordinal, int consumers, DisruptorTest parent)
            {
                _ordinal = ordinal;
                _consumers = consumers;
                _parent = parent;
            }

            public void OnNext(ValueEntry data, long sequence, bool endOfBatch)
            {
                // Sequences that belong to another handler are skipped.
                if ((sequence % _consumers) != _ordinal)
                {
                    return;
                }

                var id = data.Id;
                _parent.sw[id].Stop();
                long microseconds = ToMicroseconds(_parent.sw[id].ElapsedTicks);

                // filter out abnormal delays > 1000
                if (microseconds < 1000)
                {
                    _parent.sum[id] += (int)microseconds;
                    _parent.count[id]++;
                    if (_parent.count[id] % 10 == 0)
                    {
                        Console.WriteLine("Id = {0} average disruptor delay (microseconds) = {1}",
                            id, _parent.sum[id] / _parent.count[id]);
                    }
                }
            }
        }

        private const int NumberOfThreads = 1;
        private RingBuffer<ValueEntry> _ringBuffer;
        private const int RingSize = 64;

        static void Main(string[] args)
        {
            new DisruptorTest().Run();
        }

        /// <summary>
        /// Registers NumberOfThreads handlers, starts the Disruptor and publishes 1000 events,
        /// sleeping 1 ms after each publish.
        /// </summary>
        public void Run()
        {
            var disruptor = new Disruptor.Dsl.Disruptor<ValueEntry>(() => new ValueEntry(), RingSize, TaskScheduler.Default);
            for (int i = 0; i < NumberOfThreads; i++)
            {
                disruptor.HandleEventsWith(new MyHandler(i, NumberOfThreads, this));
            }

            // Every id needs a stopwatch instance before the first Do(id) call restarts it.
            for (int i = 0; i < sw.Length; i++)
            {
                sw[i] = Stopwatch.StartNew();
            }

            _ringBuffer = disruptor.Start();

            //var rnd = new Random();
            for (int i = 0; i < 1000; i++)
            {
                //Do(rnd.Next(MaxId));
                Do(i % MaxId);
                Thread.Sleep(1);
            }
        }

        private const int MaxId = 100;

        private readonly Stopwatch[] sw = new Stopwatch[MaxId];
        private readonly int[] sum = new int[MaxId];
        private readonly int[] count = new int[MaxId];

        /// <summary>
        /// Claims the next slot, tags it with <paramref name="id"/>, restarts that id's stopwatch and publishes.
        /// </summary>
        /// <param name="id">Index into the per-id stopwatch and counter arrays (0..MaxId-1).</param>
        public void Do(int id)
        {
            long sequenceNo = _ringBuffer.Next();
            _ringBuffer[sequenceNo].Id = id;
            sw[id].Restart();
            _ringBuffer.Publish(sequenceNo);
        }

        /// <summary>
        /// Converts Stopwatch ticks to whole microseconds.
        /// Multiplies before dividing: dividing Stopwatch.Frequency by 1,000,000 first truncates the
        /// divisor and throws a divide-by-zero when the frequency is below 1 MHz.
        /// </summary>
        private static long ToMicroseconds(long ticks)
        {
            return ticks * (1000L * 1000L) / Stopwatch.Frequency;
        }

    }
}

Output:

......
Id = 91 average disruptor delay (microseconds) = 50
Id = 92 average disruptor delay (microseconds) = 48
Id = 93 average disruptor delay (microseconds) = 35
Id = 94 average disruptor delay (microseconds) = 35
Id = 95 average disruptor delay (microseconds) = 51
Id = 96 average disruptor delay (microseconds) = 55
Id = 97 average disruptor delay (microseconds) = 38
Id = 98 average disruptor delay (microseconds) = 37
Id = 99 average disruptor delay (microseconds) = 45
Community
  • 1
  • 1
Oleg Vazhnev
  • 23,239
  • 54
  • 171
  • 305
  • Please explain what you think you're doing in your test. It's very difficult to follow your test. – Kiril Nov 15 '12 at 16:05
  • i just publish numbers from 1 to MaxId to NumberOfThreads consumers. I do measure "average one item delay" for each id (i'm using arrays for that). I think test can be simplified, i will try that. – Oleg Vazhnev Nov 16 '12 at 05:18

2 Answers2

4

You're still doing the same thing: you're measuring how much time it takes to publish a single item.

// Publishes one event tagged with `id`: claims a sequence, writes the id into the slot,
// restarts the stopwatch for that id, then publishes the sequence.
public void Do(int id)
{
    long sequenceNo = _ringBuffer.Next();
    _ringBuffer[sequenceNo].Id = id;
    sw[id].Restart(); // <--- You're doing this EVERY TIME YOU PUBLISH an item! Each publish is timed on its own.
    _ringBuffer.Publish(sequenceNo);
}

In your previous question you were informed that you should be measuring THOUSANDS of publishes in order to properly utilize the Stopwatch precision.

Furthermore, you're still writing to console in the middle of your test. Avoid doing this:

// Whenever count[id] is a multiple of 10, print the running average for that id
// (integer division of sum[id] by count[id]) to the console.
if (_parent.count[id] % 10 == 0)
{
    Console.WriteLine("Id = {0} average disruptor delay (microseconds) = {1}",
        id, _parent.sum[id] / _parent.count[id]);
}

Clean Up Your Code

At the very least, you should try to clean up your code a bit; I've re-organized it a bit so it's not so messy: http://pastie.org/5382971

Disruptors are not that simple to begin with, now we have to deal with your code AND try to tell you how to fix it. More importantly: you can't do performance optimizations or testing when you have spaghetti code. Try to keep everything simple and clean. At this stage, your code is neither simple nor clean.

Let's start with the horrific naming conventions for your private member variables:

// Private members as declared in the question (mixed naming styles).
private const int NumberOfThreads = 1;
private RingBuffer<ValueEntry> _ringBuffer;
private const int RingSize = 64;
private const int MaxId = 100;
// One stopwatch, latency sum and sample count per id.
private Stopwatch[] sw = new Stopwatch[MaxId];
private int[] sum = new int[MaxId];
private int[] count = new int[MaxId];

Be consistent:

// The same private members with one consistent underscore-prefixed naming style.
private const int _numberOfThreads = 1;
private RingBuffer<ValueEntry> _ringBuffer;
private const int _ringSize = 64;
private const int _maxId = 100;
// The array sizes must use the renamed constant; the old name no longer exists after the rename.
private Stopwatch[] _sw = new Stopwatch[_maxId];
private int[] _sum = new int[_maxId];
private int[] _count = new int[_maxId];

Some other pointers:

  • Get rid of the nested classes.
  • Move the main out into a separate class (such as Program).

Build a Good Test

One of the first things that Martin and Michael tell you is that the performance testing has to be very good as well, so they've spent quite a bit of time building out a testing framework.

  • I'd recommend that you try a couple of million events, not 1000 events.
  • Make sure that you use only one timer for ALL the events.
  • Start the timer when you start processing items and stop it when there are no more items to process.
  • An efficient way to know when you've finished processing items is to use a CountdownEvent.

Update

So let's get the first dispute out of the way: the precision of the stopwatch should indeed be sufficient.

// Report the Stopwatch tick rate and the length of a single tick in whole nanoseconds.
long ticksPerSecond = Stopwatch.Frequency;
Console.WriteLine("  Timer frequency in ticks per second = {0}", ticksPerSecond);

const long NanosecondsPerSecond = 1000L * 1000L * 1000L;
long tickLengthNs = NanosecondsPerSecond / ticksPerSecond;
Console.WriteLine("  Timer is accurate within {0} nanoseconds", tickLengthNs);

On my machine the resolution is within 320 nanoseconds. So the OP is correct that the resolution on the timer should not be a problem.

I understand that the OP wants to measure average one item delivery, but there are (at least) two ways to do that.

We must investigate the difference. On a conceptual level, you're doing the exact thing as the code below:

  1. You're running a bunch of iterations.
  2. Measure every single one of them.
  3. You compute the total.
  4. You compute the average at the end.

In code:

// Method 1: time every delivery on its own and average the individual measurements.
Stopwatch sw = new Stopwatch();
long totalTicks = 0;
int numItems = 1000;
for (int i = 0; i < numItems; i++)
{
    sw.Restart();
    OneItemDelivery();
    sw.Stop();
    // Accumulate raw ticks: converting each sample to whole microseconds would round
    // every sub-microsecond delivery down to 0.
    totalTicks += sw.ElapsedTicks;
}
// Multiply before dividing so the tick frequency is not truncated to whole MHz
// (and so a frequency below 1 MHz does not cause a divide-by-zero).
long totalMicroseconds = totalTicks * (1000L * 1000L) / Stopwatch.Frequency;
long avgOneItemDelivery = totalMicroseconds / numItems;

An alternative way of measuring the performance is this:

  1. Start the timer.
  2. Run all of the iterations.
  3. Stop the timer.
  4. Compute the average time.

In code:

// Method 2: time the whole run once and divide by the number of deliveries.
// Restart (not Start) so time already accumulated on the stopwatch is discarded.
sw.Restart();
for (int i = 0; i < numItems; i++)
{
    OneItemDelivery();
}
sw.Stop();
// Multiply before dividing so the tick frequency is not truncated to whole MHz.
totalMicroseconds = sw.ElapsedTicks * (1000L * 1000L) / Stopwatch.Frequency;
long avgOneItemDelivery = totalMicroseconds / numItems;

Each one can have its own problems:

  • The first method can be less precise and you need to prove on your system that the Stopwatch can precisely handle that little amount of work (beyond simply calculating the nanosecond precision).
  • The second method will also include the computational time it takes for the iteration to occur. This introduces a small amount of bias into your measurements, but it can counter the precision issues you would normally see with the first method.

You've already noticed that a Sleep statement produces lower performance, so I would recommend that you do a simple calculation. Calculating the factorial seems like a good idea, just make it a very small calculation: no need for 100000, 100 should be fine too.

Of course, you don't need to wait 2 minutes for a test, but 10-20 seconds shouldn't be a problem.

Kiril
  • 39,672
  • 31
  • 167
  • 226
  • i am measuring "average one item delivery" because this is what i need to measure! and Stopwatch precision is more than enough! in my real application i do deliver one message, and i do not deliver millions of message! i'm writing to console when `Stopwatch` is already stopped so it doesn't change anything. Test is a little bit complicated, because I've just took class from my real application and modified it. So this test is pretty close to what I have in real life. I would say that this is the exactly application I need to optimize. – Oleg Vazhnev Nov 16 '12 at 05:13
  • changing number from 1000 to millions doesn't change anything, but in posted code i've used 1000 so people don't need to wait 1-2 minute while test is running. The same test but with `BlockingCollection` is 2-3 times faster. Why in my test BlockingCollection is faster than Disruptor? – Oleg Vazhnev Nov 16 '12 at 05:16
  • regarding code convenstions. I've named variables as Resharper suggest. so `const` should be `Uppercased`. – Oleg Vazhnev Nov 16 '12 at 07:01
  • @javapowered to me it seems that you really don't understand performance testing... you way to measure is basically not suitable and gives you unreliable/wrong results... please what is stated in the answer... – Yahia Nov 16 '12 at 07:13
  • @Yahia to me you don't understand that i don't need to measure "tests". I've measured real application. And I've found that using `Disruptor` real application is slower!. And my test is simplified version of my real application. It's completely valid and I expect Disruptor to be faster than BlockingCollection. – Oleg Vazhnev Nov 16 '12 at 07:15
  • @Yahia you are welcome to rewrite my test (now very simple, please refer the question) :) – Oleg Vazhnev Nov 16 '12 at 07:16
  • +1 for useful and well phrased answer. OP should also note there is no special reason to assume that `disruptor-net` is faster than concurrent queue. The author himself hinted that might be the case. – avishayp Nov 16 '12 at 07:46
  • @javapowered I've updated my answer. Technically, the Stopwatch precision should be enough, but you still need to confirm it (beyond simply calculating the nanosecond precision). – Kiril Nov 16 '12 at 16:13
  • @Lirik thank you very much for your effort. you've suggested to use `CountdownEvent`. note that raising and handling events is likely expensive operation. doing it for each iteration may significantly change results. I think my original test is correct less-or-more and the problem actually is that i'm not using disruptor currectly – Oleg Vazhnev Nov 16 '12 at 21:06
  • @javapowered The `CountdownEvent` does not raise an event until the internal count reaches 0. The count is decremented atomically, so the cost is almost negligent. Your use of the Disruptor might not be optimal, but it's difficult to tell. – Kiril Nov 16 '12 at 21:31
1

I read the BlockingCollection code you wrote in "Why is my disruptor example so slow?". You have many Console.WriteLine calls in the Disruptor test but none in the BlockingCollection test. Console.WriteLine is slow; it takes a lock internally.

Your ring buffer size is too small, which affects performance; it should be 1024 or larger.

Also, while (!dataItems.IsCompleted) may be a problem: the BlockingCollection is always in the adding state, and this can cause the thread to end early.

// Consumer: takes entries until the collection reports completion and records,
// per entry value, the elapsed time of that value's stopwatch in microseconds.
Task.Factory.StartNew(() => {
    while (!dataItems.IsCompleted)
    {
        ValueEntry ve = null;
        try
        {
            ve = dataItems.Take();
            // Multiply before dividing so the tick frequency is not truncated to whole MHz.
            long microseconds = sw[ve.Value].ElapsedTicks * (1000L * 1000L) / Stopwatch.Frequency;
            results[ve.Value] = microseconds;

            //Console.WriteLine("elapsed microseconds = " + microseconds);
            //Console.WriteLine("Event handled: Value = {0} (processed event {1}", ve.Value, ve.Value);
        }
        catch (InvalidOperationException)
        {
            // Deliberately ignored: the loop condition is re-checked on the next pass.
        }
    }
}, TaskCreationOptions.LongRunning);


// Producer: for each index, build an entry carrying that index, restart the
// matching stopwatch and hand the entry to the collection.
for (int index = 0; index < length; index++)
{
    var item = new ValueEntry { Value = index };

    sw[index].Restart();
    dataItems.Add(item);

    // Console output and the 1000 ms sleep stay disabled here.
}

I have rewritten your code. Disruptor is 10x faster than BlockingCollection with multiple producers (10 parallel producers) and 2x faster than BlockingCollection with a single producer:

using System;
using System.Collections.Concurrent;
using System.Diagnostics;
using System.Threading;
using System.Threading.Tasks;
using Disruptor;
using Disruptor.Dsl;
using NUnit.Framework;

namespace DisruptorTest.Ds
{
    /// <summary>
    /// Payload used by both benchmarks.
    /// </summary>
    public sealed class ValueEntry
    {
        internal int Id { get; set; }
    }

    /// <summary>
    /// Consumer that does no work, so only the hand-off itself is measured.
    /// </summary>
    class MyHandler : IEventHandler<ValueEntry>
    {
        public void OnEvent(ValueEntry data, long sequence, bool endOfBatch)
        {
        }
    }

    /// <summary>
    /// Throughput comparison: producerCount producers each push runCount items through
    /// either a Disruptor ring buffer or a bounded BlockingCollection with one consumer.
    /// </summary>
    [TestFixture]
    public class DisruptorPerformanceTest
    {
        private int producerCount = 10;
        private int runCount = 1000000;
        private int RingBufferAndCapacitySize = 1024;

        /// <summary>
        /// Runs both benchmarks back to back for each buffer size.
        /// </summary>
        [TestCase()]
        public async Task TestBoth()
        {
            for (int i = 0; i < 1; i++)
            {
                foreach (var rs in new int[] {64, 512, 1024, 2048 /*,4096,4096*2*/})
                {
                    Console.WriteLine($"RingBufferAndCapacitySize:{rs}, producerCount:{producerCount}, runCount:{runCount} of {i}");
                    RingBufferAndCapacitySize = rs;
                    await DisruptorTest();
                    await BlockingCollectionTest();
                }
            }
        }

        /// <summary>
        /// Measures the time for all producers to add their items and for the single
        /// consumer to take every one of them.
        /// </summary>
        [TestCase()]
        public async Task BlockingCollectionTest()
        {
            using (var dataItems = new BlockingCollection<ValueEntry>(RingBufferAndCapacitySize))
            {
                var sw = Stopwatch.StartNew();

                // The consuming enumerable ends only after CompleteAdding() has been called AND the
                // collection is empty, so every produced item is taken before the timer stops.
                var consumer = Task.Factory.StartNew(() =>
                {
                    foreach (var unused in dataItems.GetConsumingEnumerable())
                    {
                    }
                }, TaskCreationOptions.LongRunning);

                var producers = new Task[producerCount];
                for (int t = 0; t < producerCount; t++)
                {
                    producers[t] = Task.Run(() =>
                    {
                        for (int i = 0; i < runCount; i++)
                        {
                            ValueEntry entry = new ValueEntry();
                            entry.Id = i;
                            dataItems.Add(entry);
                        }
                    });
                }

                await Task.WhenAll(producers);

                // Signal end-of-stream; the consumer drains what is left and then finishes.
                dataItems.CompleteAdding();
                await consumer;

                sw.Stop();

                Console.WriteLine($"BlockingCollectionTest Time:{sw.ElapsedMilliseconds/1000d}");
            }
        }


        /// <summary>
        /// Measures the time for all producers to publish their items and for the
        /// Disruptor to shut down after the producers have finished.
        /// </summary>
        [TestCase()]
        public async Task DisruptorTest()
        {
            var disruptor =
                new Disruptor.Dsl.Disruptor<ValueEntry>(() => new ValueEntry(), RingBufferAndCapacitySize, TaskScheduler.Default,
                    producerCount > 1 ? ProducerType.Multi : ProducerType.Single, new BlockingWaitStrategy());
            disruptor.HandleEventsWith(new MyHandler());

            var ringBuffer = disruptor.Start();

            // StartNew already starts the stopwatch; no extra Start() call is needed.
            Stopwatch sw = Stopwatch.StartNew();

            var producers = new Task[producerCount];
            for (int t = 0; t < producerCount; t++)
            {
                producers[t] = Task.Run(() =>
                {
                    for (int i = 0; i < runCount; i++)
                    {
                        long sequenceNo = ringBuffer.Next();
                        ringBuffer[sequenceNo].Id = 0;
                        ringBuffer.Publish(sequenceNo);
                    }
                });
            }

            await Task.WhenAll(producers);

            // NOTE(review): presumably Shutdown() returns only after pending events are handled -
            // confirm against the Disruptor-net version in use.
            disruptor.Shutdown();

            sw.Stop();
            Console.WriteLine($"DisruptorTest Time:{sw.ElapsedMilliseconds/1000d}s");
        }
    }
}

BlockingCollectionTest with a shared ValueEntry instance (no new ValueEntry() in for loop)

  • RingBufferAndCapacitySize:64, producerCount:10, runCount:1000000 of 0

    DisruptorTest Time:16.962s

    BlockingCollectionTest Time:18.399

  • RingBufferAndCapacitySize:512, producerCount:10, runCount:1000000 of 0 DisruptorTest Time:6.101s

    BlockingCollectionTest Time:19.526

  • RingBufferAndCapacitySize:1024, producerCount:10, runCount:1000000 of 0

    DisruptorTest Time:2.928s

    BlockingCollectionTest Time:20.25

  • RingBufferAndCapacitySize:2048, producerCount:10, runCount:1000000 of 0

    DisruptorTest Time:2.448s

    BlockingCollectionTest Time:20.649

BlockingCollectionTest create a new ValueEntry() in for loop

  • RingBufferAndCapacitySize:64, producerCount:10, runCount:1000000 of 0

    DisruptorTest Time:27.374s

    BlockingCollectionTest Time:21.955

  • RingBufferAndCapacitySize:512, producerCount:10, runCount:1000000 of 0

    DisruptorTest Time:5.011s

    BlockingCollectionTest Time:20.127

  • RingBufferAndCapacitySize:1024, producerCount:10, runCount:1000000 of 0

    DisruptorTest Time:2.877s

    BlockingCollectionTest Time:22.656

  • RingBufferAndCapacitySize:2048, producerCount:10, runCount:1000000 of 0

    DisruptorTest Time:2.384s

    BlockingCollectionTest Time:23.567

https://www.cnblogs.com/darklx/p/11755686.html

shine
  • 610
  • 3
  • 10