
At work we need to asynchronously record JSON data that we receive from various endpoints. We used to write it straight to a file, but this is proving slow, so we want to switch to a producer/consumer pattern.

BlockingCollection seemed to fit the bill nicely, so I created a class that uses it like this:

using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.IO;
using System.IO.Compression;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using JetBrains.Annotations;
using Newtonsoft.Json;
using RestSharp;

namespace Test
{
    public class JsonRecorder : IJsonRecorder, IDisposable
    {
        private String _jsonFileName { get; set; }
        private DateTime _jsonWriterDate { get; set; } = DateTime.MinValue;
        private readonly JsonSerializerSettings _jsonDateSerializerSettings = new JsonSerializerSettings {DateFormatString = "yyyy-MM-ddTHH:mm:ss.fffZ"};
        private BlockingCollection<string> _itemsToWriteQueue = new BlockingCollection<string>();
        private Boolean _disposed = false;
        private Boolean _ShouldConsumerProcessRun = false;
        private Boolean _isStarted = false;
        private Task _dequeuerTask;
        private object _syncLock = new object();

        public String Name { get; }
        public Exchange Exchange { get; }
        public string FilePath { get;  }
        public ITimeProvider TimeProvider { get; }
        private ISimpleLogService LogService { get; }

        public JsonRecorder(String name, Exchange exchange, [NotNull] ISimpleLogService simpleLogService, String filePath)
            :this(name, exchange, simpleLogService, filePath, new DefaultTimeProvider())
        {
        }

        public JsonRecorder(String name, Exchange exchange, [NotNull] ISimpleLogService simpleLogService, String filePath, [NotNull] ITimeProvider timeProvider)
        {
            Exchange = exchange;
            Name = name;
            LogService = simpleLogService ?? throw new ArgumentNullException(nameof(simpleLogService));
            FilePath = filePath;
            TimeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
        }

        public Boolean InitJsonAuditFile()
        {
            try
            {
                var now = TimeProvider.DateTimeUtcNow;
                if (_jsonWriterDate.Hour == now.Hour)
                    return true;

                if (!String.IsNullOrEmpty(_jsonFileName))
                {
                    // Capture the current name; _jsonFileName is reassigned below,
                    // possibly before the queued work item runs.
                    var fileToZip = _jsonFileName;
                    ThreadPool.QueueUserWorkItem(_ => ZipJsonFile(fileToZip));
                }

                _jsonWriterDate = now;
                var directoryName = $"{FilePath}/{_jsonWriterDate:yyyyMMdd}";
                if (!Directory.Exists(directoryName))
                    Directory.CreateDirectory(directoryName);

                _jsonFileName = $@"{directoryName}/{_jsonWriterDate:yyyyMMdd_HHmmss}_{Name}.txt";
                return true;
            }
            catch (Exception ex)
            {
                LogService.LogException(this, LogCategory.GW, Exchange, ex);
            }
            return false;
        }

        public void ZipJsonFile(String fileName)
        {
            if (String.IsNullOrEmpty(fileName))
            {
                throw new ArgumentNullException(nameof(fileName));
            }
            try
            {
                using (var zip = ZipFile.Open($"{fileName}.zip", ZipArchiveMode.Create))
                {
                    zip.CreateEntryFromFile(fileName, Path.GetFileName(fileName));
                }
                File.Delete(fileName);
            }
            catch (Exception ex)
            {
                LogService.LogException(this, LogCategory.GW, Exchange, ex);
            }
        }

        public void JsonRecord(IRestClient client, Dictionary<String, String> body)
        {
            try
            {
                var record = new
                {
                    date = TimeProvider.DateTimeUtcNow,
                    url = client.BaseUrl,
                    body = body?.Select(parameter => new
                    {
                        name = parameter.Key,
                        value = parameter.Value,
                    })
                };
                _itemsToWriteQueue.Add(JsonConvert.SerializeObject(record, _jsonDateSerializerSettings));
            }
            catch (Exception)
            {
                // ignored
            }
        }



        public void JsonRecord(String stringifiedResponse)
        {
            try
            {
                _itemsToWriteQueue.Add(stringifiedResponse);
            }
            catch (Exception ex)
            {
                LogService.LogException(this, LogCategory.GW, Exchange, ex);
            }
        }


        public void Stop()
        {
            lock (_syncLock)
            {
                _itemsToWriteQueue.CompleteAdding();
                _ShouldConsumerProcessRun = false;
                _dequeuerTask?.Wait(TimeSpan.FromSeconds(5));
            }
        }

        public bool Start()
        {
            lock (_syncLock)
            {
                if (!_isStarted)
                {
                    _isStarted = true;
                    _dequeuerTask = Task.Run(() =>
                    {
                        if (Thread.CurrentThread.Name == null) // pool threads may already be named
                            Thread.CurrentThread.Name = "JsonDequeuerTask";
                        RunConsumerProcess();
                    });
                }
                return true;
            }
        }

        /// <inheritdoc />
        public void Dispose()
        {
            Dispose(true);
            GC.SuppressFinalize(this);
        }

        private void RunConsumerProcess()
        {
            _ShouldConsumerProcessRun = true;
            while (_ShouldConsumerProcessRun && !_itemsToWriteQueue.IsCompleted)
            {
                InitJsonAuditFile();

                string itemToWriteToFile = null;
                try
                {
                    itemToWriteToFile = _itemsToWriteQueue.Take();
                }
                catch (InvalidOperationException)
                {
                    // Take() throws once CompleteAdding() has been called and the queue is drained.
                }

                if (itemToWriteToFile != null)
                {
                    using (var stream = File.Open(_jsonFileName, FileMode.Append, FileAccess.Write))
                    {
                        using (var sw = new StreamWriter(stream))
                        {
                            sw.WriteLine(itemToWriteToFile);
                        }
                    }
                }
            }
        }

        private void Dispose(bool disposing)
        {
            if (_disposed)
                return;

            if (disposing)
            {
                Stop();
            }

            _disposed = true;
        }
    }
}

However, when I run this code on an actual VM, we see the memory reaching 2 GB. I have seen this: The .Net Concurrent BlockingCollection has a memory leak?, which is supposed to be fixed in .NET 4.5 (we are running .NET 4.7.2), and I have also seen this post: ConcurrentQueue holding on to a few dequeued elements.

We still see a huge memory footprint.
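One way to tell whether the growth is genuine backlog (producers outrunning the consumer) rather than the collection holding on to dequeued items would be to log the queue length periodically. A minimal diagnostic sketch; the helper class, sampling interval, and log target are all illustrative, not part of the recorder:

using System;
using System.Threading;

// Hypothetical diagnostic helper: samples a queue-count delegate on a timer
// so we can watch whether the backlog itself is growing over time.
public sealed class QueueDepthMonitor : IDisposable
{
    private readonly Timer _timer;

    public QueueDepthMonitor(Func<int> countProvider, Action<string> log)
    {
        // Sample every 5 seconds (illustrative interval).
        _timer = new Timer(_ => log($"queue depth: {countProvider()}"),
                           null, TimeSpan.FromSeconds(5), TimeSpan.FromSeconds(5));
    }

    public void Dispose() => _timer.Dispose();
}

// Usage inside JsonRecorder, where _itemsToWriteQueue.Count is available:
// var monitor = new QueueDepthMonitor(() => _itemsToWriteQueue.Count, Console.WriteLine);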

So we swapped to using this:

using System;
using System.Collections.Concurrent;
using System.Diagnostics;
using System.Threading;

public class BlockingQueueSlim<T>
{
    private readonly ConcurrentQueue<T> _queue = new ConcurrentQueue<T>();
    private readonly AutoResetEvent _autoResetEvent = new AutoResetEvent(false);
    private static readonly TimeSpan MinWait = TimeSpan.FromMilliseconds(1);

    public void Add(T item)
    {
        _queue.Enqueue(item);
        _autoResetEvent.Set();
    }

    public bool TryPeek(out T result)
    {
        return _queue.TryPeek(out result);
    }

    public T Take()
    {
        T item;
        while (!_queue.TryDequeue(out item))
            _autoResetEvent.WaitOne();
        return item;
    }

    public bool TryTake(out T item, TimeSpan patience)
    {
        if (_queue.TryDequeue(out item))
            return true;
        var stopwatch = Stopwatch.StartNew();
        while (stopwatch.Elapsed < patience)
        {
            if (_queue.TryDequeue(out item))
                return true;
            var patienceLeft = (patience - stopwatch.Elapsed);
            if (patienceLeft <= TimeSpan.Zero)
                break;
            else if (patienceLeft < MinWait)
                // otherwise the while loop will degenerate into a busy loop,
                // for the last millisecond before patience runs out
                patienceLeft = MinWait;
            _autoResetEvent.WaitOne(patienceLeft);
        }

        return false;
    }

    public int CurrentItemCount => _queue.Count;

}

I then make use of it like this:

using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.IO;
using System.IO.Compression;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using JetBrains.Annotations;
using Newtonsoft.Json;
using RestSharp;

namespace Test
{
    public class JsonRecorder : IJsonRecorder, IDisposable
    {
        private String _jsonFileName { get; set; }
        private DateTime _jsonWriterDate { get; set; } = DateTime.MinValue;
        private readonly JsonSerializerSettings _jsonDateSerializerSettings = new JsonSerializerSettings {DateFormatString = "yyyy-MM-ddTHH:mm:ss.fffZ"};
        private BlockingQueueSlim<string> _itemsToWriteQueue = new BlockingQueueSlim<string>();
        private Boolean _disposed = false;
        private Boolean _ShouldConsumerProcessRun = false;
        private Boolean _isStarted = false;
        private Task _dequeuerTask;
        private object _syncLock = new object();
        private long _seqId = 0;

        public String Name { get; }
        public Exchange Exchange { get; }
        public string FilePath { get;  }
        public ITimeProvider TimeProvider { get; }
        private ISimpleLogService LogService { get; }

        public JsonRecorder(String name, Exchange exchange, [NotNull] ISimpleLogService simpleLogService, String filePath)
            :this(name, exchange, simpleLogService, filePath, new DefaultTimeProvider())
        {
        }

        public JsonRecorder(String name, Exchange exchange, [NotNull] ISimpleLogService simpleLogService, String filePath, [NotNull] ITimeProvider timeProvider)
        {
            Exchange = exchange;
            Name = name;
            LogService = simpleLogService ?? throw new ArgumentNullException(nameof(simpleLogService));
            FilePath = filePath;
            TimeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
        }

        public Boolean InitJsonAuditFile()
        {
            try
            {
                var now = TimeProvider.DateTimeUtcNow;
                if (_jsonWriterDate.Hour == now.Hour)
                    return true;

                if (!String.IsNullOrEmpty(_jsonFileName))
                {
                    // Capture the current name; _jsonFileName is reassigned below,
                    // possibly before the queued work item runs.
                    var fileToZip = _jsonFileName;
                    ThreadPool.QueueUserWorkItem(_ => ZipJsonFile(fileToZip));
                }

                _jsonWriterDate = now;
                var directoryName = $"{FilePath}/{_jsonWriterDate:yyyyMMdd}";
                if (!Directory.Exists(directoryName))
                    Directory.CreateDirectory(directoryName);

                _jsonFileName = $@"{directoryName}/{_jsonWriterDate:yyyyMMdd_HHmmss}_{Name}.txt";
                return true;
            }
            catch (Exception ex)
            {
                LogService.LogException(this, LogCategory.GW, Exchange, ex);
            }
            return false;
        }

        public void ZipJsonFile(String fileName)
        {
            if (String.IsNullOrEmpty(fileName))
            {
                throw new ArgumentNullException(nameof(fileName));
            }
            try
            {
                using (var zip = ZipFile.Open($"{fileName}.zip", ZipArchiveMode.Create))
                {
                    zip.CreateEntryFromFile(fileName, Path.GetFileName(fileName));
                }
                File.Delete(fileName);
            }
            catch (Exception ex)
            {
                LogService.LogException(this, LogCategory.GW, Exchange, ex);
            }
        }


        public void JsonRecord(IRestClient client, Dictionary<String, String> body)
        {
            try
            {
                var record = new
                {
                    seqId = Interlocked.Increment(ref _seqId),
                    date = TimeProvider.DateTimeUtcNow,
                    url = client.BaseUrl,
                    body = body?.Select(parameter => new
                    {
                        name = parameter.Key,
                        value = parameter.Value,
                    })
                };
                _itemsToWriteQueue.Add(JsonConvert.SerializeObject(record, _jsonDateSerializerSettings));
            }
            catch (Exception)
            {
                // ignored
            }
        }

        public void JsonRecord(String stringifiedResponse)
        {
            try
            {
                _itemsToWriteQueue.Add(stringifiedResponse);
            }
            catch (Exception ex)
            {
                LogService.LogException(this, LogCategory.GW, Exchange, ex);
            }
        }

        public void Stop()
        {
            lock (_syncLock)
            {
                _isStarted = false;
                _ShouldConsumerProcessRun = false;
                _dequeuerTask?.Wait(TimeSpan.FromSeconds(5));
            }
        }

        public bool Start()
        {
            lock (_syncLock)
            {
                if (!_isStarted)
                {
                    _isStarted = true;
                    _dequeuerTask = Task.Run(() =>
                    {
                        if (Thread.CurrentThread.Name == null) // pool threads may already be named
                            Thread.CurrentThread.Name = "JsonDequeuerTask";
                        RunConsumerProcess();
                    });
                }
                return true;
            }
        }

        /// <inheritdoc />
        public void Dispose()
        {
            Dispose(true);
            GC.SuppressFinalize(this);
        }

        private void RunConsumerProcess()
        {
            _ShouldConsumerProcessRun = true;
            while (_ShouldConsumerProcessRun)
            {
                InitJsonAuditFile();

                // BlockingQueueSlim.Take() blocks forever on an empty queue, which
                // would stop this loop from ever observing the stop flag (and make
                // Stop() time out), so use TryTake with a timeout instead. Note the
                // InvalidOperationException catch from the BlockingCollection version
                // is not needed here, as BlockingQueueSlim never throws it.
                string itemToWriteToFile;
                if (!_itemsToWriteQueue.TryTake(out itemToWriteToFile, TimeSpan.FromSeconds(1)))
                    continue;

                using (var stream = File.Open(_jsonFileName, FileMode.Append, FileAccess.Write))
                {
                    using (var sw = new StreamWriter(stream))
                    {
                        sw.WriteLine(itemToWriteToFile);
                    }
                }
            }
        }

        private void Dispose(bool disposing)
        {
            if (_disposed)
                return;

            if (disposing)
            {
                Stop();
            }

            _disposed = true;
        }
    }
}

However, this too ends up eating a lot of memory, up to 2 GB.

I have also read various posts about ConcurrentQueue having memory leaks, such as here.

I am kind of lost now. What I need is:

  • I am able to produce values from various sources (different threads)
  • The consumer may run on a dedicated thread
  • I CANNOT lose data
  • I am OK if some circular buffer is used (provided I don't lose data); the consumer is fairly fast, so this should not happen
  • Memory usage is governed by some size parameter

What would people suggest as a viable approach to achieve this set of requirements? So far the .NET classes do not seem to be working out for us.
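For context on the last two points, a bounded channel from the System.Threading.Channels NuGet package (it targets netstandard2.0, so it runs on .NET 4.7.2) encodes them fairly directly: the capacity parameter caps memory, and Wait mode throttles producers instead of dropping data. A minimal sketch, where the wrapper class and capacity handling are illustrative rather than a drop-in replacement:

using System;
using System.Threading.Channels; // NuGet: System.Threading.Channels
using System.Threading.Tasks;

public class BoundedJsonQueue
{
    private readonly Channel<string> _channel;

    public BoundedJsonQueue(int capacity)
    {
        _channel = Channel.CreateBounded<string>(new BoundedChannelOptions(capacity)
        {
            // Wait (rather than drop) when full, so no data is lost;
            // producers are throttled instead.
            FullMode = BoundedChannelFullMode.Wait,
            SingleReader = true // one dedicated consumer
        });
    }

    // Producers (any thread): waits if the channel is at capacity.
    public Task AddAsync(string item) => _channel.Writer.WriteAsync(item).AsTask();

    // Consumer (dedicated): processes items as they arrive until the writer completes.
    public async Task ConsumeAsync(Action<string> write)
    {
        while (await _channel.Reader.WaitToReadAsync())
            while (_channel.Reader.TryRead(out var item))
                write(item);
    }
}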

  • The problem you have is that if your producer is faster than your consumer, it will fill memory with waiting data. It's also a problem if your consumer is much faster than your producer, as it will start a thread, process the available data, and sleep the thread again (which is high on CPU overhead) – Steve Todd May 07 '19 at 11:16
  • What I've done in the past is to roll my own version of the queue that has an upper limit to size, plus a batch size. It only started consuming if the batch size was exceeded, and queue addition was paused until there were at least batch size free slots available. – Steve Todd May 07 '19 at 11:20
  • Do you have the code for this? – sacha barber May 07 '19 at 11:40
  • This is very complex. Sometimes that's necessary. In this case, it looks like you might be writing to multiple files simultaneously on different threads. That could be slower than just using one thread for file I/O and writing one file at a time. – Scott Hannen May 07 '19 at 12:13
  • There are a couple of ways to overcome the slow-consumer problem. The simplest is to use the `BlockingCollection` boundedCapacity, which can be passed to its constructor (a minimal sketch of this appears after these comments). Once it's reached, any producers which add items to the collection will be blocked. You will, however, need to ensure an upper limit, or just limit the number of producers. Another, more complicated way is to implement some sort of flow control (similar to what the TCP protocol does), where consumer(s) can announce how many new items they can consume and the producer can take this information into consideration in order to slow down or stop producing. – Dmytro Mukalov May 07 '19 at 12:33
  • Problem is, we wanted to make it asynchronous to offload the time taken by the producer to, effectively, a new thread. If the producer becomes blocked, this defeats the purpose of what we are trying to achieve – sacha barber May 07 '19 at 13:55
  • Then you need an unlimited queue to store your work units, which implies using storage less limited than RAM, which in turn will add another degree of complexity and sounds more like a task for a comprehensive queuing solution; in that case it is more reasonable to solve with some existing off-the-shelf queuing or streaming technology that supports durable queues. – Dmytro Mukalov May 07 '19 at 14:44
  • Sadly, I do not disagree with what you say. We have NATS Streaming, which is used here. I could propose that. Thanks for the advice – sacha barber May 07 '19 at 15:12
  • Have you taken a look at the Kafka message queue software? It's OSS, fast and designed for high volume systems. Clients can resume from a last known point in the stream and you can distribute the load over multiple servers. – Steve Todd May 07 '19 at 15:57
  • Yep, quite familiar with it: https://sachabarbs.wordpress.com/kafka-series/ and https://sachabarbs.wordpress.com/kafka-streams-series/ are a series of blog posts I wrote on it. Sadly I can't push for that; not my call. That said, we have NATS, which is fairly equivalent to Kafka, as it uses partitions, a commit log, etc. No Kafka Streams-like stuff though. – sacha barber May 07 '19 at 19:00
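For reference, the boundedCapacity variant mentioned in the comments is essentially a one-line change to the original class; a minimal sketch, where the 10000 bound and wrapper class are just illustrative:

using System.Collections.Concurrent;

public class BoundedRecorderQueue
{
    // Add blocks once 10000 items are queued, capping memory at roughly
    // capacity * average serialized-record size (10000 is an example bound).
    private readonly BlockingCollection<string> _queue =
        new BlockingCollection<string>(boundedCapacity: 10000);

    // Producer: blocks while the queue is full, so the backlog cannot grow
    // without limit; TryAdd with a timeout is the non-blocking alternative.
    public void Add(string json) => _queue.Add(json);

    public bool TryAdd(string json, int millisecondsTimeout) =>
        _queue.TryAdd(json, millisecondsTimeout);

    // Consumer: same Take()/CompleteAdding() semantics as the original class.
    public string Take() => _queue.Take();

    public void CompleteAdding() => _queue.CompleteAdding();
}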
