I have a custom "CachedEnumerable" class (inspired by Caching IEnumerable) that I need to make thread safe for my asp.net core web app.
Is the following implementation of the Enumerator thread safe? (All other reads/writes to IList _cache are locked appropriately) (Possibly related to Does the C# Yield free a lock?)
And more specifically, if there are 2 threads accessing the enumerator, how do I protect against one thread incrementing "index" causing a second enumerating thread from getting the wrong element from the _cache (ie. element at index + 1 instead of at index)? Is this race condition a real concern?
public IEnumerator<T> GetEnumerator()
{
var index = 0;
while (true)
{
T current;
lock (_enumeratorLock)
{
if (index >= _cache.Count && !MoveNext()) break;
current = _cache[index];
index++;
}
yield return current;
}
}
Full code of my version of CachedEnumerable:
public class CachedEnumerable<T> : IDisposable, IEnumerable<T>
{
IEnumerator<T> _enumerator;
private IList<T> _cache = new List<T>();
public bool CachingComplete { get; private set; } = false;
public CachedEnumerable(IEnumerable<T> enumerable)
{
switch (enumerable)
{
case CachedEnumerable<T> cachedEnumerable: //This case is actually dealt with by the extension method.
_cache = cachedEnumerable._cache;
CachingComplete = cachedEnumerable.CachingComplete;
_enumerator = cachedEnumerable.GetEnumerator();
break;
case IList<T> list:
//_cache = list; //without clone...
//Clone:
_cache = new T[list.Count];
list.CopyTo((T[]) _cache, 0);
CachingComplete = true;
break;
default:
_enumerator = enumerable.GetEnumerator();
break;
}
}
public CachedEnumerable(IEnumerator<T> enumerator)
{
_enumerator = enumerator;
}
private int CurCacheCount
{
get
{
lock (_enumeratorLock)
{
return _cache.Count;
}
}
}
public IEnumerator<T> GetEnumerator()
{
var index = 0;
while (true)
{
T current;
lock (_enumeratorLock)
{
if (index >= _cache.Count && !MoveNext()) break;
current = _cache[index];
index++;
}
yield return current;
}
}
//private readonly AsyncLock _enumeratorLock = new AsyncLock();
private readonly object _enumeratorLock = new object();
private bool MoveNext()
{
if (CachingComplete) return false;
if (_enumerator != null && _enumerator.MoveNext()) //The null check should have been unnecessary b/c of the lock...
{
_cache.Add(_enumerator.Current);
return true;
}
else
{
CachingComplete = true;
DisposeWrappedEnumerator(); //Release the enumerator, as it is no longer needed.
}
return false;
}
public T ElementAt(int index)
{
lock (_enumeratorLock)
{
if (index < _cache.Count)
{
return _cache[index];
}
}
EnumerateUntil(index);
lock (_enumeratorLock)
{
if (_cache.Count <= index) throw new ArgumentOutOfRangeException(nameof(index));
return _cache[index];
}
}
public bool TryGetElementAt(int index, out T value)
{
lock (_enumeratorLock)
{
value = default;
if (index < CurCacheCount)
{
value = _cache[index];
return true;
}
}
EnumerateUntil(index);
lock (_enumeratorLock)
{
if (_cache.Count <= index) return false;
value = _cache[index];
}
return true;
}
private void EnumerateUntil(int index)
{
while (true)
{
lock (_enumeratorLock)
{
if (_cache.Count > index || !MoveNext()) break;
}
}
}
public void Dispose()
{
DisposeWrappedEnumerator();
}
private void DisposeWrappedEnumerator()
{
if (_enumerator != null)
{
_enumerator.Dispose();
_enumerator = null;
if (_cache is List<T> list)
{
list.Trim();
}
}
}
IEnumerator IEnumerable.GetEnumerator()
{
return GetEnumerator();
}
public int CachedCount
{
get
{
lock (_enumeratorLock)
{
return _cache.Count;
}
}
}
public int Count()
{
if (CachingComplete)
{
return _cache.Count;
}
EnsureCachingComplete();
return _cache.Count;
}
private void EnsureCachingComplete()
{
if (CachingComplete)
{
return;
}
//Enumerate the rest of the collection
while (!CachingComplete)
{
lock (_enumeratorLock)
{
if (!MoveNext()) break;
}
}
}
public T[] ToArray()
{
EnsureCachingComplete();
//Once Caching is complete, we don't need to lock
if (!(_cache is T[] array))
{
array = _cache.ToArray();
_cache = array;
}
return array;
}
public T this[int index] => ElementAt(index);
}
public static CachedEnumerable<T> Cached<T>(this IEnumerable<T> source)
{
//no gain in caching a cache.
if (source is CachedEnumerable<T> cached)
{
return cached;
}
return new CachedEnumerable<T>(source);
}
}
Basic Usage: (Although not a meaningful use case)
var cached = expensiveEnumerable.Cached();
foreach (var element in cached) {
Console.WriteLine(element);
}
Update
I tested the current implementation based on @Theodors answer https://stackoverflow.com/a/58547863/5683904 and confirmed (AFAICT) that it is thread-safe when enumerated with a foreach without creating duplicate values (Thread-safe Cached Enumerator - lock with yield):
class Program
{
static async Task Main(string[] args)
{
var enumerable = Enumerable.Range(0, 1_000_000);
var cachedEnumerable = new CachedEnumerable<int>(enumerable);
var c = new ConcurrentDictionary<int, List<int>>();
var tasks = Enumerable.Range(1, 100).Select(id => Test(id, cachedEnumerable, c));
Task.WaitAll(tasks.ToArray());
foreach (var keyValuePair in c)
{
var hasDuplicates = keyValuePair.Value.Distinct().Count() != keyValuePair.Value.Count;
Console.WriteLine($"Task #{keyValuePair.Key} count: {keyValuePair.Value.Count}. Has duplicates? {hasDuplicates}");
}
}
static async Task Test(int id, IEnumerable<int> cache, ConcurrentDictionary<int, List<int>> c)
{
foreach (var i in cache)
{
//await Task.Delay(10);
c.AddOrUpdate(id, v => new List<int>() {i}, (k, v) =>
{
v.Add(i);
return v;
});
}
}
}