Recently I have been doing some tests with C# multi-threaded performance, below are the tests and the result:
Note that all the tests run three times for an array with the size of 1000000, the first run on the main thread, the second using Parallel.For
, the third uses a custom parallel function where run tasks equal to the number of Environment.ProcessorCount
and the load are distributed on the tasks as evenly as possible. I run the tests on i7-10700 which comes with 8 cores and 16 threads. I'm using .NET 6 and Visual Studio 2022.
My test cases:
- Simple Write: write the index value for every element in the array
- Write Random: write a random value for every element in the array
- Date struct: create a Date object for every element in the array (where Date is struct)
- Date class: create a Date object for every element in the array (where Date is class)
- Lookup: run multiple lookups to fetch the value that will be written to the array element
- Min: find the min value in the array
- Min with wrapper: find the min value in the array, wrap the min values, so they don't share the cache line
My measured times:
routine | single-thread | multi-thread (parallel for) | multi-thread (custom) |
---|---|---|---|
Simple Write | 6.4779 ms | 10.5307 ms | 2.751 ms |
Write Random | 8.2883 ms | 20.1144 ms | 5.7795 ms |
Date struct | 19.0835 ms | 76.5366 ms | 11.0556 ms |
Date class | 67.922 ms | 101.3405 ms | 71.2441 ms |
Lookup | 4.4336 ms | 2.293 ms | 1.3446 ms |
Min | 3.4373 ms | 6.9232 ms | 1.2026 ms |
Min with wrapper | 3.5319 ms | 5.834 ms | 1.3308 ms |
I attached the code to my test cases, my questions are:
- why is the
Parallel.For
run so poorly? - In tests one and two, I only achieved around 2X the performance, is there a better way to achieve more?
- this is just an observation: from tests three and four we can see that object allocation on the heap doesn't work well with multi-threading.
- test number five is where multi-threading shine, we can get to around 4x the performance of the single-thread, not sure why this is the case.
- shouldn't I be reaching 16x performance, am I doing anything wrong, is there anything I should be aware of?
using System.Diagnostics;
public static class Ext
{
public static T[] Fill<T>(this T[] array, Func<T> cons)
{
for (int i = 0; i < array.Length; i++)
array[i] = cons();
return array;
}
}
public static class App
{
public static void Main()
{
Simple_Write_Test();
Rand_Test();
DateStruct_Test();
DateClass_Test();
Lookup_Test();
Min_Test();
Min_Wrapper_Test();
Console.ReadKey();
}
const int size = 1000000;
public static void Simple_Write_Test()
{
RunTest("Simple Write", size, () =>
{
return (new int[size], new int[size], new int[size]);
},
(data, i) =>
{
data.Item1[i] = i;
data.Item2[i] = i;
data.Item3[i] = i;
},
(data, i) =>
{
data.Item1[i] = i;
data.Item2[i] = i;
data.Item3[i] = i;
},
(data, i, id) =>
{
data.Item1[i] = i;
data.Item2[i] = i;
data.Item3[i] = i;
});
}
public static void Rand_Test()
{
var rand_lock = new object();
RunTest("Write Random", size, () =>
{
return (new int[size],
new Random[Environment.ProcessorCount].Fill(() => new Random()));
},
(data, i) =>
{
var rand = data.Item2[0];
data.Item1[i] = rand.Next();
},
(data, i) =>
{
int value;
lock (rand_lock)
{
value = data.Item2[0].Next();
}
data.Item1[i] = value;
},
(data, i, id) =>
{
var rand = data.Item2[id];
data.Item1[i] = rand.Next();
});
}
struct DateStruct
{
public int year;
public int month;
public int day;
public DateStruct(int year, int month, int day)
{
this.year = year;
this.month = month;
this.day = day;
}
}
public static void DateStruct_Test()
{
var rand_lock = new object();
RunTest("Date struct", size, () =>
{
return (new DateStruct[size], new Random[Environment.ProcessorCount].Fill(() => new Random()));
},
(data, i) =>
{
var rand = data.Item2[0];
data.Item1[i] = new DateStruct(rand.Next(), rand.Next(), rand.Next());
},
(data, i) =>
{
DateStruct value;
lock (rand_lock)
{
var rand = data.Item2[0];
value = new DateStruct(rand.Next(), rand.Next(), rand.Next());
}
data.Item1[i] = value;
},
(data, i, id) =>
{
var rand = data.Item2[id];
data.Item1[i] = new DateStruct(rand.Next(), rand.Next(), rand.Next());
});
}
class DateClass
{
public int year;
public int month;
public int day;
public DateClass(int year, int month, int day)
{
this.year = year;
this.month = month;
this.day = day;
}
}
public static void DateClass_Test()
{
var rand_lock = new object();
RunTest("Date Class", size, () =>
{
return (new DateClass[size], new Random[Environment.ProcessorCount].Fill(() => new Random()));
},
(data, i) =>
{
var rand = data.Item2[0];
data.Item1[i] = new DateClass(rand.Next(), rand.Next(), rand.Next());
},
(data, i) =>
{
DateClass value;
lock (rand_lock)
{
var rand = data.Item2[0];
value = new DateClass(rand.Next(), rand.Next(), rand.Next());
}
data.Item1[i] = value;
},
(data, i, id) =>
{
var rand = data.Item2[id];
data.Item1[i] = new DateClass(rand.Next(), rand.Next(), rand.Next());
});
}
public static void Lookup_Test()
{
RunTest("Lookup", size, () =>
{
var rand = new Random();
return (new int[size].Fill(() => rand.Next() % 100), new int[100].Fill(() => rand.Next() % 1000), new int[1000].Fill(() => rand.Next()));
},
(data, i) =>
{
data.Item1[i] = data.Item3[data.Item2[data.Item1[i]]];
},
(data, i) =>
{
data.Item1[i] = data.Item3[data.Item2[data.Item1[i]]];
},
(data, i, id) =>
{
data.Item1[i] = data.Item3[data.Item2[data.Item1[i]]];
});
}
public static void Min_Test()
{
object min_lock = new object();
RunTest("Min", size, () =>
{
var rand = new Random();
var array = new int[size].Fill(() => rand.Next());
var minValues = new int[Environment.ProcessorCount].Fill(() => int.MaxValue);
var minIndices = new int[Environment.ProcessorCount].Fill(() => -1);
return (array, minValues, minIndices);
},
(data, i) =>
{
var array = data.Item1;
if (array[i] < data.Item2[0])
{
data.Item2[0] = array[i];
data.Item3[0] = i;
}
},
(data, i) =>
{
var array = data.Item1;
if (array[i] < data.Item2[0])
{
lock (min_lock)
{
data.Item2[0] = array[i];
data.Item3[0] = i;
}
}
},
(data, i, id) =>
{
var array = data.Item1;
if (array[i] < data.Item2[id])
{
data.Item2[id] = array[i];
data.Item3[id] = i;
}
});
}
class Wrapper<T>
{
public T value;
public Wrapper(T value)
{
this.value = value;
}
public static implicit operator Wrapper<T>(T value) => new Wrapper<T>(value);
}
public static void Min_Wrapper_Test()
{
object min_lock = new object();
//wrap the values, so they don't share the cache line
RunTest("Min with wrapper", size, () =>
{
var rand = new Random();
var array = new int[size].Fill(() => rand.Next());
var minValues = new Wrapper<int>[Environment.ProcessorCount].Fill(() => int.MaxValue);
var minIndices = new Wrapper<int>[Environment.ProcessorCount].Fill(() => -1);
return (array, minValues, minIndices);
},
(data, i) =>
{
var array = data.Item1;
if (array[i] < data.Item2[0].value)
{
data.Item2[0] = array[i];
data.Item3[0] = i;
}
},
(data, i) =>
{
var array = data.Item1;
if (array[i] < data.Item2[0].value)
{
lock (min_lock)
{
data.Item2[0] = array[i];
data.Item3[0] = i;
}
}
},
(data, i, id) =>
{
var array = data.Item1;
if (array[i] < data.Item2[id].value)
{
data.Item2[id] = array[i];
data.Item3[id] = i;
}
});
}
public static void RunTest<T>(string name, int size,
Func<T> DataIntializer,
Action<T, int> excute,
Action<T, int> parallelForExecute,
Action<T, int, int> parallelExecute)
{
Log(name);
T data = DataIntializer();
var watch = Stopwatch.StartNew();
for (int i = 0; i < size; i++)
excute(data, i);
Log($"single thread time {watch.ElapsedTicks / 10000f} ms");
data = DataIntializer();
watch.Restart();
Parallel.For(0, size, (i) =>
{
excute(data, i);
});
Log($"multi thread (parallel for) time {watch.ElapsedTicks / 10000f} ms");
data = DataIntializer();
watch.Restart();
For(size, (i, id) =>
{
parallelExecute(data, i, id);
});
Log($"multi thread (custom) time {watch.ElapsedTicks / 10000f} ms\n");
}
public static void For(int size, Action<int, int> excute)
{
Task[] tasks = new Task[Environment.ProcessorCount];
int seg = size / Environment.ProcessorCount;
int r = size - seg * Environment.ProcessorCount;
int last = 0;
for (int p = 0; p < tasks.Length; p++)
{
int start = last;
int end = last + seg + (r-- > 0 ? 1 : 0);
int id = p;
last = end;
tasks[p] = Task.Run(() =>
{
for (int i = start; i < end; i++)
excute(i, id);
});
}
Task.WaitAll(tasks);
}
public static void Log(object text)
{
Console.WriteLine(text);
}
}