Given this PLINQ code:
/// <summary>
/// Builds a query over every "*.cs" file below the archive directory (searched
/// recursively) that yields a (file, line) tuple for each line containing the
/// text "Console".
/// </summary>
/// <param name="nr_of_cores">
/// Degree of parallelism passed to <c>WithDegreeOfParallelism</c> for the
/// per-file line query.
/// </param>
/// <returns>
/// A deferred sequence: no file is read until the caller enumerates the
/// result, and the files are read again on every enumeration.
/// </returns>
public static IEnumerable<Tuple<string, string>> PlinqFileProcessingLimitedCores(int nr_of_cores)
{
    const string archiveDirectory = @"C:\Dotnet46Examples";

    var sourceFiles = Directory.EnumerateFiles(archiveDirectory, "*.cs", SearchOption.AllDirectories);

    // Only the inner collection (the lines of one file) is a parallel query;
    // the outer SelectMany / Where / Select operate on the plain file sequence.
    return sourceFiles
        .SelectMany(
            path => File.ReadLines(path).AsParallel().WithDegreeOfParallelism(nr_of_cores),
            (path, text) => new { path, text })
        .Where(hit => hit.text.Contains("Console"))
        .Select(hit => new Tuple<string, string>(hit.path, hit.text));
}
which returns every line containing the word "Console" from all matching files.
I tried to write faster async versions; however, they all turned out to be significantly slower than PLINQ, e.g.:
/// <summary>
/// Starts one <c>ProcessFileAsync</c> task per "*.cs" file found below the
/// archive directory (searched recursively) and waits for all of them.
/// </summary>
/// <returns>
/// The bag that was handed to every per-file task, returned once all of those
/// tasks have completed.
/// </returns>
public static async Task<ConcurrentBag<Tuple<string, string>>> FileProcessingAsync()
{
    const string archiveDirectory = @"C:\Dotnet46Examples";
    var matches = new ConcurrentBag<Tuple<string, string>>();

    var sourceFiles = Directory.EnumerateFiles(archiveDirectory, "*.cs", SearchOption.AllDirectories);

    // The projection is lazy; Task.WhenAll enumerates it, which is what
    // actually starts the per-file tasks.
    var pending = from path in sourceFiles
                  select ProcessFileAsync(matches, path);

    await Task.WhenAll(pending);
    return matches;
}
/// <summary>
/// Reads <paramref name="file"/> line by line and adds a (file, line) tuple to
/// <paramref name="bag"/> for every line that contains the text "Console".
/// </summary>
/// <param name="bag">Collection that receives the matching lines.</param>
/// <param name="file">Path of the text file to scan.</param>
/// <returns>A task that completes once the whole file has been read.</returns>
static async Task ProcessFileAsync(ConcurrentBag<Tuple<string, string>> bag, string file)
{
    // File.OpenText does not request asynchronous I/O. Opening the stream with
    // FileOptions.Asynchronous lets the awaited reads below be real
    // asynchronous file reads; SequentialScan matches the front-to-back access.
    using (var stream = new FileStream(
        file,
        FileMode.Open,
        FileAccess.Read,
        FileShare.Read,
        4096,
        FileOptions.Asynchronous | FileOptions.SequentialScan))
    using (var reader = new StreamReader(stream))
    {
        string line;

        // ReadLineAsync returns null at end of input, so no Peek() call is
        // needed; Peek() is synchronous and may fill the buffer with a blocking
        // read. ConfigureAwait(false) avoids resuming on a captured context
        // after every line.
        while ((line = await reader.ReadLineAsync().ConfigureAwait(false)) != null)
        {
            if (line.Contains("Console"))
            {
                bag.Add(new Tuple<string, string>(file, line));
            }
        }
    }
}
Why is the async code so much slower (by a factor of 1000 on my laptop)? What would better code look like? Is the problem not suited for async? Thanks.