I'm facing a weird performance difference when I read data from a large CSV file. If I read the data and build the dictionary in the same loop, as the code snippet below shows, the method takes about 4.1 seconds to finish.
private void ReadFileWorkerRun(object sender, EventArgs e)
{
    // Scratch list used only by the two-pass variant of this method;
    // it stays empty in this single-pass version.
    List<Stock> lineTemp = new List<Stock>();
    // Flat list of every record, registered under the ALL key up front.
    List<Stock> allStock = new List<Stock>();
    // One list per StockID, kept in first-seen order so allStock can be
    // filled grouped by stock at the end.
    List<List<Stock>> orderedAll = new List<List<Stock>>();
    Categories = new Dictionary<string, List<Stock>>() { { GlobalVariable.ALL, allStock } };
    DictionaryOrder = new List<(string, string)>();

    using (StreamReader reader = new StreamReader(FilePath))
    {
        // The first ReadLine consumes the CSV header row.
        string row = reader.ReadLine();
        while ((row = reader.ReadLine()) != null)
        {
            Stock record = new Stock(row);
            // ---- upper boundary of the code that moves outside the using statement ----
            List<Stock> bucket;
            if (!Categories.TryGetValue(record.StockID, out bucket))
            {
                // First time this StockID is seen: create its bucket and
                // remember the insertion order.
                bucket = new List<Stock>();
                orderedAll.Add(bucket);
                Categories.Add(record.StockID, bucket);
                DictionaryOrder.Add((record.StockID, record.StockName));
            }
            bucket.Add(record);
            // ---- lower boundary of the code that moves outside the using statement ----
        }
    }
    /*
    The code between the boundaries is moved here
    */
    // Flatten the per-stock buckets into the ALL list, preserving group order.
    foreach (List<Stock> bucket in orderedAll)
    {
        allStock.AddRange(bucket);
    }
}
public class Stock
{
    // One row of the source CSV file. String columns are stored verbatim;
    // the three numeric columns are parsed with the invariant culture so the
    // file is read identically regardless of the machine's regional settings.
    public string StockDate { get; set; }
    public string StockID { get; set; }
    public string StockName { get; set; }
    public string SecBrokerID { get; set; }
    public string SecBrokerName { get; set; }
    public decimal Price { get; set; }
    public long BuyQty { get; set; }
    public long SellQty { get; set; }

    /// <summary>
    /// Parses one comma-separated data row of the form:
    /// date,id,name,brokerId,brokerName,price,buyQty,sellQty
    /// </summary>
    /// <param name="s">A single CSV data row with at least 8 fields.
    /// NOTE: a plain Split(',') does not handle quoted fields that contain
    /// commas — acceptable only if the source file never quotes fields.</param>
    /// <exception cref="IndexOutOfRangeException">Fewer than 8 fields.</exception>
    /// <exception cref="FormatException">A numeric field is malformed.</exception>
    public Stock(string s)
    {
        string[] data = s.Split(',');
        StockDate = data[0];
        StockID = data[1];
        StockName = data[2];
        SecBrokerID = data[3];
        SecBrokerName = data[4];
        // Machine-readable data: parse with the invariant culture (CA1305) so
        // results never depend on the current thread's locale (e.g. cultures
        // that use ',' as the decimal separator would mis-read the price).
        Price = decimal.Parse(data[5], System.Globalization.CultureInfo.InvariantCulture);
        BuyQty = long.Parse(data[6], System.Globalization.CultureInfo.InvariantCulture);
        SellQty = long.Parse(data[7], System.Globalization.CultureInfo.InvariantCulture);
    }
}
However, when I move the dictionary-building part of the code out of the while loop and into a separate foreach loop, the method takes only 3.4 seconds. The code in the using statement is split into the code below:
// Pass 1: read the entire file into memory first (the initial ReadLine
// consumes the header row), constructing one Stock per data row.
using (StreamReader lines = new StreamReader(FilePath))
{
string line = lines.ReadLine();
while ((line = lines.ReadLine()) != null)
{
lineTemp.Add(new Stock(line));
}
}
// Pass 2: add each stock to the dictionary — identical grouping logic to the
// single-pass version, just moved outside the using statement.
foreach (Stock temp in lineTemp)
{
if (!Categories.TryGetValue(temp.StockID, out List<Stock> targetList))
{
// First occurrence of this StockID: create its bucket and record order.
targetList = new List<Stock>();
orderedAll.Add(targetList);
Categories.Add(temp.StockID, targetList);
DictionaryOrder.Add((temp.StockID, temp.StockName));
}
targetList.Add(temp);
}
The only difference between the two versions is the code listed in the second part, and the time gap is consistent no matter how many times I run it — so why does code with the same logic and data structures perform so differently?