I have an excel file with 100,000 rows and 20 columns each. I am fetching the records using ClosedXml and then converting it into JSON using Json.Net library. However it seems that my memory consumption is increasing a lot. It shots up by about 2.5GB on running the code. Below is my code
public class Reader : IReader
{
public JObject Convert(string excelFile, string sheetName, List<string> columns)
{
var workbook = new XLWorkbook(excelFile, XLEventTracking.Disabled);
var worksheet = workbook.Worksheet(sheetName);
var headers = worksheet.FirstRowUsed();
ValidateColumns(headers, columns);
var itemRow = headers.RowBelow();
var ds = new JObject();
var rowCount = 1;
while (!itemRow.Cell(1).IsEmpty())
{
rowCount++;
var jRow = ConvertRowToArray(itemRow, columns);
var key = CreateKey(itemRow);
if (ds[key] != null)
((JArray)ds[key]).Add(jRow);
else
ds.Add(key, new JArray(jRow));
itemRow = itemRow.RowBelow();
jRow = null;
key = null;
}
return ds;
}
private JObject ConvertRowToArray(IXLRow row, List<string> columns)
{
var jRow = new JObject();
for (var iterator = 1; iterator <= columns.Count; iterator++)
{
var jVal = new JObject();
jVal.Add("Value", row.Cell(iterator).Value.ToString());
jVal.Add("Error", new JArray());
jRow.Add(columns[iterator - 1], jVal);
jVal = null;
}
return jRow;
}
private string CreateKey(IXLRow row)
{
return string.Format("{0}-{1}-{2}", row.Cell(2).Value.ToString(), row.Cell(5).Value.ToString(), row.Cell(6).Value.ToString());
}
private void ValidateColumns(IXLRow row, List<string> columns)
{
for (var index = 0; index < columns.Count; index++)
{
if (columns[index] != row.Cell(index + 1).Value.ToString())
throw new FormatException("Column Mismatch");
}
}
}
The conversion is something like this:
| Col1 | Col2 | Col3 |
| V1 | V2 | V3 |
should get transferred to:
{
"items": {
"Value": [
{
"Col1": {
"Value": "V1",
"Error": [ ]
},
"Col2": {
"Value": "V2",
"Error": [ ]
},
"Col3": {
"Value": "V3",
"Error": [ ]
}
}
]
}
}
There is no issue in converting a small excel file, but I am facing high memory utilization issue on using a larger file. I am new to Json.Net so not sure of any tips or best practices for reducing the memory consumption. Any help will be appreciated.
Update
From VS2015 diagnostic tools it looks like the culprit is Newtonsoft.Json.Linq.JProperty and JArray.