
I have an Excel file with 100,000 rows, each with 20 columns. I am fetching the records using ClosedXML and then converting them into JSON using the Json.NET library. However, memory consumption increases a lot: it shoots up by about 2.5 GB when the code runs. Below is my code:

public class Reader : IReader
{
    public JObject Convert(string excelFile, string sheetName, List<string> columns)
    {
        var workbook = new XLWorkbook(excelFile, XLEventTracking.Disabled);
        var worksheet = workbook.Worksheet(sheetName);

        var headers = worksheet.FirstRowUsed();
        ValidateColumns(headers, columns);

        var itemRow = headers.RowBelow();
        var ds = new JObject();
        var rowCount = 1;
        while (!itemRow.Cell(1).IsEmpty())
        {
            rowCount++;
            var jRow = ConvertRowToArray(itemRow, columns);
            var key = CreateKey(itemRow);

            if (ds[key] != null)
                ((JArray)ds[key]).Add(jRow);
            else
                ds.Add(key, new JArray(jRow));

            itemRow = itemRow.RowBelow();
            jRow = null;
            key = null;
        }
        return ds;
    }
    private JObject ConvertRowToArray(IXLRow row, List<string> columns)
    {
        var jRow = new JObject();
        for (var iterator = 1; iterator <= columns.Count; iterator++)
        {
            var jVal = new JObject();
            jVal.Add("Value", row.Cell(iterator).Value.ToString());
            jVal.Add("Error", new JArray());
            jRow.Add(columns[iterator - 1], jVal);
            jVal = null;
        }
        return jRow;
    }

    private string CreateKey(IXLRow row)
    {
        return string.Format("{0}-{1}-{2}", row.Cell(2).Value.ToString(), row.Cell(5).Value.ToString(), row.Cell(6).Value.ToString());
    }
    private void ValidateColumns(IXLRow row, List<string> columns)
    {
        for (var index = 0; index < columns.Count; index++)
        {
            if (columns[index] != row.Cell(index + 1).Value.ToString())
                throw new FormatException("Column Mismatch");
        }
    }
}

The conversion should map a sheet like this:

| Col1 | Col2 | Col3 |
| ---- | ---- | ---- |
| V1   | V2   | V3   |

to JSON like this:

{
  "items": {
    "Value": [
      {
        "Col1": {
          "Value": "V1",
          "Error": [ ]
        },
        "Col2": {
          "Value": "V2",
          "Error": [ ]
        },
        "Col3": {
          "Value": "V3",
          "Error": [ ]
        }
      }
    ]
  }
}

There is no issue converting a small Excel file, but I am seeing high memory utilization with larger files. I am new to Json.NET, so I am not sure of any tips or best practices for reducing memory consumption. Any help will be appreciated.

Update
From the VS2015 diagnostic tools it looks like the culprits are Newtonsoft.Json.Linq.JProperty and JArray (see the memory utilization snapshot).

Pratik Bhattacharya
  • 1) Your desired JSON is invalid. `items : []` is an array, but its contents look like they are object properties. Upload to https://jsonformatter.curiousconcept.com/ and you will see the problem. 2) Why convert to `JObject` rather than stream? – dbc Apr 05 '16 at 13:46
  • Sorry about the JSON, it was typo error. I have fixed the desired JSON output – Pratik Bhattacharya Apr 05 '16 at 14:01
  • Use a JsonTextWriter to stream your JSON out to a file rather than building it up in memory. See [Performance Tips](http://www.newtonsoft.com/json/help/html/Performance.htm) in the documentation. – Brian Rogers Apr 05 '16 at 16:01 (a sketch of this approach follows after these comments)
  • Your code will not produce the JSON you show. I don't see any property names that correspond to the return of `CreateKey(IXLRow row)`. – dbc Apr 05 '16 at 16:17
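To illustrate the streaming suggestion above: below is a minimal sketch (not the asker's exact schema) that writes each row straight to a file with JsonTextWriter instead of accumulating a JObject. The StreamingConverter name and the outputFile parameter are made up for the example, and the CreateKey grouping is flattened into a per-row "Key" property, since streaming grouped arrays would require the rows to arrive sorted by key or a second pass over the sheet.

using System.Collections.Generic;
using System.IO;
using ClosedXML.Excel;
using Newtonsoft.Json;

public static class StreamingConverter
{
    // Streams rows out as they are read; nothing row-sized outlives one loop pass.
    public static void Convert(string excelFile, string sheetName,
                               List<string> columns, string outputFile)
    {
        using (var workbook = new XLWorkbook(excelFile, XLEventTracking.Disabled))
        using (var stream = File.CreateText(outputFile))
        using (var writer = new JsonTextWriter(stream))
        {
            writer.Formatting = Formatting.Indented;
            var worksheet = workbook.Worksheet(sheetName);
            var itemRow = worksheet.FirstRowUsed().RowBelow(); // skip the header row

            writer.WriteStartObject();              // {
            writer.WritePropertyName("items");      //   "items":
            writer.WriteStartArray();               //   [

            while (!itemRow.Cell(1).IsEmpty())
            {
                writer.WriteStartObject();          // one object per row
                writer.WritePropertyName("Key");    // key columns 2, 5, 6, as in CreateKey
                writer.WriteValue(string.Format("{0}-{1}-{2}",
                    itemRow.Cell(2).Value, itemRow.Cell(5).Value, itemRow.Cell(6).Value));

                for (var i = 1; i <= columns.Count; i++)
                {
                    writer.WritePropertyName(columns[i - 1]);
                    writer.WriteStartObject();
                    writer.WritePropertyName("Value");
                    writer.WriteValue(itemRow.Cell(i).Value.ToString());
                    writer.WritePropertyName("Error");
                    writer.WriteStartArray();       // empty "Error" array, as in the question
                    writer.WriteEndArray();
                    writer.WriteEndObject();
                }

                writer.WriteEndObject();
                itemRow = itemRow.RowBelow();
            }

            writer.WriteEndArray();
            writer.WriteEndObject();
        }
    }
}

With this shape, each row is serialized and released inside the loop, so peak managed memory stays around one row plus the workbook itself, rather than a 100,000-row JToken tree.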

0 Answers