I am reading excel files using C# and OpenXML (SAX). The file size ranges between 5-10 mb and has 5-6 sheets. Number of rows per sheet vary between 25-100K.
I am using the following code to fetch the data. It's reading about 100 rows per second. In comparison, Apache POI is able to read a thousand rows in the same time.
I expected better results as both products are from Microsoft. Am I doing something wrong?
using (SpreadsheetDocument excelDoc = SpreadsheetDocument.Open(filename, true))
{
WorkbookPart workbookPart = excelDoc.WorkbookPart;
WorksheetPart mainSheet = (WorksheetPart)workbookPart.GetPartById(sheetIds[0].ToString());
OpenXmlReader reader = OpenXmlReader.Create(mainSheet);
//CellType c;
SharedStringTable t = workbookPart.SharedStringTablePart.SharedStringTable;
while (reader.Read())
{
if (reader.ElementType == typeof(Row))
{
reader.ReadFirstChild();
do
{
if (reader.ElementType == typeof(Cell))
{
Cell c = (Cell)reader.LoadCurrentElement();
string cellValue;
if (c.DataType != null && c.DataType == CellValues.SharedString)
{
int index = int.Parse(c.CellValue.InnerText);
SharedStringItem ssi = workbookPart.SharedStringTablePart.SharedStringTable.Elements<SharedStringItem>().ElementAt(int.Parse(c.CellValue.InnerText));
cellValue = ssi.Text.Text;
}
else
{
cellValue = c.CellValue.InnerText;
}
//Console.Out.Write("{0}: {1} ", c.CellReference, cellValue);
}
}
while (reader.ReadNextSibling());
}
}
}