I have a scenario where I need to convert an MS Excel file to .CSV format in Net core 3.1 application. The data in the Excel file is not in tabular format, some places having merged cells or some headers information and then tabular data.
I tried to read that data and put it in a DataTable using ClosedXMl but it treats it differently since the initial few rows are not in line with the rest of the data which causes trimming a few columns.
My Excel looks like
Here, the first 2 rows are spanned across 3 columns only.
Below is my code using closedXML.
public static DataTable GetData(string filename)
{
DataTable dt = new DataTable();
//Open the Excel file using ClosedXML.
using (XLWorkbook workBook = new XLWorkbook(filename))
{
//Read the first Sheet from Excel file.
IXLWorksheet workSheet = workBook.Worksheet(1);
//Create a new DataTable.
//Loop through the Worksheet rows.
int rowsToSkip = 1;
foreach (IXLRow row in workSheet.Rows())
{
var cellsCount = row.Cells().Count();
////Use the first row to add columns to DataTable.
if (rowsToSkip <= 3)
{
foreach (IXLCell cell in row.Cells())
{
if (!string.IsNullOrEmpty(cell.Value.ToString()))
{
dt.Columns.Add(cell.Value.ToString());
}
else
{
break;
}
}
}
else
{
int i = 0;
DataRow toInsert = dt.NewRow();
foreach (IXLCell cell in row.Cells())
{
try
{
toInsert[i] =Convert.ToString(cell.Value);
}
catch (Exception ex)
{
//Handle this, or don't.
}
i++;
}
dt.Rows.Add(toInsert);
}
rowsToSkip++;
}
return dt;
}
}
public static void ToCSV(DataTable dtDataTable, string strFilePath)
{
StreamWriter sw = new StreamWriter(strFilePath, false);
//headers
for (int i = 0; i < dtDataTable.Columns.Count; i++)
{
sw.Write(dtDataTable.Columns[i]);
if (i < dtDataTable.Columns.Count - 1)
{
sw.Write(",");
}
}
sw.Write(sw.NewLine);
foreach (DataRow dr in dtDataTable.Rows)
{
for (int i = 0; i < dtDataTable.Columns.Count; i++)
{
if (!Convert.IsDBNull(dr[i]))
{
string value = dr[i].ToString();
if (value.Contains(','))
{
value = String.Format("\"{0}\"", value);
sw.Write(value);
}
else
{
sw.Write(dr[i].ToString());
}
}
if (i < dtDataTable.Columns.Count - 1)
{
sw.Write(",");
}
}
sw.Write(sw.NewLine);
}
sw.Close();
}
In GetData(), it only creates 3 columns in the data table since initial rows are spanned across 3 columns only.
I highly appreciate any inputs to handle this scenario when I can put unstructured data in the data table and write to CSV. I am open to other open-source packages too like EPPlus 4.5 or similar.