19

I am trying to convert .xlsx file to .csv, convertion is happening but the data is not formatted properly. Please find code below and suggest changes to the code.

Here I am trying to read an .xlsx file and write it to a csv file i.e. converting xlsx to csv but I am not getting the .csv file in proper format all the data is displayed in a single but it must displayed like rows in Excel.

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.util.Iterator;

import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;

public class XlsxtoCSV {

    static void xlsx(File inputFile, File outputFile) {
        // For storing data into CSV files
        StringBuffer data = new StringBuffer();

        try {
            FileOutputStream fos = new FileOutputStream(outputFile);
            // Get the workbook object for XLSX file
            XSSFWorkbook wBook = new XSSFWorkbook(new FileInputStream(inputFile));
            // Get first sheet from the workbook
            XSSFSheet sheet = wBook.getSheetAt(0);
            Row row;
            Cell cell;
            // Iterate through each rows from first sheet
            Iterator<Row> rowIterator = sheet.iterator();

            while (rowIterator.hasNext()) {
                row = rowIterator.next();

                // For each row, iterate through each columns
                Iterator<Cell> cellIterator = row.cellIterator();
                while (cellIterator.hasNext()) {

                    cell = cellIterator.next();

                    switch (cell.getCellType()) {
                        case Cell.CELL_TYPE_BOOLEAN:
                            data.append(cell.getBooleanCellValue() + ",");

                            break;
                        case Cell.CELL_TYPE_NUMERIC:
                            data.append(cell.getNumericCellValue() + ",");

                            break;
                        case Cell.CELL_TYPE_STRING:
                            data.append(cell.getStringCellValue() + ",");
                            break;

                        case Cell.CELL_TYPE_BLANK:
                            data.append("" + ",");
                            break;
                        default:
                            data.append(cell + ",");

                    }
                }
            }

            fos.write(data.toString().getBytes());
            fos.close();

        } catch (Exception ioe) {
            ioe.printStackTrace();
        }
    }
    //testing the application 

    public static void main(String[] args) {
        //reading file from desktop
        File inputFile = new File("C:\\Users\\user69\\Desktop\\test.xlsx");
        //writing excel data to csv 
        File outputFile = new File("C:\\Users\\user69\\Desktop\\test1.csv");
        xlsx(inputFile, outputFile);
    }
}
Ry-
  • 218,210
  • 55
  • 464
  • 476
user2335416
  • 311
  • 2
  • 4
  • 11

3 Answers3

16

This is thanks @Swapnil!

data.append("\r\n"); // After the columns have been appended.

The following was edited (added) by @Abdullah My original answer above does not has that much impact, but Abdullah's edit shows much effort, so I leave it for those that encounter this question&answer.

public class App {

    public void convertExcelToCSV(Sheet sheet, String sheetName) {
        StringBuilder data = new StringBuilder();
        try {
            Iterator<Row> rowIterator = sheet.iterator();
            while (rowIterator.hasNext()) {
                Row row = rowIterator.next();
                Iterator<Cell> cellIterator = row.cellIterator();
                while (cellIterator.hasNext()) {
                    Cell cell = cellIterator.next();

                    CellType type = cell.getCellTypeEnum();
                    if (type == CellType.BOOLEAN) {
                        data.append(cell.getBooleanCellValue());
                    } else if (type == CellType.NUMERIC) {
                        data.append(cell.getNumericCellValue());
                    } else if (type == CellType.STRING) {
                        String cellValue = cell.getStringCellValue();
                        if(!cellValue.isEmpty()) {
                            cellValue = cellValue.replaceAll("\"", "\"\"");
                            data.append("\"").append(cellValue).append("\"");
                        }
                    } else if (type == CellType.BLANK) {
                    } else {
                        data.append(cell + "");
                    }
                    if(cell.getColumnIndex() != row.getLastCellNum()-1) {
                        data.append(",");
                    }
                }
                data.append('\n');
            }
            Files.write(Paths.get("C:\\Users\\" + sheetName + ".csv"),
                data.toString().getBytes("UTF-8"));
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static void main(String [] args)
    {
        App app = new App();
        String path =  "C:\\Users\\myFile.xlsx";
        try (InputStream inp = new FileInputStream(path)) {
            Workbook wb = WorkbookFactory.create(inp);

            for (int i = 0; i < wb.getNumberOfSheets(); i++) {
                System.out.println(wb.getSheetAt(i).getSheetName());
                app.convertExcelToCSV(wb.getSheetAt(i), wb.getSheetAt(i).getSheetName());
            }
        } catch (Exception ex) {
            System.out.println(ex.getMessage());
        } 
    }
}
StevenWernerCS
  • 839
  • 9
  • 15
Joop Eggen
  • 107,315
  • 7
  • 83
  • 138
  • This code works well. Only thing I miss is to determine on NUMERIC whether they're not of type date. I use following statement in `CellType.NUMERIC` branch: ` if (DateUtil.isCellDateFormatted(cell)) { SimpleDateFormat simpleDateFormat = new SimpleDateFormat(dateFormat); data.append(simpleDateFormat.format(cell.getDateCellValue())); } else { data.append(cell.getNumericCellValue()); } ` – michal.jakubeczy Jan 05 '23 at 09:51
5

Use Commons CSV to encode cell values, that's much more robust. Unfortunately, some extra code is still needed to iterate over the Sheet by Row/Cell and call Commons CSV on each cell (XSSF doesn't provide that), but at least the actual cell value being written is guaranteed to be standard CSV (i.e. you don't need to worry about escaping chars or adding commas yourself).

Maven add for Commons CSV:

<dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-csv</artifactId>
            <version>1.5</version>
        </dependency>   

Once Commons CSV is available, this is the code to export a Workbook to CSV. This examples writes to an OutputStream, but File output is also easy.

// Convert an XSSFWorkbook to CSV and write to provided OutputStream
private void writeWorkbookAsCSVToOutputStream(XSSFWorkbook workbook, OutputStream out) {        
    CSVPrinter csvPrinter = null;       
    try {       
        csvPrinter = new CSVPrinter(new OutputStreamWriter(out), CSVFormat.DEFAULT);                

        if (workbook != null) {
            XSSFSheet sheet = workbook.getSheetAt(0); // Sheet #0 in this example
            Iterator<Row> rowIterator = sheet.rowIterator();
            while (rowIterator.hasNext()) {               
                Row row = rowIterator.next();
                Iterator<Cell> cellIterator = row.cellIterator();
                while (cellIterator.hasNext()) {
                    Cell cell = cellIterator.next();
                    csvPrinter.print(cell.getStringCellValue());
                }                   
                csvPrinter.println(); // Newline after each row
            }               
        }

    }
    catch (Exception e) {
        log.error("Failed to write CSV file to output stream", e);
    }
    finally {
        try {
            if (csvPrinter != null) {
                csvPrinter.flush(); // Flush and close CSVPrinter
                csvPrinter.close();
            }
        }
        catch (IOException ioe) {
            log.error("Error when closing CSV Printer", ioe);
        }           
    }
}   
gene b.
  • 10,512
  • 21
  • 115
  • 227
  • Using `cell.getStringCellValue()` on all cells leads to errors on cells which have different types (e.g. number, date, etc.). You need to determine cell type first (use `CellType type = cell.getCellTypeEnum()`) and then via switch statement get the value. – michal.jakubeczy Jan 05 '23 at 09:48
0
public static void convertToXlsx(File inputFile, File outputFile) {
StringBuffer bf = new StringBuffer();
    FileOutputStream fos = null;
    String strGetValue = "";
    try {
        fos = new FileOutputStream(outputFile);
        XSSFWorkbook wb = new XSSFWorkbook(new FileInputStream(inputFile));
        XSSFSheet sheet = wb.getSheetAt(0);
        Row row;
        Cell cell;
        int intRowCounter = 0;
        Iterator<Row> rowIterator = sheet.iterator();
        while (rowIterator.hasNext()) {
            StringBuffer cellDData = new StringBuffer();
            row = rowIterator.next();
            int maxNumOfCells = sheet.getRow(0).getLastCellNum();
            int cellCounter = 0;
            while ((cellCounter) < maxNumOfCells) {
                if (sheet.getRow(row.getRowNum()) != null
                        && sheet.getRow(row.getRowNum()).getCell(cellCounter) != null) {
                    cell = sheet.getRow(row.getRowNum()).getCell(cellCounter);
                    switch (cell.getCellType()) {
                    case Cell.CELL_TYPE_BOOLEAN:
                        strGetValue = cell.getBooleanCellValue() + ",";
                        cellDData.append(removeSpace(strGetValue));
                        break;
                    case Cell.CELL_TYPE_NUMERIC:
                        strGetValue = new BigDecimal(cell.getNumericCellValue()).toPlainString();
                        if (DateUtil.isCellDateFormatted(cell)) {
                            strGetValue = new DataFormatter().formatCellValue(cell);
                        } else {
                            strGetValue = new BigDecimal(cell.getNumericCellValue()).toPlainString();
                        }
                        String tempStrGetValue = removeSpace(strGetValue);
                        if (tempStrGetValue.length() == 0) {
                            strGetValue = " ,";
                            cellDData.append(strGetValue);
                        } else {
                            strGetValue = strGetValue + ",";
                            cellDData.append(removeSpace(strGetValue));
                        }
                        break;
                    case Cell.CELL_TYPE_STRING:
                        strGetValue = cell.getStringCellValue();
                        String tempStrGetValue1 = removeSpace(strGetValue);
                        if (tempStrGetValue1.length() == 0) {
                            strGetValue = " ,";
                            cellDData.append(strGetValue);
                        } else {
                            strGetValue = strGetValue + ",";
                            cellDData.append(removeSpace(strGetValue));
                        }
                        break;
                    case Cell.CELL_TYPE_BLANK:
                        strGetValue = "" + ",";
                        cellDData.append(removeSpace(strGetValue));
                        break;
                    default:
                        strGetValue = cell + ",";
                        cellDData.append(removeSpace(strGetValue));
                    }
                } else {
                    strGetValue = " ,";
                    cellDData.append(strGetValue);
                }
                cellCounter++;
            }
            String temp = cellDData.toString();
            if (temp != null && temp.contains(",,,")) {
                temp = temp.replaceFirst(",,,", ", ,");
            }
            if (temp.endsWith(",")) {
                temp = temp.substring(0, temp.lastIndexOf(","));
                cellDData = null;
                bf.append(temp.trim());
            }
            bf.append("\n");
            intRowCounter++;
        }
        fos.write(bf.toString().getBytes());
        fos.close();
    } catch (Exception ex) {
        ex.printStackTrace();
    } finally {
        try {
            if (fos != null)
                fos.close();
        } catch (Exception ex) {
            ex.printStackTrace();
        }
    }
}
private static String removeSpace(String strString) {
    if (strString != null && !strString.equals("")) {
        return strString.trim();
    }
    return strString;
}

  1. code example handle , and space character in cell

    #


Ashish Yadav
  • 7
  • 1
  • 1
  • 6
  • I'm getting a premature end of file error. Do you know how to solve this? Process terminating -- some documents reached an exception which is set to halt all documents: Error in the Process [sub]_ihm-037_FinancialHierarchy Process terminating -- some documents reached an exception which is set to halt all documents: Error executing data process; Caused by: Premature end of file. (in groovy2 script); Caused by: Premature end of file. – NK7983 Jun 18 '20 at 21:04