1

I am converting xlsx to csv using below program and I would like to add quote characters("") each cell string if it contains line break(/n) or delimiter character.

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.util.Iterator;

import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;

public class XlsxtoCSV {

    static void xlsx(File inputFile, File outputFile) {
        // For storing data into CSV files
        StringBuffer data = new StringBuffer();

        try {
            FileOutputStream fos = new FileOutputStream(outputFile);
            // Get the workbook object for XLSX file
            XSSFWorkbook wBook = new XSSFWorkbook(new FileInputStream(inputFile));
            // Get first sheet from the workbook
            XSSFSheet sheet = wBook.getSheetAt(0);
            Row row;
            Cell cell;
            // Iterate through each rows from first sheet
            Iterator<Row> rowIterator = sheet.iterator();

            while (rowIterator.hasNext()) {
                row = rowIterator.next();

                // For each row, iterate through each columns
                Iterator<Cell> cellIterator = row.cellIterator();
                while (cellIterator.hasNext()) {

                    cell = cellIterator.next();

                    switch (cell.getCellType()) {
                        case Cell.CELL_TYPE_BOOLEAN:
                            data.append(cell.getBooleanCellValue() + ",");

                            break;
                        case Cell.CELL_TYPE_NUMERIC:
                            data.append(cell.getNumericCellValue() + ",");

                            break;
                        case Cell.CELL_TYPE_STRING:
                            data.append(cell.getStringCellValue() + ",");
                            break;

                        case Cell.CELL_TYPE_BLANK:
                            data.append("" + ",");
                            break;
                        default:
                            data.append(cell + ",");

                    }
                }
            }

            fos.write(data.toString().getBytes());
            fos.close();

        } catch (Exception ioe) {
            ioe.printStackTrace();
        }
    }
    //testing the application 

    public static void main(String[] args) {
        //reading file from desktop
        File inputFile = new File("C:\\Users\\user69\\Desktop\\test.xlsx");
        //writing excel data to csv 
        File outputFile = new File("C:\\Users\\user69\\Desktop\\test1.csv");
        xlsx(inputFile, outputFile);
    }
}

As per RFC4180 Csv rules. Fields containing line breaks (CRLF), double quotes, and comma should be enclosed in double-quotes. Hence I have to format cell data(numeric or String or any other type) if cell data contains line break or delimiter(,) before adding to String buffer.Please help me in formatting cell data according to CSV rules.

Kiran
  • 921
  • 1
  • 11
  • 23

2 Answers2

1

Use a library like commons-csv:

final Appendable out = ...;  
final CSVPrinter printer = CSVFormat.DEFAULT.withHeader("H1", "H2").print(out);
...
while (rowIterator.hasNext()) {
    ...
    while (cellIterator.hasNext()) {
        ...
        printer.print(cell.getStringCellValue());
        ...
    }
    printer.println();
}

See also the short user-guide

centic
  • 15,565
  • 9
  • 68
  • 125
1

Centic's reply is exactly right. Just to expand on what he wrote, here's my complete and tested method which uses Commons CSV for actual value printing. Unfortunately, we still need to iterate over the Sheet, there's no automatic CSV output method in XSSF, but I followed Centic's strategy to do Row/Cell iteration.

This example outputs to an OutputStream, but obviously a File is equally easy (use a FileReader in the CSVPrinter constructor).

// Convert an XSSFWorkbook to CSV and write to provided OutputStream
private void writeWorkbookAsCSVToOutputStream(XSSFWorkbook workbook, OutputStream out) {

    CSVPrinter csvPrinter = null;

    try {
        // Or change this to  File-based constructor, if File output is required
        csvPrinter = new CSVPrinter(new OutputStreamWriter(out), CSVFormat.DEFAULT);                

        if (workbook != null) {
            XSSFSheet sheet = workbook.getSheetAt(0); // Sheet #0
            Iterator<Row> rowIterator = sheet.rowIterator();
            while (rowIterator.hasNext()) {               
                Row row = rowIterator.next();
                Iterator<Cell> cellIterator = row.cellIterator();
                while (cellIterator.hasNext()) {
                    Cell cell = cellIterator.next();
                    csvPrinter.print(cell.getStringCellValue()); // Commons CSV prints here
                }
                // Newline after each row
                csvPrinter.println();
            }

        }

    }
    catch (Exception e) {
        log.error("Failed to write CSV file to output stream", e);
    }
    finally {
        try {
            if (csvPrinter != null) {
                // Close CSVPrinter
                csvPrinter.flush();
                csvPrinter.close();
            }
        }
        catch (IOException ioe) {
            log.error("Error when closing CSV Printer", ioe);
        }           
    }
}   
gene b.
  • 10,512
  • 21
  • 115
  • 227