In my case I was attempting an insert using the PostgreSQL COPY command and ran into this issue. I had to parse every line into a new file first, so I wrote a small Node.js utility to do it; maybe it will help somebody searching for the same question:
const fs = require('fs');
const path = require('path');
const readline = require('readline');
/**
 * @description Escapes commas inside double-quoted cells so they are not treated as column delimiters.
 * @example formatLine('row1, row2, "row3,with,embedded commas", row4')
 *          // returns 'row1, row2, "row3\,with\,embedded commas", row4'
 * @param {string} str - Line to format and return.
 * @returns {string} The line with commas inside quoted cells escaped as "\,".
 */
const formatLine = (str) => {
  // Match every double-quoted cell in the line.
  const regexp = /"(.*?)"/g;
  const matches = [...str.matchAll(regexp)];
  // The line grows by one character per backslash added, so the match
  // indexes (taken from the original string) must be shifted by this amount.
  let additionsLength = 0;
  let formattedLine = '';
  if (matches.length === 0) {
    return str;
  }
  matches.forEach((item) => {
    const index = item.index;
    const length = item[0].length;
    // Escape each comma inside the quoted cell with a backslash.
    const pieces = item[0].split(/,/g);
    const insertMe = pieces.join('\\,');
    // Splice the escaped cell back into the (possibly already modified) line.
    const beginning = formattedLine ? formattedLine.substring(0, index + additionsLength) : str.substring(0, index);
    const end = formattedLine ? formattedLine.substring(index + length + additionsLength) : str.substring(index + length);
    formattedLine = beginning + insertMe + end;
    additionsLength += pieces.length - 1;
  });
  return formattedLine;
};
// Read the input CSV line by line and write the escaped lines to a new file.
const writeStream = fs.createWriteStream(path.join('/postgres/', 'formatted.csv'));
const input = fs.createReadStream(path.join('/postgres/', 'input.csv'));
const rl = readline.createInterface({
  input,
  crlfDelay: Infinity
});

writeStream.on('open', () => {
  rl.on('line', (line) => {
    writeStream.write(formatLine(line) + '\n');
  }).on('close', () => {
    writeStream.end();
  });
});
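
For completeness, here is a rough sketch of how the formatted file might then be loaded. The table name is a placeholder of mine (adjust it, and add a column list, for your schema); the point is to use COPY's text format with a comma delimiter, where the added backslashes keep the embedded commas from being read as column separators:

-- my_table is a placeholder; adapt the name and options to your setup
COPY my_table FROM '/postgres/formatted.csv' WITH (FORMAT text, DELIMITER ',');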