With Node.js and the fast-csv package, I currently have this parsing function, which reads a CSV file, changes the headers, goes through each row, and fires an event based on the row data.
validateRows: (filePath, payload, validators) => new Promise((resolve, reject) => {
const invalidRecords = [];
const validRecords = [];
fs.createReadStream(filePath)
.pipe(csv.parse({
headers: (headers) => mapHeaderToRelated(headers, payload), delimiter: ";", discardUnmappedColumns: true
}))
.validate((data, cb) => {
const errors = validators.reduce((err, func) => [...err, ...func(data)], []);
if (errors.length > 0) {
return cb(null, false, errors);
}
return cb(null, true);
})
.on("error", (error) => {
console.log("There is some error");
reject(error);
})
.on("data", (row) => {
validRecords.push(row);
})
.on("data-invalid", (row, rowNumber, reason) => {
invalidRecords.push({
data: row,
rowNumber: rowNumber,
reason: reason
});
})
.on("end", (rowCount) => {
console.log(`Parsed ${rowCount} rows. Valid Count: ${validRecords.length} Invalid Count: ${invalidRecords.length}`);
resolve({
invalidRecords,
validRecords
});
});
}),
I need to detect records that occur multiple times, checking the phone number field. If there is duplication — for example, multiple rows having the same phone number — all of those rows should be considered invalid and pushed to the invalid records array.
Example CSV:
| name | surname | gender | phone |
| ------ | ------- | -------- | ----- |
| John | Doe | Male | 123456 |
| Joh | Deo | Unknown | 123456 |
| Jane | Doe | Female | 999999 |
The output I'd like from the parsed CSV:
{
validRecords: [
{
name: Jane
surname: Doe
gender: Female
phone: 999999
}
]
invalidRecords: [
{
data: {
name: John
surname: Doe
gender: Male
phone: 123456
}
rowNumber: 1,
reason: ["Duplicate data"]
},
{
data: {
name: Joh
surname: Deo
gender: Unknown
phone: 123456
}
rowNumber: 2,
reason: ["Duplicate data"]
}
]
}
How could I approach this problem?