0

The scenario is that I have two large CSV files, csv1.csv and csv2.csv. Both files have an email column. I have to read csv1.csv row by row and check whether each email exists in csv2.csv; if it matches, the corresponding row of csv2.csv should be written to csv3.csv. I have tried using read streams as well, but the result is not what I expect. Any guidance or help is appreciated.

Thanks to all in advance.

Following are the CSV files

csv1.csv

email,header1,header2
test1@example.com,test1,test1
test2@example.com,test2,test2
test3@example.com,test3,test3
test4@example.com,test4,test4
test5@example.com,test5,test5

csv2.csv

email,header1,header2
test4@example.com,test4,test4
test5@example.com,test5,test5
test6@example.com,test6,test6
test7@example.com,test7,test7
test8@example.com,test8,test8

Following is the code that I tried

const fs = require('fs');
const csv = require('fast-csv')

/**
 * Small promise-based wrapper around fast-csv's `writeToStream`, used to
 * create a CSV file and later append rows to it.
 */
class CsvHelper {
  /**
   * Writes `rows` to `filestream` using fast-csv and settles when the
   * stream is done.
   *
   * @param {object} filestream - Writable stream that receives the CSV text.
   * @param {Array} rows - Rows to serialize.
   * @param {object} options - Formatter options forwarded to fast-csv.
   * @returns {Promise<void>} Resolves on the stream's 'finish' event,
   *   rejects on its 'error' event.
   */
  static write(filestream, rows, options) {
    return new Promise((res, rej) => {
      csv.writeToStream(filestream, rows, options)
        .on('error', err => rej(err))
        .on('finish', () => res());
    });
  }

  /**
   * @param {object} opts
   * @param {Array<string>} opts.headers - Column headers for the file.
   * @param {string} opts.path - Path of the CSV file to write.
   */
  constructor(opts) {
    this.headers = opts.headers;
    this.path = opts.path;
    this.writeOpts = {
      headers: this.headers,
      // Spelling matters: with the misspelled key the option is ignored, no
      // trailing newline is written, and the first appended row ends up on
      // the same line as the last existing row.
      includeEndRowDelimiter: true
    };
  }

  /**
   * Writes `rows` (with the header row) to the file at `this.path`.
   *
   * @param {Array} rows - Rows to write.
   * @returns {Promise<void>}
   */
  create(rows) {
    return CsvHelper.write(fs.createWriteStream(this.path, { flags: 'a' }), rows, { ...this.writeOpts });
  }

  /**
   * Appends `rows` to the file at `this.path` without writing headers again.
   *
   * @param {Array} rows - Rows to append.
   * @returns {Promise<void>}
   */
  append(rows) {
    return CsvHelper.write(fs.createWriteStream(this.path, { flags: 'a' }), rows, {
      ...this.writeOpts,
      writeHeaders: false,
    });
  }
}

/**
 * Matches the emails in csv1.csv against csv2.csv and records every match
 * in csv3.csv.
 */
class Helper {
  /**
   * Streams the parsed rows of a CSV file (first line treated as headers).
   *
   * Errors from the underlying file stream are forwarded to the parser so
   * that they surface to the consumer of this generator; the file stream is
   * always released when iteration stops.
   *
   * @param {string} filePath - Path of the CSV file to read.
   * @yields {object} One parsed row, keyed by header name.
   */
  async *readRows(filePath) {
    const source = fs.createReadStream(filePath);
    const parser = source.pipe(csv.parse({ headers: true }));
    // `pipe` does not propagate source errors, so forward them by hand.
    source.on("error", err => parser.destroy(err));

    try {
      yield* parser;
    } finally {
      source.destroy();
    }
  }

  /**
   * Scans csv2.csv for rows whose email equals `outerRow.email` and writes
   * one `[email, "yes"]` row per match to csv3.csv.
   *
   * @param {object} outerRow - A parsed row of csv1.csv with an `email` key.
   * @returns {Promise<string>} Resolves with "Done from matchCsv" once the
   *   scan and any resulting write have completed; rejects on read, parse
   *   or write errors.
   */
  async matchCsv (outerRow) {
    const filePath2 = "csv2.csv";
    const filePath3 = "csv3.csv";
    const row = [];

    for await (const innerRow of this.readRows(filePath2)) {
      if (outerRow["email"] === innerRow["email"]) {
        console.log("====================");
        console.log("match found");
        console.log(innerRow);
        console.log("====================");
        row.push([innerRow["email"], "yes"]);
        console.log("row: ", row);
      }
    }

    // Skip the write when nothing matched: an empty first write would create
    // the file without headers, and every later write is a header-less append.
    if (row.length > 0) {
      const csvFile = new CsvHelper({
        path: filePath3,
        headers: ["Email", "Active"]
      });

      if (!fs.existsSync(filePath3)) {
        await csvFile.create(row);
      } else {
        await csvFile.append(row);
      }
    }

    return "Done from matchCsv";
  }

  /**
   * Reads csv1.csv row by row and runs `matchCsv` for each row, strictly one
   * at a time, so that writes to csv3.csv never overlap and the returned
   * promise settles only after every row has been processed.
   *
   * @returns {Promise<string>} Resolves with "Generated csv3.csv file.".
   */
  async generateCsv () {
    const filePath1 = "csv1.csv";

    for await (const outerRow of this.readRows(filePath1)) {
      const result = await this.matchCsv(outerRow);
      console.log("result: ", result);
    }

    return "Generated csv3.csv file.";
  }
}


/**
 * Entry point: builds csv3.csv and prints the completion message.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const helper = new Helper();
  const result = await helper.generateCsv();
  console.log(result);
}

// Do not leave the promise floating: report failures and signal them through
// the process exit code instead of an unhandled rejection.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});

Himadri Ganguly
  • 715
  • 4
  • 11
  • 31

1 Answers1

1

So the question is a little confusing, but I think I know what you want. Here's what I would do to check if the email exists. It will add all the rows to an array, cycle through them, then if the email address matches the email you're looking for, it will do something else... I think you said you wanted to write to a csv file again with the row, but that should be simple enough.

const csv = require('csv-parser');
const fs = require('fs');

const filepath = "./example_data.csv";

const emailAdd = "myemail@email.com";
// Every parsed row is collected here and scanned once the file has been read.
const rowsArr = [];

fs.createReadStream(filepath)
    .on('error', (err) => {
        // Report the failure instead of silently swallowing it.
        console.error(err);
    })

    .pipe(csv())
    .on('data', (row) => {
        rowsArr.push(row);
    })

    .on('end', () => {
        // Iterate the rows directly; an index loop running up to and
        // including rowsArr.length reads one element past the end and throws.
        for (const row of rowsArr) {
            if (row.emailAddress === emailAdd) {
                //do something
            }
        }
    })
LUKER
  • 540
  • 1
  • 5
  • 23