0

I'm using busboy as middleware in my Express server to stream form data that contains CSV files. These CSV files can have a varying number of extra config parameters, so I need to parse the first line to work out how many parameters there are before initialising the pipe to the csv-parser. My approach looks something like this:

// HTML Form parser middleware for dealing with file uploads.
// Pipes the request into busboy and, for each uploaded file, reads the first
// CSV line to count its comma-separated fields before piping the file into
// the csv parser with a matching header list.
// NOTE(review): this is the version that exhibits the problem being asked
// about - the "data" listener below starts the file stream flowing, so chunks
// are consumed before file.pipe(...) is attached in the .then() callback.
router.post("*", (req: Request, res: Response, next: NextFunction) => {

    let busboy = new Busboy({ headers: req.headers });

    busboy.on("file", (fieldname, file, filename, encoding, mimetype) => {
        file.on("end", () => {
            console.log("File [" + fieldname + "] Finished");
        });

        // number of CSV parameters to be found by splitting first line
        let paramsLen: number;

        // first line variable. Declared outside the data callback in case the first line is split over multiple data chunks
        let firstLine = "";

        // line split regex. Intended to match a new line or EOF.
        // NOTE(review): JavaScript regexes have no \Z anchor; inside a character
        // class \Z matches a literal "Z", so this also splits on any "Z".
        const lineSplitReg: RegExp = /[\n\Z]/;

        // The promise is fulfilled (via f) once the first line has been read; r is never called.
        return new Promise((f, r) => {
          file.on("data", data => {
              console.log("File [" + fieldname + "] got " + data.length + " bytes");
              // only inspect chunks until the field count has been determined
              if (!paramsLen) {
                  let strChunk = data.toString();
                  if (lineSplitReg.test(strChunk)) {
                      // this chunk contains the end of the first line
                      firstLine += strChunk.split(lineSplitReg)[0];
                      paramsLen = firstLine.split(",").length;

                      // paramsLen now found! init pipe to csv writeable
                      f();

                  } else {
                      // long line. continue reading in next data chunk
                      firstLine += strChunk;
                  }
              }
          });
        })
        .then(() => {
          // fixed columns that every CSV row starts with
          let headers: string[] = [
              "id",
              "brand",
              "product",
              "serialNumber",
              "site",
              "area",
              "location",
              "longitude",
              "latitude",
          ];

          // add extra config headers once paramsLen has been discovered
          // (each step adds one c<N>/v<N> header pair)
          let cNum = 1;
          for (let i = headers.length; i < paramsLen; i = i + 2) {
              headers.push(`c${cNum}`);
              headers.push(`v${cNum}`);
              cNum++;
          }

          // attached only after the promise resolves, i.e. after chunks have already been emitted
          file.pipe(
              csv({
                headers,
              }),
          );
        })
    });

    busboy.on("finish", () => {
        console.log("Done parsing form!");
        if (!importingDevicesFromCsv) {
            fulfill();
        }
    });

    req.pipe(busboy);
})

The problem is that by the time the promise is fulfilled, the file readable stream has already consumed some or all of the file data, which means those chunks never get passed to the csv parser stream. So how can I read the stream data without consuming it until the pipe to the csv parser is established, given that we may have to read several data chunks beforehand?

wizzfizz94
  • 1,288
  • 15
  • 20

1 Answers1

0

My solution was to create a promise wrapping a transform stream that reads the data without passing it on, holding it in an array (together with the release callback). When paramsLen is discovered, the promise is fulfilled with the transform object; then the pipe is established, and finally the data withheld in the transform stream is drained. See below:

// HTML Form parser middleware for dealing with file uploads.
//
// For every uploaded file the stream is piped through a "holding" Transform
// that reads the first CSV line to count its comma-separated fields while
// withholding the data. Once the count is known the csv parser is attached
// with a matching header list and the withheld data is released, so no chunk
// is lost between reading the first line and establishing the pipe.
router.post("*", (req: Request, res: Response, next: NextFunction) => {

    let busboy = new Busboy({ headers: req.headers });

    busboy.on("file", (fieldname, file, filename, encoding, mimetype) => {
        file.on("end", () => {
            console.log("File [" + fieldname + "] Finished");
        });

        file.on("data", data => {
            console.log("File [" + fieldname + "] got " + data.length + " bytes");
        });

        // State shared between the holding Transform and the code that attaches the csv parser.
        interface HeldStream {
            // chunks acknowledged upstream but not yet forwarded downstream
            held: Buffer[];
            // continuation of the transform/flush call that is paused until drain()
            release?: () => void;
            // number of fields on the first line; 0 until the first line has been
            // read (a parsed line always yields at least 1)
            paramsLen: number;
            // text of the first line accumulated so far
            firstLine: string;
            stream: Transform;
            drainDone: boolean;
            drain(): void;
        }

        return new Promise<HeldStream>(resolve => {

          // Records the field count, pauses the stream by keeping `release`
          // un-called, and hands the state to the .then() below.
          const firstLineFound = (release: () => void) => {
              ts.paramsLen = ts.firstLine.split(",").length;
              ts.release = release;
              resolve(ts);
          };

          const ts: HeldStream = {
              held: [],
              paramsLen: 0,
              firstLine: "",
              drainDone: false,
              drain: () => {
                  // forward the withheld chunks in arrival order ...
                  ts.held.forEach(chunk => {
                      ts.stream.push(chunk);
                  });
                  ts.held = [];
                  ts.drainDone = true;

                  // ... then resume the paused transform/flush call so the
                  // remaining data passes straight through
                  const release = ts.release;
                  ts.release = undefined;
                  if (release) {
                      release();
                  }
              },
              stream: new Transform({
                  transform: (data: Buffer, enc, callback: (error?: Error | null, data?: Buffer) => void) => {
                      // if drain finished pass data straight through
                      if (ts.drainDone) {
                          return callback(null, data);
                      }

                      ts.held.push(data);

                      const strChunk = data.toString();
                      const newlineAt = strChunk.indexOf("\n");
                      if (newlineAt === -1) {
                          // long line. Acknowledge the chunk (without emitting it) so the
                          // next one is delivered; a Transform only receives the next
                          // chunk after the previous callback has been invoked.
                          ts.firstLine += strChunk;
                          return callback();
                      }

                      // End of first line reached. The callback is kept back until
                      // drain(), which applies backpressure while the csv pipe is set up.
                      ts.firstLine += strChunk.slice(0, newlineAt);
                      firstLineFound(callback);
                  },
                  flush: (callback: (error?: Error | null, data?: Buffer) => void) => {
                      if (ts.paramsLen !== 0) {
                          return callback();
                      }
                      // EOF reached before any newline (single-line or empty file):
                      // treat whatever was read as the first line.
                      firstLineFound(callback);
                  },
              }),
          };

          // pipe into the Transform itself, not the wrapper object
          file.pipe(ts.stream);
        })
        .then(ts => {
          // fixed columns that every CSV row starts with
          let headers: string[] = [
              "id",
              "brand",
              "product",
              "serialNumber",
              "site",
              "area",
              "location",
              "longitude",
              "latitude",
          ];

          // add extra config headers once paramsLen has been discovered
          // (each step adds one c<N>/v<N> header pair)
          let cNum = 1;
          for (let i = headers.length; i < ts.paramsLen; i = i + 2) {
              headers.push(`c${cNum}`);
              headers.push(`v${cNum}`);
              cNum++;
          }

          ts.stream.pipe(
            csv({
                headers,
            }),
          );

          // drain transform stream
          ts.drain();
        })
        // forward unexpected failures to Express instead of leaving the rejection unhandled
        .catch(next);
    });

    busboy.on("finish", () => {
        console.log("Done parsing form!");
        if (!importingDevicesFromCsv) {
            fulfill();
        }
    });

    req.pipe(busboy);
})
wizzfizz94
  • 1,288
  • 15
  • 20