1

I am trying to read a big file in chunks instead of loading it directly to memory using nodejs. My goal is to read the file but cannot load it into memory as the file is big and then group the anagrams and then output them.

I started following the article described here

It basically involves creating a shared buffer at the beginning of the program and passing it down.

Essentially it involves the following functions

function readBytes(fd, sharedBuffer) {
  return new Promise((resolve, reject) => {
    fs.read(fd, sharedBuffer, 0, sharedBuffer.length, null, (err) => {
      if (err) {
        return reject(err);
      }
      resolve();
    });
  });
}

async function* generateChunks(filePath, size) {
  const sharedBuffer = Buffer.alloc(size);
  const stats = fs.statSync(filePath); // file details
  const fd = fs.openSync(filePath); // file descriptor
  let bytesRead = 0; // how many bytes were read
  let end = size;

  for (let i = 0; i < Math.ceil(stats.size / size); i++) {
    await readBytes(fd, sharedBuffer);
    bytesRead = (i + 1) * size;
    if (bytesRead > stats.size) {
      // When we reach the end of file,
      // we have to calculate how many bytes were actually read
      end = size - (bytesRead - stats.size);
    }
    yield sharedBuffer.slice(0, end);
  }
}

I then call it in main like the following. My goal is to group all the anagrams and then output them. However the issue I am having is that when I run the program the first 99,000 items I can access via console.log(Object.values(result)[99000]); however after that I am getting undefined. Any ideas what I am doing wrong?

const CHUNK_SIZE = 10000000; // 10MB

async function main() {
  let result = {};
  for await (const chunk of generateChunks("Data/example2.txt", CHUNK_SIZE)) {
    let words = chunk.toString("utf8").split("\n");
    for (let word of words) {
      let cleansed = word.split("").sort().join("");
      if (result[cleansed]) {
        result[cleansed].push(word);
      } else {
        result[cleansed] = [word];
      }
    }
  }
  console.log(Object.values(result)[99000]);
  return Object.values(result);
}
hulike2286
  • 61
  • 5

0 Answers0