I have a single directory in a windows machine with 3.5 million json files ranging from 3 to 30kb. I have some function:
myBuilder(json){
//some stuff producing an object named entry
return entry
}
All I want to do is read every file in the directory, run them through myBuilder and insert them all into the mongo database. I've posted my best attempt below.
What is the simplest way to achieve the desired result?
Notes:
- I have thought that insertMany may be complicated because I would need to build a way to break this operation into chunks as a single array holding the entries would exceed my available ram.
- I can't seem to get glob to work. Could it be a windows based limitation? Could it be a memory based limitation? Either way, for now I would like to avoid answers using glob.
- I would really appreciate someone explaining whether it makes more sense to run many consecutive insertOne operations inside a single instance of connecting to the database, or whether it is necessary to connect and disconnect each time.
SAMPLE CODE:
var fs = require('fs');
var mongodb = require('mongodb');
var MongoClient = mongodb.MongoClient;
var MongoURL = 'mongodb://localhost:27017/my_database_name';
traverseFileSystem('/nodejs/nodetest1/imports');
function traverseFileSystem(path){
var files = fs.readdirSync(currentPath);
for (var i in files) {
var currentFile = path + '/' + files[i];
var stats = fs.statSync(currentFile);
if (stats.isFile()){
var fileText = fs.readFileSync(currentFile,'utf8');
var json= JSON.parse(fileText);
var entry = myBuilder(json); // note this is described above
insertToMongo(entry);
}
}
}
function insertToMongo(entry){
console.log(entry);
MongoClient.connect(MongoURL, function (err, db) {
var collection = db.collection('users');
collection.insert(entry, function (err, result) {
if(err)
console.log("error was"+err);
else
console.log("entry was"+result);
db.close();
});
});
}
This passes (and logs to the console) well formatted entries for every file in the directory. But it does not display a positive error or a result for any entry. Mongo does show that connection is made and it does not display any errors.