130

I'd like to find all *.html files in src folder and all its sub folders using nodejs. What is the best way to do it?

const folder = '/project1/src';
const extension = 'html';

/**
 * Completion callback for the search.
 * @param {Error|null} err      Error reported by the search, if any
 * @param {string[]}   results  The files found, with paths relative to the folder
 */
function cb(err, results) {
   console.log(results);
}

// This is the function being asked for: it has to recursively traverse all sub folders.
findFiles(folder, extension, cb);

I think a lot of developers must already have a good, well-tested solution for this, and it would be better to use that than to write one myself.

Muhammad Numan
  • 23,222
  • 6
  • 63
  • 80
Nicolas S.Xu
  • 13,794
  • 31
  • 84
  • 129
  • If you want to search files by regex, then use [file-regex](https://www.npmjs.com/package/file-regex) library, which does recursive file search concurrently. – Akash Babu May 23 '20 at 06:18

17 Answers17

127

node.js, recursive simple function:

var path = require('path'),
fs = require('fs');

/**
 * Walk `startPath` recursively and log every file whose path ends with `filter`.
 * Logs "no dir" and returns when `startPath` does not exist.
 * @param {string} startPath  Directory to start from
 * @param {string} filter     Suffix to look for, e.g. '.html'
 */
function fromDir(startPath, filter) {
    if (!fs.existsSync(startPath)) {
        console.log("no dir ", startPath);
        return;
    }

    fs.readdirSync(startPath).forEach(function (entry) {
        var fullPath = path.join(startPath, entry);

        // lstat: a symlink is reported as a link, so linked directories are not descended into
        if (fs.lstatSync(fullPath).isDirectory()) {
            fromDir(fullPath, filter); //recurse
            return;
        }

        if (fullPath.endsWith(filter)) {
            console.log('-- found: ', fullPath);
        }
    });
}

fromDir('../LiteScript', '.html');

add RegExp if you want to get fancy, and a callback to make it generic.

var path = require('path'),
fs = require('fs');

/**
 * Walk `startPath` recursively and invoke `callback` for every file whose
 * path matches `filter`. Logs "no dir" and returns when `startPath` does not exist.
 * @param {string}   startPath  Directory to start from
 * @param {RegExp}   filter     Pattern tested against each file's joined path
 * @param {Function} callback   Called with the path of each matching file
 */
function fromDir(startPath, filter, callback) {
    if (!fs.existsSync(startPath)) {
        console.log("no dir ", startPath);
        return;
    }

    fs.readdirSync(startPath)
        .map(function (entry) { return path.join(startPath, entry); })
        .forEach(function (fullPath) {
            // lstat: a symlink is reported as a link, so linked directories are not descended into
            if (fs.lstatSync(fullPath).isDirectory()) {
                fromDir(fullPath, filter, callback); //recurse
            } else if (filter.test(fullPath)) {
                callback(fullPath);
            }
        });
}

fromDir('../LiteScript', /\.html$/, function(filename) {
    console.log('-- found: ', filename);
});
Alicia Sykes
  • 5,997
  • 7
  • 36
  • 64
Lucio M. Tato
  • 5,639
  • 2
  • 31
  • 30
116

i like using the glob package:

const glob = require('glob');

// Collect every .html file below this script's directory (the ** segment recurses).
glob(__dirname + '/**/*.html', {}, (err, files) => {
  if (err) {
    // Report the failure instead of printing an undefined result
    console.error(err);
    return;
  }
  console.log(files);
});
richardpringle
  • 796
  • 5
  • 22
David Cheung
  • 1,628
  • 1
  • 13
  • 17
38

What, hang on?! ... Okay ya, maybe this makes more sense to someones else too.

[nodejs 7 mind you]

// `import('fs')` would yield a Promise, not the module, so load fs synchronously.
const fs = require('fs');
// Entries directly inside `dir` (not recursive).
const dirCont = fs.readdirSync( dir );
// Keep names ending in .htm or .html, case-insensitively. The pattern is anchored
// with $ so 'page.html.bak' is rejected, and has no `g` flag so it carries no state.
const files = dirCont.filter( ( elm ) => /\.html?$/i.test( elm ) );

Do whatever with regex make it an argument you set in the function with a default etc.

ya_dimon
  • 3,483
  • 3
  • 31
  • 42
Master James
  • 1,691
  • 15
  • 19
  • 3
    This will only get matching files in the root directory. – dreamerkumar Jun 03 '17 at 20:24
  • 7
    I tried to edit and was rejected, which I disagree with. Here is my proposal: https://stackoverflow.com/review/suggested-edits/19188733 `wl` makes so sense whatsoever. Also the import for fs is missing. The three lines you need are: 1. `const fs = require('fs');` 2. `const dirCont = fs.readdirSync( dir );` 3. `const files = dirCont.filter( ( elm ) => /.*\.(htm?html)/gi.test(elm) );` – Avindra Goolcharan Mar 23 '18 at 03:38
  • right sorry wl.fs is where I stored the fs lib via import. – Master James Nov 23 '19 at 07:16
  • oh import is probably my own custom function that points to require for now too so sure use require or whatever you have to do. – Master James Nov 23 '19 at 07:19
18

Based on Lucio's code, I made a module. It returns an array with all the files that have the specified extension under the given folder. I'm posting it here in case anybody needs it.

var path = require('path'), 
    fs   = require('fs');


/**
 * Find all files recursively in specific folder with specific extension, e.g:
 * findFilesInDir('./project/src', '.html') ==> ['project/src/a.html','project/src/build/index.html']
 * (paths are built with path.join, so a leading './' is normalized away)
 * @param  {String} startPath    Path relative to this file or other file which requires this files
 * @param  {String} filter       Extension name, e.g: '.html'; must be the end of the file name
 * @return {Array}               Result files with path string in an array; empty when
 *                               startPath does not exist or nothing matches
 */
function findFilesInDir(startPath,filter){

    if (!fs.existsSync(startPath)){
        console.log("no dir ",startPath);
        // Always hand back an array so callers can iterate or concat the result safely
        return [];
    }

    var results = [];

    var files=fs.readdirSync(startPath);
    for(var i=0;i<files.length;i++){
        var filename=path.join(startPath,files[i]);
        // lstat: symlinks are reported as links, so linked directories are not followed
        var stat = fs.lstatSync(filename);
        if (stat.isDirectory()){
            results = results.concat(findFilesInDir(filename,filter)); //recurse
        }
        // endsWith rather than indexOf: the extension has to terminate the name, so
        // 'a.html.bak' and files under a folder called 'x.html.d' are not reported
        else if (filename.endsWith(filter)) {
            console.log('-- found: ',filename);
            results.push(filename);
        }
    }
    return results;
}

module.exports = findFilesInDir;
Nicolas S.Xu
  • 13,794
  • 31
  • 84
  • 129
14

You can use Filehound to do this.

For example: find all .html files in /tmp:

const Filehound = require('filehound');

// Configure a finder for files with the html extension under /tmp ...
const htmlFinder = Filehound.create()
  .ext('html')
  .paths("/tmp");

// ... then run it; the callback receives (err, matches).
htmlFinder.find((err, htmlFiles) => {
  if (err) {
    return console.error("handle err", err);
  }

  console.log(htmlFiles);
});

For further information (and examples), check out the docs: https://github.com/nspragg/filehound

Disclaimer: I'm the author.

nickool
  • 834
  • 11
  • 11
14

I have looked at the above answers and have mixed together this version which works for me:

/**
 * List the entries directly inside `path` (not recursive) whose name ends with
 * the given extension, compared case-insensitively.
 * @param  {string}   path       Directory to read
 * @param  {string}   extension  Extension with or without the leading dot ('.txt' or 'txt')
 * @return {string[]}            Matching entry names (names only, not joined with `path`)
 */
function getFilesFromPath(path, extension) {
    // Normalize to exactly one leading dot. A plain suffix comparison avoids building
    // a RegExp from caller input, where `\.` in a template literal silently becomes a
    // wildcard `.` and the unanchored pattern also accepted names such as 'notes.txt.bak'.
    const suffix = `.${String(extension).replace(/^\./, '')}`.toLowerCase();
    return fs.readdirSync( path ).filter( file => file.toLowerCase().endsWith(suffix) );
}

console.log(getFilesFromPath("./testdata", ".txt"));

This test will return an array of filenames from the files found in the folder at the path ./testdata. Working on node version 8.11.3.

Joshua Pinter
  • 45,245
  • 23
  • 243
  • 245
Netsi1964
  • 3,244
  • 1
  • 27
  • 17
7

The following code does a recursive search inside ./ (change it appropriately) and returns an array of absolute file names ending with .html

var fs = require('fs');
var path = require('path');

/**
 * Recursively collect the absolute paths of all regular files under `dir`
 * whose path ends with `pattern`.
 * @param  {string}   dir      Directory to scan
 * @param  {string}   pattern  Suffix to look for, e.g. '.html'
 * @return {string[]}          Absolute paths of the matching files
 */
var searchRecursive = function(dir, pattern) {
  var matches = [];
  var entries = fs.readdirSync(dir);

  for (var i = 0; i < entries.length; i++) {
    // Resolve against the directory being scanned to get an absolute path
    var absPath = path.resolve(dir, entries[i]);
    var info = fs.statSync(absPath);

    if (info.isDirectory()) {
      // Descend and append everything found below
      matches = matches.concat(searchRecursive(absPath, pattern));
    } else if (info.isFile() && absPath.endsWith(pattern)) {
      matches.push(absPath);
    }
  }

  return matches;
};

var files = searchRecursive('./', '.html'); // replace dir and pattern
                                                // as you seem fit

console.log(files);
Nikhil
  • 1,267
  • 15
  • 16
4

You can use OS help for this. Here is a cross-platform solution:

1. The function below uses ls and dir; it does not search recursively, but it returns relative paths

var exec = require('child_process').exec;
/**
 * List the files with the given extension directly inside `folder`
 * (non-recursive) by running `dir` on Windows or `ls` elsewhere.
 * @param {string}   folder     Folder to list
 * @param {string}   extension  Extension without the dot, e.g. 'html'
 * @param {Function} cb         Called as cb(err, null) on failure, cb(err, files) on success
 */
function findFiles(folder,extension,cb){
    var onWindows = /^win/.test(process.platform);
    var command = onWindows
        ? "dir /B "+folder+"\\*."+extension
        : "ls -1 "+folder+"/*."+extension;

    exec(command,function(err,stdout,stderr){
        if(err)
            return cb(err,null);
        // Drop the \r that Windows adds, then split into one entry per line
        var lines = stdout.replace(/\r/g,"").split("\n");
        // The text after the final newline is an empty entry; discard it
        lines.pop();
        cb(err,lines);
    });
}

findFiles("folderName","html",function(err,files){
    console.log("files:",files);
})

2. The function below uses find and dir; it searches recursively, but on Windows it returns absolute paths

var exec = require('child_process').exec;
/**
 * Recursively list the files with the given extension under `folder` by
 * running `dir /S` on Windows or `find` elsewhere.
 *
 * Only files are reported: a directory that happens to be named like
 * 'files.html' is excluded (`/A-D` on Windows, `-type f` for find).
 * The folder is double-quoted so paths containing spaces work.
 * NOTE(review): folder and extension are still interpolated into a shell
 * command — do not pass untrusted input.
 *
 * @param {string}   folder     Folder to search
 * @param {string}   extension  Extension without the dot, e.g. 'html'
 * @param {Function} cb         Called as cb(err, null) on failure, cb(err, files) on success
 */
function findFiles(folder,extension,cb){
    var command = "";
    if(/^win/.test(process.platform)){
        command = 'dir /B /S /A-D "'+folder+'\\*.'+extension+'"';
    }else{
        command = 'find "'+folder+'" -type f -name "*.'+extension+'"';
    }
    exec(command,function(err,stdout,stderr){
        if(err)
            return cb(err,null);
        //get rid of \r from windows
        stdout = stdout.replace(/\r/g,"");
        //one path per line; the trailing newline leaves an empty entry, so drop blanks
        var files = stdout.split("\n").filter(function(line){ return line !== ""; });
        cb(err,files);
    });
}

findFiles("folder","html",function(err,files){
    console.log("files:",files);
})
Emil Condrea
  • 9,705
  • 7
  • 33
  • 52
  • 2
    This is not the way to do it "using nodejs". This is using the OS, launching another process, etc. It also fails if there's a dir ending in ".html", e.g.: files.html/ – Lucio M. Tato Aug 24 '14 at 18:36
  • @LucioM.Tato you cand specify file type when searching. There are a lot of solutions to a problem, if one does not match your idea it just does not mean it is wrong, it is just different. This answer proves that you can reuse existing solutions no matter what scripting language is used. – Emil Condrea Aug 24 '14 at 18:47
  • Of course that's nothing wrong with iterating over a directory and finding the files with certain extension but I just wanted to receive from the OS all this information because I knew he can do it. :) – Emil Condrea Aug 24 '14 at 18:53
  • Nice. Couple suggestions though, add "-type f " into the find arguments to just look up files (mac, linux?). e.g. --- find "path" -type f -name "*.ext" --- and the final cb(err, files) should be cb(files). – bob May 30 '15 at 06:43
  • This is a bad solution IMO. The whole point of node is to abstract away from OS commands. You could make any program using OS commands if you really wanted to. – Ben Lorantfy May 26 '18 at 21:07
4

Can't add a comment because of reputation, but notice the following:

Using fs.readdir or node-glob to find a wildcard set of files in a folder of 500,000 files took ~2s. Using exec with DIR took ~0.05s (non recursive) or ~0.45s (recursive). (I was looking for ~14 files matching my pattern in a single directory).

So far, I have failed to find any nodejs implementation which uses low level OS wildcard searching for efficiency. But the above DIR/ls based code works wonderfully in windows in terms of efficiency. linux find, however, will likely be very slow for large directories.

Simon H
  • 373
  • 4
  • 7
  • 1
    Note I see there are new functions in latest nodejs fs module (12.13+? iterated directory fns?). I have not tried them yet because I'm stuck on 6.9.11 for now; will be interesting to see if they provide any new useful features for this. Thinking about my post now; OS caching should also be considered. My 0.05s would likely have been measured AFTER having run it a number of times. I wonder what the FIRST 'DIR' speed is? – Simon H Jan 05 '20 at 08:19
3

Take a look into file-regex

let findFiles = require('file-regex')
// In a string literal '\.' collapses to '.', which a regex treats as "any character".
// Escape the backslash so the dot is literal, and anchor with $ so only names
// ending in .js match.
let pattern = '\\.js$';

findFiles(__dirname, pattern, (err, files) => {
   if (err) {
      // Report the failure instead of printing an undefined result
      console.error(err);
      return;
   }
   console.log(files);
});

This above snippet would print all the js files in the current directory.

Akash Babu
  • 950
  • 6
  • 10
3

Install

you can install this package walk-sync by

yarn add walk-sync

Usage

const walkSync = require("walk-sync");
// Restrict the walk of ./project1/src to entries matching the html glob.
const htmlGlobs = ["**/*.html"];
const paths = walkSync("./project1/src", { globs: htmlGlobs });

console.log(paths);   //all html file path array
Muhammad Numan
  • 23,222
  • 6
  • 63
  • 80
2

my two pence, using map in place of for-loop

var path = require('path'), fs = require('fs');

/**
 * Recursively collect the files under `folder` whose joined path matches `pattern`.
 * @param  {string}   folder      Directory to scan
 * @param  {RegExp}   [pattern]   Pattern tested against each file path; defaults to match-all
 * @param  {Function} [callback]  Optional; called with each matching path as it is found
 * @return {string[]}             All matching paths
 */
var findFiles = function(folder, pattern = /.*/, callback) {
  var matches = [];
  var entries = fs.readdirSync(folder);

  for (var i = 0; i < entries.length; i++) {
    var fullPath = path.join(folder, entries[i]);

    if (fs.lstatSync(fullPath).isDirectory()) {
      // Append in place rather than building a new array with concat
      matches.push.apply(matches, findFiles(fullPath, pattern, callback));
      continue;
    }

    if (!pattern.test(fullPath)) {
      continue;
    }

    matches.push(fullPath);
    if (callback) {
      callback(fullPath);
    }
  }

  return matches;
};

// HTML files: the callback is invoked once per match as it is found
var html_files = findFiles(myPath, /\.html$/, function(o) { console.log('look what we have found : ' + o); });

// All files: with no pattern given, the match-all default applies
var all_files = findFiles(myPath);
jset74
  • 99
  • 1
  • 6
2

To the myriad of possible solutions we can also add fs-jetpack library that is perfect for build-script purposes.

const jetpack = require("fs-jetpack");

// the sync way
const files = jetpack.find("my_project", { matching: "*.html" });
console.log(files);

// or the async way
jetpack.findAsync("my_project", { matching: "*.html" })
  .then(foundFiles => {
    // named differently from the sync result above to avoid shadowing it
    console.log(foundFiles);
  })
  .catch(err => {
    // never leave the promise chain without a rejection handler
    console.error(err);
  });

1

I just noticed, you are using sync fs methods, that might block you application, here is a promise-based async way using async and q, you can execute it with START=/myfolder FILTER=".jpg" node myfile.js, assuming you put the following code in a file called myfile.js:

Q = require("q")
async = require("async")
path = require("path")
fs = require("fs")

/**
 * Asynchronously and recursively collect files below `startPath` whose name
 * contains `filter`, appending their joined paths to `files`.
 *
 * @param {string}   startPath  Directory to read
 * @param {string}   filter     Substring looked for in each entry name (via indexOf)
 * @param {string[]} files      Accumulator array; mutated in place and shared with recursive calls
 * @returns {Promise} Q promise resolved with the same `files` array, or rejected with
 *                    a "Could not read dir: ..." string when a step of the chain fails
 */
function findFiles(startPath, filter, files){
    var deferred;
    deferred = Q.defer(); //main deferred

    //read directory
    Q.nfcall(fs.readdir, startPath).then(function(list) {
        var ideferred = Q.defer(); //inner deferred for resolve of async each
        //async crawling through dir
        async.each(list, function(item, done) {

            //stat current item in dirlist
            return Q.nfcall(fs.stat, path.join(startPath, item))
                .then(function(stat) {
                    //check if item is a directory
                    if (stat.isDirectory()) {
                        //recursive!! find files in subdirectory
                        return findFiles(path.join(startPath, item), filter, files)
                            .catch(function(error){
                                //a failing subdirectory is only logged; it does not fail the overall search
                                console.log("could not read path: " + error.toString());
                            })
                            .finally(function() {
                                //resolve async job after promise of subprocess of finding files has been resolved
                                return done();
                             });
                    //check if item is a file, that matches the filter and add it to files array
                    //NOTE(review): indexOf matches the filter anywhere in the name, not only as an extension
                    } else if (item.indexOf(filter) >= 0) {
                        files.push(path.join(startPath, item));
                        return done();
                    //file is no directory and does not match the filefilter -> don't do anything
                    } else {
                        return done();
                    }
                })
                .catch(function(error){
                    //NOTE(review): done() is not called on this path; the inner deferred is rejected directly
                    ideferred.reject("Could not stat: " + error.toString());
                });
        }, function() {
            return ideferred.resolve(); //async each has finished, so resolve inner deferred
        });
        return ideferred.promise;
    }).then(function() {
        //here you could do anything with the files of this recursion step (otherwise you would only need ONE deferred)
        return deferred.resolve(files); //resolve main deferred
    }).catch(function(error) {
        //covers both a failed readdir and a rejected inner deferred
        deferred.reject("Could not read dir: " + error.toString());
        return
    });
    return deferred.promise;
}


findFiles(process.env.START, process.env.FILTER, [])
    .then(function(files){
        console.log(files);
    })
    .catch(function(error){
        console.log("Problem finding files: " + error);
})
1

You can Edit this code to suit what you intend doing. I used the sync versions for the nodejs IO operations so that the results will be returned before node continues executing the next lines of code:

const fs = require('fs');
const path = require('path');
    
// Path to the directory(folder) to look into
const dirPath = path.resolve(`${__dirname}../../../../../tests_output`);
        
// Read all files with .html extension in the specified folder above
const filesList = fs.readdirSync(dirPath, (err, files) => files.filter((e) => path.extname(e).toLowerCase() === '.html'));
        
// Read the content of the first file with .txt extension in the folder
const data = fs.readFileSync(path.resolve(`${__dirname}../../../../../tests_output/${filesList[0]}`), 'utf8');

res.writeHead(200, { 'Content-Type': 'text/html' });
res.write(data);
return res.end();
0

I would recommend you use the library recursive-readdir


it also has the ability to search for other file types and not just .html files

you can read the documentation here

Angus
  • 70
  • 6
-1

Old post but ES6 now handles this out of the box with the includes method.

let files = ['file.json', 'other.js'];

// Keep the names that contain '.json'.
// NOTE: includes() matches anywhere in the name, so 'data.json.bak' would pass too;
// use endsWith('.json') when only the extension should count.
let jsonFiles = files.filter(file => file.includes('.json'));

console.log("Files: ", jsonFiles); // ==> Files:  [ 'file.json' ]
James
  • 429
  • 1
  • 8
  • 17