3

I'm trying to write a workflow with Gulp 4 (see below for specific version info) that will

  1. watch a local folder for an .html file
  2. strip multiple tables out into individual .html files per table
  3. convert said tables into .csv for further processing
  4. clean the temporary directory all these files are dumped into.

The problem I'm running into is that, no matter what I try, I cannot get my cleaning task to wait for the rest of the tasks to finish writing files to disk. I've tried nesting the data collection functions, combining all the alteration methods into one long stream, and a handful of other clumsy solutions offered here and elsewhere, but none of them work. Any pointers would be a great help.

var gulp = require('gulp');
var exec = require('child_process').exec;
var rename = require('gulp-rename');
var inject = require('gulp-inject-string');
var htmlSplit = require('gulp-htmlsplit');
var del = require('del');

// Gulp task: remove everything directly inside data/temp except the
// generated .csv files.
// Returns whatever del() returns for the pattern list.
function clean() {
  var patterns = [];
  patterns.push('data/temp/*');
  // Negated pattern: keeps the .csv results out of the deletion.
  patterns.push('!data/temp/*.csv');
  return del(patterns);
}

// Convert one split HTML table (data/temp/<filename>.html) into
// data/temp/<filename>.csv by running the html-table-to-csv script.
//
// filename - base name of the table file, without directory or extension.
// callback - optional; when it is a function it is forwarded to exec(), which
//            calls it once the child process has finished.
//
// Returns the ChildProcess created by exec(). exec() is asynchronous: the CSV
// does not exist yet when this function returns, so callers that need the
// result must pass a callback or listen for the child's 'close' event.
function convertCSV(filename, callback) {
  var base = 'data/temp/' + filename;
  var command = 'node node_modules/html-table-to-csv ' + base + '.html ' + base + '.csv';

  // Only forward a real function, so a stray extra argument is ignored.
  if (typeof callback === 'function') {
    return exec(command, callback);
  }
  return exec(command);
}

// Gulp task: copy the HTML report(s) found in data/report into data/temp.
// Every matched file is given the same name, injected.html.
// Returns the resulting stream.
function getData() {
  var reports = gulp.src('data/report/*.html');
  var renamed = reports.pipe(rename('injected.html'));
  return renamed.pipe(gulp.dest('data/temp'));
}

// Gulp task: rewrite every HTML file in data/temp in place, putting a
// "split start" comment in front of each opening <table tag.
// Returns the resulting stream.
function injectBefore() {
  var startMarker = '<!-- split table.html -->\n';
  var pages = gulp.src('data/temp/*.html');
  var marked = pages.pipe(inject.beforeEach('<table', startMarker));
  return marked.pipe(gulp.dest('data/temp'));
}

// Gulp task: rewrite every HTML file in data/temp in place, putting a
// "split stop" comment behind each closing </table> tag.
// Returns the resulting stream.
function injectAfter() {
  var stopMarker = '\n<!-- split stop -->';
  var pages = gulp.src('data/temp/*.html');
  var marked = pages.pipe(inject.afterEach('</table>', stopMarker));
  return marked.pipe(gulp.dest('data/temp'));
}

// Split each table of data/temp/injected.html into its own numbered HTML
// file, then convert every one of those files to CSV.
//
// done - gulp completion callback. It is called with no error once every
//        conversion child process has closed with exit code 0, or with an
//        Error as soon as the write stream or one conversion fails.
//
// Completion is signalled through `done` only (no stream is returned), and
// `done` fires after the conversions, so a later task such as clean cannot
// start while CSV files are still being produced.
function htmlCSV(done) {
  var i = 0;
  var basenames = [];
  var settled = false;

  // Report completion, or the first failure, exactly once.
  function finish(err) {
    if (settled) {
      return;
    }
    settled = true;
    done(err);
  }

  // Start one conversion per written file; finish when all have closed.
  function convertAll() {
    if (settled) {
      return;
    }
    var pending = basenames.length;
    if (pending === 0) {
      finish();
      return;
    }
    basenames.forEach(function(name) {
      // convertCSV() returns the ChildProcess from exec(); 'close' carries
      // the exit code.
      convertCSV(name)
        .on('error', finish)
        .on('close', function(code) {
          if (code !== 0) {
            finish(new Error('CSV conversion of ' + name + '.html exited with code ' + code));
            return;
          }
          pending -= 1;
          if (pending === 0) {
            finish();
          }
        });
    });
  }

  gulp.src('data/temp/injected.html')
    .pipe(htmlSplit())
    .pipe(rename(function(file) {
      // Append a unique, zero-padded number (01, 02, ... 10, 11) to each file.
      file.basename += i >= 9 ? ++i : '0' + ++i;
      // Only remember the name here: the file is not on disk yet, so the
      // conversion has to wait until the destination stream has finished.
      basenames.push(file.basename);
    }))
    .pipe(gulp.dest('data/temp'))
    .on('error', finish)
    .on('finish', convertAll);
}

// Default task: run the pipeline steps one after another, in this order.
gulp.task('default', gulp.series(
  getData,
  injectBefore,
  injectAfter,
  htmlCSV,
  clean
));

// FILE STRUCTURE
// analytics
// |_bower_components
// |_data
//   |_report  <-- Original report in HTML dumped here
//   |_temp    <-- Injected and converted files dumped here
// |_node_modules
// |_gulpfile.js and other files
// 
// Gulp - CLI version 1.2.2
// Gulp - Local version 4.0.0-alpha.2
// Node - v6.9.5
// NPM  - 3.10.10
// OS   - Windows 7 6.1.7601 Service pack 1 Build 7601
smac89
  • 39,374
  • 15
  • 132
  • 179
Sn3aKyGuY
  • 33
  • 4
  • Answering this will require a full rewrite; you are mixing sync and async code together. Gulp tasks expect either a returned stream or an end callback to be fired. Let me see what I can come up with – SteveLacy Feb 16 '17 at 22:33

2 Answers

3

I removed the regular gulp plugins and the actual csv transformation as that is just a child_process execution.

The main issue with your code is that Node's core child_process.exec is asynchronous: it returns immediately and only signals completion through a callback. Replacing it with sync-exec makes the call block until the command has finished, which is needed here because the gulp-rename callback has no completion callback of its own.

var gulp = require('gulp');
var exec = require('sync-exec');
var rename = require('gulp-rename');
var del = require('del');

// Gulp task: delete whatever matches the 'temp' pattern (the directory that
// getData writes to). Returns whatever del() returns.
function clean() {
  var targets = ['temp'];
  return del(targets);
}

// Stand-in for the HTML-table-to-CSV conversion.
// The real command would be
//   node node_modules/html-table-to-csv data/temp/<filename>.html data/temp/<filename>.csv
// but this demo runs a `sleep 5` shell command instead, so `filename` is unused.
// `exec` here is sync-exec; its result is returned unchanged.
function convertCSV(filename) {
  var placeholderCommand = 'sleep 5;';
  return exec(placeholderCommand);
}

// Gulp task: copy t.html into the temp/ directory.
// Returns the resulting stream.
function getData() {
  var source = gulp.src('t.html');
  var destination = gulp.dest('temp/');
  return source.pipe(destination);
}

// Gulp task: give each file coming from t.html a unique numbered name, run
// convertCSV() for it, and write the renamed file to dist.
// Returns the resulting stream.
function htmlCSV() {
  var counter = 0;

  // Rename hook: appends 01..09, then 10, 11, ... to the base name and
  // hands the new name to convertCSV().
  function numberAndConvert(file) {
    counter += 1;
    var suffix = counter < 10 ? '0' + counter : counter;
    file.basename += suffix;
    convertCSV(file.basename);
  }

  var source = gulp.src('t.html');
  var numbered = source.pipe(rename(numberAndConvert));
  return numbered.pipe(gulp.dest('dist'));
}

// Default task: run the three steps one after another, in this order.
gulp.task('default', gulp.series(
  getData,
  htmlCSV,
  clean
));
smac89
  • 39,374
  • 15
  • 132
  • 179
SteveLacy
  • 4,150
  • 2
  • 23
  • 30
  • 1
    Thanks for your direction Steve. I'm new to Gulp in general and just switched to Gulp 4 today. I'll be sure to dig into the documentation again to get a better grasp on things, but this helped get me going in the meantime. Thanks again. Unfortunately, since I'm new to SO, my up vote doesn't show publicly, but I did up vote this. – Sn3aKyGuY Feb 17 '17 at 02:50
  • Thanks, yes gulp 4 does add many nice features. Waiting on some dependency updates before we can publish it – SteveLacy Feb 17 '17 at 06:23
0

Use ES2017 async/await syntax together with util.promisify to wait for the child process to finish:

const util = require('util');
const exec = util.promisify(require('child_process').exec);

// Convert one HTML table file (data/temp/<filename>.html) to
// data/temp/<filename>.csv.
//
// filename - base name of the file, without directory or extension.
// Returns the promise from the promisified exec(); it settles once the
// command has finished (resolving with { stdout, stderr } per the Node docs).
//
// The paths are built into the command string because exec() takes
// (command[, options]): an array passed as the second argument is read as
// options and is never appended to the command line.
// NOTE: filename is interpolated into a shell command, so it must be trusted.
async function convertCSV(filename) {
  const source = `data/temp/${filename}.html`;
  const target = `data/temp/${filename}.csv`;
  return exec(`node node_modules/html-table-to-csv ${source} ${target}`);
}

No need for third-party libraries.

smac89
  • 39,374
  • 15
  • 132
  • 179