0

why i m getting this error localhost didn’t send any data.

Even i am sending the processed csv string from python to nodejs. The csv string is then printed (which is not happening) on successful form submit. Please see into images attached i am getting the csv string in the console but not in the csv file.

My app.js file which sends input (csvData, req.body.keywords, req.body.full_search) to the python script for use in processing.

/* csv to json */
const express = require("express"),
  app = express(),
  upload = require("express-fileupload"),
  csvtojson = require("csvtojson");

var http = require('http');
var path = require("path");
var bodyParser = require('body-parser');
var helmet = require('helmet');
var rateLimit = require("express-rate-limit");

let csvData = "test";
app.use(upload());

var server = http.createServer(app);

const limiter = rateLimit({
  windowMs: 15 * 60 * 1000, // 15 minutes
  max: 100 // limit each IP to 100 requests per windowMs
});

// var db = new sqlite3.Database('./database/employees.db');
app.use(bodyParser.urlencoded({extended: false}));
// app.use(express.static(path.join(__dirname,'./Final')));
app.use(helmet());
app.use(limiter);

server.listen(process.env.PORT || 3000, function() { 
    console.log('server running on port 3000'); 
})


app.get('/', function(req, res){
    // res.sendFile(path.join(__dirname,'./index.html'));
    res.sendFile(path.join(__dirname, './index.html'));
});

// Will remove all falsy values: undefined, null, 0, false, NaN and "" (empty string)
function cleanArray(actual) {
  var newArray = new Array();
  for (var i = 0; i < actual.length; i++) {
    if (actual[i]) {
      newArray.push(actual[i]);
    }
  }
  return newArray;
}

function get_array_string(inp){

  var str = "";
  for (var i = 0; i < inp.length; i++){
    str = str + " " + inp[i]
  }
  return str.trim()

}

// form submit request
app.post('/formsubmit', function(req, res){

    // console.log("Form submit request")
    csvData = req.files.csvfile.data.toString('utf8');
    filteredArray = cleanArray(csvData.split(/\r?\n/))
    csvData = get_array_string(filteredArray)

    console.log("URL list received: "+csvData)
    console.log("Search keywords: "+req.body.keywords)
    console.log("Full search: "+req.body.full_search)

    // return csvtojson().fromString(csvData).then(json => 
    //     {return res.status(201).json({csv:csvData, json:json})})

    // Send request to python script
    var spawn = require('child_process').spawn;
    var process = spawn('python', ["./WebExtraction.py", csvData, req.body.keywords, req.body.full_search])

    dataString = "";

    process.stdout.on('data', function(data){

      dataString = dataString + data.toString()

      console.log(dataString)

      res.setHeader('Content-disposition', 'attachment; filename=test.txt');
      res.set('Content-Type', 'text/csv');
      res.status(200).send(dataString);

    });

    process.stdout.on('end', function(){      

    });
    
    process.stdin.end();

});

Below is the part of the python script which sends the csv string export_csv to the node js application.

if(flag == 1):
    # Get the output string   
    # print("################### - OUTPUT - ############################")
    found_results_A = list(set(found_results))
    found_results = get_list(found_results)
    found_results = list(set(found_results))
    notfound_results = get_list(notfound_results)
    notfound_results = list(set(notfound_results))
    found_keywords_list_changed = []

    for ele in found_results_A:
        found_keywords_list_changed.append(href_key_dict[ele])

    # Get the not found results correctly using set operation
    setA = set(found_results)
    setB = set(notfound_results)
    notfound_results = list(set(setB-setA))
    error_urls = get_list(error_urls)
    error_urls = list(set(error_urls))
    ######################################################################################
    ## CREATING THE FINAL DATA FRAME FOR COLLECTING the extracted urls
    ######################################################################################
    colList = ['Found urls', 'Not found urls','Error urls']
    dframe = pd.DataFrame(columns = colList, dtype = str)

    maxlen = get_max_of_list(found_results_A, found_keywords_list_changed, notfound_results, error_urls)

    found_results_A = append_space(found_results_A, maxlen)
    notfound_results = append_space(notfound_results, maxlen)
    error_urls = append_space(error_urls, maxlen)
    found_keywords_list_changed = append_space(found_keywords_list_changed, maxlen)

    if(len(found_results_A) == maxlen and len(notfound_results) == maxlen and len(error_urls) == maxlen and len(found_keywords_list_changed) == maxlen):    
        dframe['Found urls'] = found_results_A
        dframe['keywords'] = found_keywords_list_changed
        dframe['Not found urls'] = notfound_results
        dframe['Error urls'] = error_urls

    try:    
        dframe = dframe.sort_values(by=["Found urls"], ascending=False)
        data = dframe.dropna(axis=0, how='all', thresh=None, subset=None, inplace=False)
        export_csv = dframe.to_csv(encoding = 'ASCII', index = None, header = True)
        # print(create_json(export_csv.strip()))
        print(export_csv.strip())
        sys.stdout.flush()
    except Exception as err:
        print('Exception occurred, Error on line {}'.format(sys.exc_info()[-1].tb_lineno), type(err).__name__, err)
        print(err)

I want to make changes in this to solve the issue but not able to proceed please help me to build this functionality.

process.stdout.on('data', function(data){

      dataString = dataString + data.toString()

      console.log(dataString)

      res.setHeader('Content-disposition', 'attachment; filename=test.txt');
      res.set('Content-Type', 'text/csv');
      res.status(200).send(dataString);

    });

    process.stdout.on('end', function(){      

    });
    
    process.stdin.end();

Below is the error i m getting

enter image description here

Also i m able to render the html page which has HTML FORM

enter image description here

Output of console

enter image description here

Nick jones
  • 63
  • 1
  • 20
  • Please help i am not able to solve this issue – Nick jones Feb 23 '21 at 09:29
  • From a cursory glance, it looks like you: 1) get a request, 2) set up a callback function for a spawned process, 3) spawn a python subprocess, 4) end your request without making sure that the callback function has been called. The solution seems to be: don't use `process.stdout.on` (which seems to set up an asynchronous callback function) and replace it with a synchronous, blocking call with a timeout. Or you need additional code to wait for `process.stdout.on` to actually fire. – orithena Feb 23 '21 at 09:41
  • @orithena i guess you r right about the asynchronous thing – Nick jones Feb 23 '21 at 09:43
  • @orithena can you share the syntax of making this function sync, i was actually searching for references – Nick jones Feb 23 '21 at 09:49
  • Nope, sorry, I don't know how it's being done in node.js. You may have better luck by editing your question to "How do I make this subprocess call synchronous?" – orithena Feb 23 '21 at 09:52

1 Answers1

0

Data sent to stdout in the child process should be concatenated in an on ("data" callback in the parent process and only sent back as an HTTP response when the an on("end" callback is called. Refactoring the child handling code might look like

process.stdout.on('data', function(data){
  dataString = dataString + data.toString()
});

process.stdout.on('end', function(){ 
  console.log(dataString)

  res.setHeader('Content-disposition', 'attachment; filename=test.txt');
  res.set('Content-Type', 'text/csv');
  res.status(200).send(dataString);     

});

process.stdin.end();

If the problem remains unsolved, or other issues emerge, try relaying stderr output received from the child process to the node console of the parent process with something like (untested):

process.stderr.on('data', function(data) {
    console.error( "stderr: %s", data);
}

This should allow you to put debug code in the python script that writes to stderr.

A previous answer (of mine) goes into more detail about obtaining the exit code and data written to stdout and stderr by a child process if needed and of help.

[P.S. dataString may not have been declared - I couldn't see a declaration for it in the post.]

traktor
  • 17,588
  • 4
  • 32
  • 53