I'm just learning mongojs and nodejs so I'm still trying to get used to programming asynchronously. What my program should do is take input from a file ('list'), read each line and pass the line to a parse function which writes data to a file and writes data to the database.
Here is my index.js:
var read_list = require('./read_list').read_list;
var parse = require('./parse');
var databaseUrl = "mydb";
var collections = ["patents"]
var db = require("mongojs").connect(databaseUrl, collections);
read_list(filename, array, parse, database, function(err) {
if(err) throw err;
db.close();
});
read_list.js:
var fs = require('fs');
var tmp = "http://patft.uspto.gov/netacgi/nph-Parser?Sect1=PTO1&Sect2=HITOFF&d=PALL&p=1&u=%2Fnetahtml%2FPTO%2Fsrchnum.htm&r=1&f=G&l=50&s1="
function read_list(file_name, pat_array, parse, db) {
fs.readFile(file_name, function(err, data) {
if(err) throw err;
else {
pat_array = data.toString().split("\n");
for(var i = 0; i < pat_array.length; i++) {
parse(tmp+pat_array[i]+".PN.&OS=PN/"+pat_array[i]+"&RS=PN/"+pat_array[i], pat_array[i], db);
}
}
});
}
exports.read_list = read_list
And finally parse.js:
var fs = require('fs');
var cheerio = require('cheerio');
var request = require('request');
function parse(url, filename, db) {
request(url, function(err, response, body) {
if (err)
throw err;
body = body.substring(body.indexOf("United States Patent", 43), body.indexOf("* * * * *"));
$ = cheerio.load(body);
var elems = new Array();
$("TD").each(function() {
if($(this).text())
{
elems.push($(this).text().trim());
}
})
var pat_title = $("font").text().trim();
var pat_abs = $("P").first().text().trim();
var pat_num = elems[0];
var pat_auth = elems[1];
var pat_date = elems[2];
var temp = "Title: " + pat_title + "\nNumber: " + pat_num + "\nAuthor: " + pat_auth + "\nDate: " + pat_date + "\nAbstract: " + pat_abs;
db.patents.save(
{
_id: filename.trim(),
Title: pat_title,
Number: pat_num,
Author: pat_auth,
Date: pat_date,
Abstract: pat_abs
},
function(err, saved)
{
if( err || !saved ) console.log("Patent not saved");
else console.log("Patent saved");
});
fs.writeFile(filename.trim(), temp, function(err) {
if(err) {
console.log(err);
} else {
console.log("The file was saved!");
}
});
});
}
exports.parse = parse;
Hopefully that makes sense and is readable, my first post here.
Everything executes as I expect it to, except the program will not terminate (must do it with ctrl+c). How can I change the program so I can close the connection in a callback, or am I missing the point and going about this the wrong way?