0

I am looking to do a get, run a function on the results which will do some manipulation by updating a field, and then put that doc back into the database. Really my issue is being able to chain together multiple DB calls. I have been struggling with this the past week or so. Any suggestions appreciated, thanks.

Here is what I have tried so far but I am receiving an error:

function geocode_cleanup(request, response, next) {
  r.table('dealer_locations').filter(r.row('geodata').match('No geodata found.'))
    .do(function(row) {
      var geodata = opencage_geocoder.geocode(row.Address, function(error, response) {
        if (error) {
          console.log("Error.");
          row.geodata = "No geodata found.";
          row.active = true;
        } else if (response.length == 0) {
          console.log("Empty response.");
        } else {
          console.log("Success.");
          console.log(response);
          var latitude = response[0].latitude;
          var longitude = response[0].longitude;
          row.geodata = r.point(longitude, latitude);
          row.active = true;
        }
      });
      return r.table('dealer_locations').update({
        geodata: geodata
      })
    }).run(conn, function(error, cursor) {
      response.setHeader("Content-Type", "application/json");
      if (error) {
        handleError(response, error);
      } else {
        cursor.toArray(function(error, results) {
          if (error) {
            handleError(response, error);
          } else {
            response.send(results);
          };
        });
      }
      next();
    })
};

Also, this gives the desired results returned in the response, but the second db action never happens because I am still inside of the same db connection I think:

function geocode_cleanup(request, response, next) {
    var conn = request._rdbConn;
    r.table('dealer_locations').filter({geodata: "No geodata found."}).run(conn, function(error, cursor) {
      if (error) {
        handleError(response, error);
      } else {
        cursor.toArray(function(error, results) {
          if (error) {
            handleError(response, error);
          } else {
            var i = 1;
            async.forEach(results, function(item, callback) {
            var address = (item.Address + " " + item.City).toString();
            opencage_geocoder.geocode(address, function(err, res) {
              if (err) {
                console.log(i);
                console.log("Error.");
                item.id = i;
                item.geodata = "No geodata found.";
                item.active = true;
                i++;
                callback();
              } else if (res.length == 0) {
                  console.log(i);
                  console.log("Empty response.");
                  i++;
                  callback();
                } else {
                    console.log(i);
                    console.log("Success.");
                    console.log(res);
                    var latitude = res[0].latitude;
                    console.log(i + " " + latitude);
                    var longitude = res[0].longitude;
                    console.log(i + " " + longitude);
                    item.id = i;
                    item.geodata = r.point(longitude, latitude);
                    item.active = true;
                    i++;
                    callback();
                }
              });
            }, function() {
              r.table('dealer_locations').insert(results, {
                conflict: "replace"
              }).run(request._rdbConn, function(error, results) {
                if (error) {
                  console.log("Data not inserted!");
                } else {
                  console.log("Data inserted!");
                }
              });
              console.log("Done!");
              response.send(results);
            });
          }
        })
      }
    })
  }
Garrett
  • 699
  • 5
  • 19
  • Which one is the second db action that never happens? – Jorge Silva May 19 '15 at 19:15
  • The insert. `r.table('dealer_locations').insert(results, { conflict: "replace" }).run(request._rdbConn, function(error, results) { if (error) { console.log("Data not inserted!"); } else { console.log("Data inserted!"); } });` – Garrett May 19 '15 at 19:16
  • What error are you getting in the first one? Keep in mind that you can't run JavaScript function inside your anonymous functions in ReQL (in `do`) for example. These functions are sent and executed on the server (which doesn't have access to `opencage_geocoder`, for example) – Jorge Silva May 19 '15 at 19:16
  • How do you know the callback is getting executed? Maybe add a `console.log` after the `geocode` method? – Jorge Silva May 19 '15 at 19:18
  • Sorry I'm giving you so much stuff that's so general, but there's a lot of code! – Jorge Silva May 19 '15 at 19:19
  • No that's fine haha glad for the help. I know what everything is working correctly up to that second database interaction because I am getting a response back with all the of results. The problem is then that the insert is not taking place so none of the results are getting updated in the database. – Garrett May 19 '15 at 19:35
  • Wouldn't you need to send the response after the data has been inserted? Isn't there a chance that there's an error in the insert, but you're not getting it because `response.send` has already been called? Basically, what happens if you add the `response.send` after the `insert` query has been run? – Jorge Silva May 19 '15 at 20:30
  • So the response will post prior to the asynchronous function completeing....I am guessing that is the issue then. The async stuff isn't finishing before the insert is called. – Garrett May 19 '15 at 20:42

2 Answers2

1

Here's a possible solution which uses promises to organize the code a little bit.

// Guarantee support for promises and provide the `promisify` function
var Promise = require('bluebird');
// Promisify the geocode function to make it easier to use
var geocode = Promise.promisify(opencage_geocoder.geocode);

function geocode_cleanup(request, response, next) {
  var conn = request._rdbConn;
  r
    .table('dealer_locations')
    .filter(r.row('geodata').match('No geodata found.'))
    .coerceTo('array')
    .run(conn)
    .then(function(rows) {
      // This promise will be resolve when all rows have been geocoded and updated
      // We map the rows into an array of promises, which is what Promise.all takes
      return Promise.all(rows.map(function (row) {
        return geocode(row.Address)
          .then(function (response) {
            console.log("Success.");
            var latitude = response[0].latitude;
            var longitude = response[0].longitude;
            row.geodata = r.point(longitude, latitude);
            row.active = true;
            // Return the row
            return row;
          });
        });
      }));
    })
    .then(function (rows) {
      // Now that all `dealer_locations` have been updated, re-query them
      return r
        .table('dealer_locations')
        .insert(rows, {conflict: "update", return_changes: true})
        .run(conn);
    })
    .then(function (results) {
      // Send the response;
      response.setHeader("Content-Type", "application/json");
      response.send(results);
      return;
    })
    .catch(function (err) {
      return handleError(null, error);
    })
};

Some problems I noticed with your code:

1. Use of do

 r.table('dealer_locations').filter(r.row('geodata').match('No geodata found.'))
    .do(function(row) {
      var geodata = opencage_geocoder.geocode ...
    })

In this code snippet, you use a JS function inside of do. You can't do that. Remember that what happens inside of do happens in the RethinkDB server (not in your Node.js server). Your RethinkDB server has no knowledge of your opencage_geocoder function and so this woudn't work.

Whatever do returns must be a valid ReQL query or ReQL expression. You can't execute arbitrary JavaScript inside of it.

If you want to run JavaScript with your query results, you have to .run the query and then do whatever you want to do inside the callback or .then function. At that point, that code will get executed in JavaScript and not in your RethinkDB server.

2. Use of update

return r.table('dealer_locations').update({
  geodata: geodata
})

The update method can only update a single document. You can't pass it an array of documents. In this scenario you what have needed to do r.table().get().update() in order for this to work, because you have to be referencing a single document when you update something.

If you have an array of documents that you want to update, you can use the forEach method.

r.table('hello')
 .merge({
   'new_property': 'hello!'
 })
 .forEach(function (row)  {
   // Insert that property into the document
   return r.table('hello').get(row.id).update(row);
 })

You can also do this (which you are already doing):

r.table('hello')
 .merge({
   'new_property': 'hello!'
 })
 .do(function (rows)  {
   // Insert that property into the document
   return r.table('hello')
     .insert(rows, {conflict: "update", return_changes: true});
 })
Jorge Silva
  • 4,574
  • 1
  • 23
  • 42
  • Firstly, thanks for the detailed response. You cleared up a couple other issues I think I was having due to misunderstanding how to use some of those ReQL functions. But alright, I think this will work except for one thing. the row I am looking for, "geodata", is a geo type in the database. When I do the initial geocoding during the data insertion into the database, I would normally just have my function put "No geodata found" into the row if the address has an error during the geocode process. Now when I run the function you posted, having a string in the geodata column is giving me an error. – Garrett Jun 04 '15 at 14:30
  • I swapped out all of the "No geodata found." values for null, but I still recieve this error: `Unhandled rejection RqlRuntimeError: Expected type STRING but found PTYPE in: r.table("dealer_locations").filter(r.row("geodata").match(null))` – Garrett Jun 04 '15 at 15:26
  • The `match` is used for string. If you call `match` on anything that is not a string, it will throw an error. Try `.filter(r.row("geodata").eq(null))`. – Jorge Silva Jun 04 '15 at 16:26
  • You can also try `.filter(r.row("geodata").typeOf().ne("OBJECT"))`, where you basically query all rows where the geodata property is not an object – Jorge Silva Jun 04 '15 at 16:34
  • Alright, I get a response back with the server but it doesnt like something on `return Promise.all(rows.map(function (row)` . I delved down into the response I get back and `rows._responses[0].r` has all of the data I need in it. Am I just accessing that the wrong way then? The error I get is `Unhandled rejection TypeError: undefined is not a function` – Garrett Jun 04 '15 at 16:42
  • So the `rows` returned, but not as an array of rows? Can you be more specific about the problem? – Jorge Silva Jun 04 '15 at 16:55
  • Let us [continue this discussion in chat](http://chat.stackoverflow.com/rooms/79685/discussion-between-garrett-and-jorge-silva). – Garrett Jun 04 '15 at 16:59
  • 1
    Alright, modified your answer a bit and got it working. Thanks! 1. Wrapped the entire function in my db connector: `database.createConnection(request, response).then(function(connection) { //Function };` 2. Added some error checking to just return the unchanged row if the geocode failed: `if (response.length == 0) { return row; } else { var latitude = response[0].latitude; var longitude = response[0].longitude; row.geodata = r.point(longitude, latitude); row.active = true; return row; }` 3. Also added in your edit: `.coerceTo('array')` – Garrett Jun 04 '15 at 17:34
0

OK, I have a suggestion. This queries for the documents you're interested in, modifies them (on your app server, not in the db) and then reinserts them using the nifty conflict: 'update' option. It also uses promises because I think that's a bit cleaner.

function geocode_cleanup(request, response, next) {
    r.table('dealer_locations')
        .filter(r.row('geodata').match('No geodata found.'))
        .run(conn).then(function(cursor) {
            var to_update = [];
            return cursor.toArray().then(function getGeocodes(rows) {
                return rows.map(function getGeocode(row) {
                    row.geodata = opencage_geocoder.geocode(row.Address, function(error, response) {
                        if (error) {
                            console.log("Error.");
                            row.geodata = "No geodata found.";
                            row.active = true;
                        } else if (response.length == 0) {
                            console.log("Empty response.");
                        } else {
                            console.log("Success.");
                            console.log(response);
                            var latitude = response[0].latitude;
                            var longitude = response[0].longitude;
                            row.geodata = r.point(longitude, latitude);
                            row.active = true;
                        }
                    });
                    return row;
                });
            });
        }).then(function doneGeocoding(modified_rows){
            return r.table('dealer_locations')
                .insert(modified_rows, {conflict: "update", return_changes: true})('changes')
                .coerceTo('array')
                .run(conn);
        }).then(function finishResponse(changes){
            response.setHeader("Content-Type", "application/json");
            response.send(results);
            next();
        }).catch(function(err) {
            // handle errors here
        });
};

Caveat emptor, I haven't run this, so there may be syntax errors and things

deontologician
  • 2,764
  • 1
  • 21
  • 33
  • Sorry for such a delayed response. I ran your code, and while there were no syntax errors, I just kinda hung there without executing any of the console.log statements. After poking around a bit, I changed the portion that reads: `.filter(r.row('geodata').match('No geodata found.'))` to: `.filter({geodata: "No geodata found."})` This got the code to start running, as I was seeing geodata being logged in the console, but then I got an error: RangeError: Maximum call stack size exceeded What is the difference between those two pieces of code, and what is the cause for this error? – Garrett May 20 '15 at 04:15
  • Also, where are you adding the found rows to the to_update array? Sorry, just working through the code right now trying to debug what's happening, and I noticed that the rows might not be getting adding to that array. – Garrett May 20 '15 at 04:36
  • OK, it seems the RangeError is because map can't accept huge arrays. So either it needs to be broken down into smaller sub-arrays, or it needs to be processed in a for loop – deontologician May 20 '15 at 20:03