We are developing a POC in Google's Spreadsheets.
There are some configurations, but in a nutshell it downloads data from BigQuery and redirects it to Prediction.
Our Bigquery tables have over to 41Mb, with is not allowed/supported by Spreadsheets.
We thought in download packages of 5Mb of data from Bigquery. Although Predicition API provides methods for insert lots of data, the update method allows to upload only one line/instance.
Is there any way to redirect Bigquery data straight to Prediction?
The main three functions involved are:
/**
* this function downloads data from Bigquery
* for a given table, it receives the columns supposed to be downloaded
*
* @params
* - modelName // the new prediction model name
* - columns // an array of columns
* - table // the table name
* - dataset // the dataset name
*/
function createPrediction(modelName, columns, table, dataset) {
try {
var rows = simpleQuery(columns, table, dataset);
var rowsL = rows.length;
var trainingL = parseInt(0.9 * rowsL, 10);
var training_instances = [];
for (var i = 0; i < trainingL; i++) {
training_instances.push({'output': rows[i].f[0].v, 'csvInstance': rows[i].f[1].v});
}
var resource = {'id': modelName, 'trainingInstances': training_instances};
var insert_reply = Prediction.Trainedmodels.insert(resource, c_projectId);
c_modelName = modelName;
Browser.msgBox('Insert reply:' + insert_reply, Browser.Buttons.OK);
} catch (err) {
Browser.msgBox('ERROR: ' + err, Browser.Buttons.OK);
}
}
/**
* this function should receive a dataset name, a table name and some columns' names
* it is supposed to return the "SELECT [COLUMNS] FROM [DATASET.TABLE]"
*
* @params:
* -
*/
function simpleQuery(columns, table, dataset, projectId) {
projectId = projectId || c_projectId;
return getQuery("SELECT " + columns.join(",") + " FROM [" + dataset + "." + table + "]", projectId);
}
/**
* this function should return the result for a given query
*
* @params:
* - query // the query to be evaluated
* - projectId // the googles's project id
*/
function getQuery(query, projectId) {
var request = {
query: query
};
var queryResults = BigQuery.Jobs.query(request, projectId);
var jobId = queryResults.jobReference.jobId;
// Check on status of the Query Job.
var sleepTimeMs = 500;
while (!queryResults.jobComplete) {
Utilities.sleep(sleepTimeMs);
sleepTimeMs *= 2;
queryResults = BigQuery.Jobs.getQueryResults(projectId, jobId);
}
// Get all the rows of results.
var rows = queryResults.rows;
while (queryResults.pageToken) {
queryResults = BigQuery.Jobs.getQueryResults(projectId, jobId, {
pageToken: queryResults.pageToken
});
rows = rows.concat(queryResults.rows);
}
return rows;
}
Regards.