I want to write data from CSV files into a BigQuery table.
I have built the following data pipeline to that end: CSV file in Cloud Storage => "GCS Text to Pub/Sub" Dataflow template => Cloud Function => BigQuery table.
The Cloud Function is configured to be triggered by the arrival of a new message on the Pub/Sub topic. My function execution keeps failing because the data extracted from the Pub/Sub message is not valid JSON, so it cannot be inserted into BigQuery. I have tried modifying the data format in the CSV file, but it keeps failing, to the point where I wonder whether this is even possible. The same pipeline with an NDJSON source file works like a charm and writes to BQ.
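To illustrate: as far as I understand, the Text-to-Pub/Sub template publishes each line of the source file as the payload of one message, so after base64 decoding my function receives the raw line. Roughly (the full NDJSON row is spelled out under the schema below):

123456,Jack,Jones,F,39,183,130,8,2501,990,2017-11-09   <- from the CSV source: not JSON, so JSON.parse throws
{"Member_ID":123456,"First_Name":"Jack",...}           <- from the NDJSON source: valid JSON, insert succeeds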
1) Here is my table (11 fields):
Member_ID INTEGER
First_Name STRING
Last_Name STRING
Gender STRING
Age INTEGER
Height INTEGER
Weight INTEGER
Hours_Sleep INTEGER
Calories_Consumed INTEGER
Exercise_Calories_Burned INTEGER
Date DATE
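For reference, a single row in NDJSON form matching this schema (built from the values of my sample CSV line in section 3 below) would look like this, and this is the shape the working NDJSON pipeline delivers:

{"Member_ID":123456,"First_Name":"Jack","Last_Name":"Jones","Gender":"F","Age":39,"Height":183,"Weight":130,"Hours_Sleep":8,"Calories_Consumed":2501,"Exercise_Calories_Burned":990,"Date":"2017-11-09"}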
2) Here is my function:
/**
 * Background Cloud Function to be triggered by Pub/Sub.
 *
 * @param {object} event The Cloud Functions event.
 * @param {function} callback The callback function.
 */
exports.subscribe = function (event, callback) {
  const BigQuery = require('@google-cloud/bigquery');
  const projectId = "iot-fitness-198120"; // Enter your project ID here
  const datasetId = "CDCData"; // Enter your BigQuery dataset name here
  const tableId = "fitness"; // Enter your BigQuery table name here

  const PubSubMessage = event.data;
  console.log(`The base64-encoded PubSub message is: ${PubSubMessage}`);

  // Incoming data is expected to be in JSON format; fall back to a
  // placeholder record if the message carries no data. Note the fallback
  // must itself be valid JSON, i.e. double-quoted keys and values.
  const incomingData = PubSubMessage.data
    ? Buffer.from(PubSubMessage.data, 'base64').toString()
    : '{"Member_ID":"na","First_Name":"na","Last_Name":"na","Gender":"na","Age":"na","Height":"na","Weight":"na","Hours_Sleep":"na","Calories_Consumed":"na","Exercise_Calories_Burned":"na","Date":"na"}';
  console.log(`The PubSub message as a string is: ${incomingData}`);

  const jsonData = JSON.parse(incomingData);
  console.log(`The PubSub message parsed as JSON is: ${jsonData}`);
  const rows = [jsonData];
  console.log(`Incoming data: ${rows}`);

  // Instantiates a client
  const bigquery = BigQuery({
    projectId: projectId
  });
  console.log(`BigQuery client instantiated`);

  // Inserts data into a table
  bigquery
    .dataset(datasetId)
    .table(tableId)
    .insert(rows)
    .then((insertErrors) => {
      console.log('Inserted:');
      rows.forEach((row) => console.log(row));
      if (insertErrors && insertErrors.length > 0) {
        console.log('Insert errors:');
        insertErrors.forEach((err) => console.error(err));
      }
    })
    .catch((err) => {
      console.error('ERROR:', err);
    });

  callback();
};
3) Lastly, below is the main error message when trying to import the following simple CSV line:
123456,Jack,Jones,F,39,183,130,8,2501,990,2017-11-09
2018-03-19 22:44:40.585 GST
function-fitnessPubSubToBQ
58216840933966
SyntaxError: Unexpected token , in JSON at position 6
    at Object.parse (native)
    at exports.subscribe (/user_code/index.js:17:25)
    at /var/tmp/worker/worker.js:695:16
    at /var/tmp/worker/worker.js:660:9
    at _combinedTickCallback (internal/process/next_tick.js:73:7)
    at process._tickDomainCallback (internal/process/next_tick.js:128:9)
It complains about the data format when executing JSON.parse(incomingData). How can I get the data into proper JSON format before parsing?
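Is the right approach to build the row object myself from the CSV line instead of calling JSON.parse? Something like this untested sketch is what I have in mind; it assumes the columns always arrive in schema order and that no value contains a comma:

// Untested sketch: map the decoded CSV values onto the column names
// in schema order, instead of JSON.parse(incomingData).
const fields = ['Member_ID', 'First_Name', 'Last_Name', 'Gender', 'Age',
  'Height', 'Weight', 'Hours_Sleep', 'Calories_Consumed',
  'Exercise_Calories_Burned', 'Date'];
const values = incomingData.trim().split(','); // naive split: breaks on quoted commas
const row = {};
fields.forEach((name, i) => { row[name] = values[i]; });
// Numeric fields are left as strings here; the INTEGER columns may need
// an explicit parseInt before streaming into BigQuery.
const rows = [row]; // then insert rows as in the function above

Would something along those lines be the recommended way?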
Thanks.