I have a notebook in which I access data through APIs, process the data, and send the results to a BigQuery table as well as to a GCS bucket. Everything works as it should when the notebook is run manually.
However, when it is scheduled, it fails with the following error:
/opt/conda/lib/python3.7/site-packages/pandas/core/frame.py in to_gbq(self, destination_table, project_id, chunksize, reauth, if_exists, auth_local_webserver, table_schema, location, progress_bar, credentials)
1938 location=location,
1939 progress_bar=progress_bar,
-> 1940 credentials=credentials,
1941 )
1942
/opt/conda/lib/python3.7/site-packages/pandas/io/gbq.py in to_gbq(dataframe, destination_table, project_id, chunksize, reauth, if_exists, auth_local_webserver, table_schema, location, progress_bar, credentials)
221 location=location,
222 progress_bar=progress_bar,
--> 223 credentials=credentials,
224 )
/opt/conda/lib/python3.7/site-packages/pandas_gbq/gbq.py in to_gbq(dataframe, destination_table, project_id, chunksize, reauth, if_exists, auth_local_webserver, table_schema, location, progress_bar, credentials, api_method, verbose, private_key)
1155 credentials=connector.credentials,
1156 )
-> 1157 table_connector.create(table_id, table_schema)
1158 else:
1159 original_schema = pandas_gbq.schema.to_pandas_gbq(table.schema)
/opt/conda/lib/python3.7/site-packages/pandas_gbq/gbq.py in create(self, table_id, schema)
1311 _Dataset(
1312 self.project_id, credentials=self.credentials, location=self.location,
-> 1313 ).create(self.dataset_id)
1314
1315 table_ref = TableReference(
/opt/conda/lib/python3.7/site-packages/pandas_gbq/gbq.py in create(self, dataset_id)
1413 self.client.create_dataset(dataset)
1414 except self.http_error as ex:
-> 1415 self.process_http_error(ex)
/opt/conda/lib/python3.7/site-packages/pandas_gbq/gbq.py in process_http_error(ex)
384 raise QueryTimeout("Reason: {0}".format(ex))
385
--> 386 raise GenericGBQException("Reason: {0}".format(ex))
387
388 def download_table(
GenericGBQException: Reason: 403 POST https://bigquery.googleapis.com/bigquery/v2/projects/****************/datasets?prettyPrint=false: Access Denied: Project ******************: User does not have bigquery.datasets.create permission in project *****************.
(The stars mask the project ID; notably, it is not a project ID I have chosen.)
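That unfamiliar project ID suggests the scheduled run resolves different default credentials than my interactive session does. As a diagnostic, here is a minimal sketch (using google.auth, which the BigQuery client libraries rely on) to print which project and identity the runtime actually sees:

import google.auth

# Resolve the application-default credentials of the current runtime;
# on a scheduled execution this is typically a service account,
# not my user account.
credentials, project = google.auth.default()
print("Resolved project:", project)
# Service-account credentials expose the account email; the fallback
# covers plain user credentials (assumption: attribute name as below).
print("Identity:", getattr(credentials, "service_account_email", "user credentials"))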
The error is raised by the following part of the script:
import io
import base64

import matplotlib.pyplot as plt
from google.cloud import storage

# Write the aggregated results to BigQuery and to a GCS bucket.
Table_grouped_to_bg.to_gbq('retailer_accuracy.testbin', Context.default().project_id,
                           chunksize=10000, if_exists='append')
Table_grouped_to_bg.to_csv('gs://retailer-sectionalized-labels-csv/' + 'Table_grouped_'
                           + str(Store_name) + '_' + str(Date) + '.csv',
                           sep=';', encoding='utf-8-sig')

# Bar plot of the percentage in each distance bin, annotated with bar values.
ax1 = Table_grouped.plot.bar(x="distance_bin", y="percentage", rot=70,
                             title="Percentage in each bin, {}".format(Date),
                             figsize=(15, 5))
x_offset = -0.01
y_offset = 0.02
for p in ax1.patches:
    b = p.get_bbox()
    val = "{:+.2f}".format(b.y1 + b.y0)
    ax1.annotate(val, ((b.x0 + b.x1) / 2 + x_offset, b.y1 + y_offset))
fig = ax1.get_figure()

def saving_figure(path_logdir):
    fig_to_upload = plt.gcf()
    # Save the figure image to a bytes buffer.
    buf = io.BytesIO()
    fig_to_upload.savefig(buf, format='png')
    buf.seek(0)
    image_as_a_string = base64.b64encode(buf.read())
    # Init the GCS client and upload the buffer contents; path_logdir is the
    # path under which the file is stored in the bucket.
    client = storage.Client()
    bucket = client.get_bucket('retailer-sectionalized-statistics-plot-png')
    blob = bucket.blob(path_logdir)
    blob.upload_from_string(image_as_a_string, content_type='image/png')

plt.show(block=True)
saving_figure('Accuracy_barplot_' + str(Store_name) + '_' + str(Date) + '.png')
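Presumably the GCS upload in saving_figure relies on the same default credentials, so a scheduled run could hit the same permission wall on the bucket. A minimal sketch of constructing that client with explicit credentials instead (the key-file path is a placeholder, not something from my setup):

from google.cloud import storage
from google.oauth2 import service_account

# Hypothetical key file for a service account with access to the bucket.
creds = service_account.Credentials.from_service_account_file("/path/to/key.json")
client = storage.Client(credentials=creds, project=creds.project_id)
bucket = client.get_bucket('retailer-sectionalized-statistics-plot-png')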
I understand that it is no longer I who runs the notebook when it is scheduled. The question, then, is: how do I grant the required permissions to the GCP instance (service account) that runs the scheduled notebook?
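For concreteness, a minimal sketch of one possible fix (assuming a service-account key file; the path and role below are assumptions, not from my setup) is to pass explicit credentials to to_gbq, which accepts a credentials argument, as the traceback above shows:

from google.oauth2 import service_account

# Hypothetical key file for a service account granted a role that includes
# bigquery.datasets.create (for example roles/bigquery.dataEditor).
credentials = service_account.Credentials.from_service_account_file(
    "/path/to/service-account-key.json"
)

Table_grouped_to_bg.to_gbq(
    'retailer_accuracy.testbin',
    project_id=credentials.project_id,
    chunksize=10000,
    if_exists='append',
    credentials=credentials,  # overrides the scheduled runtime's default identity
)

Alternatively, if the scheduled executor runs as a service account, granting that account a BigQuery role with dataset-creation rights on the target project in IAM should let the notebook run unmodified.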