I am trying to insert new data into a table already created in BigQuery. After processing the data and creating the data frame, I want to add the new rows to the table. I am looking for an efficient way of doing it.
Currently what I am doing is:
def export_items_to_bigquery(ID_recipien, action_date_hour, monday, tuesday, wednesday, thursday, friday, saturday, sunday, click_action, read_action, converted, imedeate_response, weight, weight_mu, processing_date, bigquery_client=None, table=None):
    """Insert one row into the ``internal.Processed_data`` BigQuery table.

    The original version created a new ``bigquery.Client()`` and issued a
    ``get_table`` API call on *every* invocation — the main source of the
    slowness when called once per DataFrame row. Callers can now build the
    client and table once and pass them in; omitting them preserves the old
    behavior exactly.

    Args:
        ID_recipien ... processing_date: the 16 column values for the new row,
            in the destination table's column order (assumed — confirm against
            the table schema).
        bigquery_client: optional pre-built ``bigquery.Client`` to reuse.
        table: optional pre-fetched ``bigquery.Table`` to reuse.

    Returns:
        The list of insert errors from the streaming API ([] on success).
    """
    if bigquery_client is None:
        # Fall back to per-call client creation (original behavior).
        bigquery_client = bigquery.Client()
    if table is None:
        # Resolve the destination table; get_table is an API round-trip,
        # so pass `table` in when inserting many rows.
        dataset_ref = bigquery_client.dataset('internal')
        table_ref = dataset_ref.table('Processed_data')
        table = bigquery_client.get_table(table_ref)  # API call
    # BUG FIX: the last element was `processing_dat` (undefined name ->
    # NameError); the parameter is `processing_date`.
    rows_to_insert = [(ID_recipien, action_date_hour, monday, tuesday, wednesday, thursday, friday, saturday, sunday, click_action, read_action, converted, imedeate_response, weight, weight_mu, processing_date)]
    errors = bigquery_client.insert_rows(table, rows_to_insert)  # API request
    print(errors)
    return errors
# Walk the DataFrame once with itertuples(): one namedtuple per row instead of
# 21 separate `out.iloc[i].<col>` lookups per row — each iloc call re-resolves
# the row positionally and is very slow in a loop.
for row in out.itertuples(index=False):
    hrs = int(row.hours)
    ID_recipient = int(row.ID_recipient)
    Last_Date = str(row.Last_Date)
    last_id_send = int(row.last_id_send)
    unique_send_id_action = int(row.unique_send_id_action)
    click_count = int(row.click_count)
    read_count = int(row.read_count)
    Monday = int(row.Monday)
    Tuesday = int(row.Tuesday)
    Wednesday = int(row.Wednesday)
    Thursday = int(row.Thursday)
    Friday = int(row.Friday)
    Saturday = int(row.Saturday)
    Sunday = int(row.Sunday)
    Total_day_active = int(row.Total_day_active)
    click_read_mulrtiplier = int(row.click_read_mulrtiplier)
    click_read_weight = int(row.click_read_weight)
    final_weight = int(row.final_weight)
    last_hour = int(row.last_hour)
    last_email = int(row.last_email)
    last_log_id = int(row.last_log_id)
    # NOTE(review): this call passes 21 positional arguments but
    # export_items_to_bigquery is defined with 16 parameters (and with
    # different names/order), so it will raise TypeError at runtime. The
    # intended column-to-parameter mapping is not clear from this code —
    # confirm against the destination table schema before relying on it.
    # For bulk loads, prefer building all rows first and issuing a single
    # batched insert_rows (or a load job) instead of one API call per row.
    export_items_to_bigquery(hrs, ID_recipient, Last_Date, last_id_send, unique_send_id_action, click_count, read_count, Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday, Total_day_active, click_read_mulrtiplier, click_read_weight, final_weight, last_hour, last_email, last_log_id)
This is very inefficient
I also tried
new_data.to_gbq('internal.intermediate_table_weight_calc', 'ids-internal', chunksize=10000, if_exists='append')
but it gives the error "Please verify that the structure and data types in the DataFrame match the schema of the destination table", even though all the column names are the same and every dtype is int.