What I want to do :
- compare missing ID from MySQL and BigQuery
- get all data from MySQL where id in missing ID
table = 'orders'
def get_missing_ids():
aws_id = get_aws_id(table)
bq_id = get_bq_id(table)
missing_id = [np.setdiff1d(aws_id,bq_id)]
missing_ids = ', '.join(map(str,missing_id))
return missing_ids
missing_ids = get_missing_ids()
get_missing_data = MysqlToGCS(
task_id = 'get_orders_from_aws',
sql = """select *
from orders
where id in ({{params.missing_ids}})""",
params = {'missing_ids':missing_ids},
bucket = 'airflow_bucket',
filename = 'data/orders/db-orders{{ds}}{}',
mysql_conn_id = 'aws_readreplica',
approx_max_file_size_bytes = 100000000,
google_cloud_storage_conn_id = 'google_cloud_storage_default',
dag=dag)
def print_done():
print("done boiiiii")
time.sleep(60)
task = PythonOperator(
task_id='done',
python_callable=print_done,
dag=dag)
task.set_upstream(get_missing_data)
I read about Xcom, but I don't understand how to implement it here.