I am trying to access several MySQL tables through a for loop in my DAG. I pass the table names in a list, with the values coming from Airflow Variables. The code itself runs fine, but inside the for loop I am not getting results for all the tables; only the last table I mentioned in the list is received.
I don't know where I made a mistake. Can someone guide me through this?
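For reference, the three Variables just hold table names. A sketch of how they are set (the values shown here are placeholders, not necessarily my real table names):

from airflow.models import Variable

Variable.set('table1', 'Employee')    # placeholder value
Variable.set('table2', 'Department')  # placeholder value
Variable.set('table3', 'Location')    # placeholder value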
from datetime import datetime

from airflow import DAG
from airflow.models import Variable
from airflow.operators.dummy import DummyOperator
from airflow.operators.python import PythonOperator
from airflow.providers.amazon.aws.transfers.mysql_to_s3 import MySQLToS3Operator
from airflow.providers.mysql.hooks.mysql import MySqlHook
dag = DAG(
    'Multi_Incremental_Export',
    start_date=datetime(2021, 12, 5),
    default_args={'mysql_conn_id': 'mysql_connection', 'provide_context': True},
    catchup=False,
)
# Table names come from Airflow Variables
tab1 = Variable.get('table1')
tab2 = Variable.get('table2')
tab3 = Variable.get('table3')
tables = [tab1, tab2, tab3]

start = DummyOperator(task_id='dummy_task1', retries=2, dag=dag)
def max_source(**kwargs):
    """Return the newest created_at value from the source table."""
    ti = kwargs['ti']
    # 'tab' is the module-level name assigned by the for loop below
    request = f"select cast(max(created_at) as char) from {tab};"
    mysql_hook = MySqlHook(mysql_conn_id='mysql_connection', schema='mydata')
    connection = mysql_hook.get_conn()
    cursor = connection.cursor()
    cursor.execute(request)
    output = [x for x in cursor.fetchone()]
    return output[0]  # pushed to XCom as the task's return value
def max_metatbl(**kwargs):
    """Return the last exported created_at value recorded in the metatable."""
    ti = kwargs['ti']
    # 'meta_tbl' is also a module-level name set by the for loop below
    request = f"select coalesce(max(created_at), '2021-12-05 00:00:00') from {meta_tbl};"
    mysql_hook = MySqlHook(mysql_conn_id='mysql_connection', schema='mydata')
    connection = mysql_hook.get_conn()
    cursor = connection.cursor()
    cursor.execute(request)
    output = [x for x in cursor.fetchone()]
    return output[0]
def transfer(**kwargs):
    """Export the rows newer than the metatable watermark to S3."""
    ti = kwargs['ti']
    meta_val = ti.xcom_pull(task_ids=max_meta)
    trns = MySQLToS3Operator(
        task_id='mysql_task',
        query=f"select * from Employee where created_at > '{meta_val}';",
        s3_bucket='mydta',
        s3_key=f'{tab}-{val}.csv',
        mysql_conn_id='mysql_connection',
        aws_conn_id='aws_connection',
        file_format='csv',
        pd_kwargs={'header': False},
    )
    trns.execute(dict())
def mx_update(**kwargs):
    """Record the new watermark in the metatable."""
    ti = kwargs['ti']
    meta_max = ti.xcom_pull(task_ids=mx_src)
    request = f"insert into {meta_tbl} values('{meta_max}');"
    mysql_hook = MySqlHook(mysql_conn_id='mysql_connection', schema='mydata')
    connection = mysql_hook.get_conn()
    cursor = connection.cursor()
    cursor.execute(request)
    connection.commit()
for tab in tables:
    # Per-table names for the metatable and the four task ids
    meta_tbl = f'{tab}_Metatable'
    max_meta = f'{tab}_maxmeta'
    meta_updt = f'{tab}_metaupdate'
    mx_src = f'{tab}_maxsrc'
    mysql_task = f'{tab}_mysql_task'
    val = datetime.now().strftime("%m-%d-%Y, %H.%M")

    t1 = PythonOperator(
        task_id=mx_src,
        python_callable=max_source,
        provide_context=True,
        dag=dag,
    )
    t2 = PythonOperator(
        task_id=max_meta,
        python_callable=max_metatbl,
        provide_context=True,
        dag=dag,
    )
    t3 = PythonOperator(
        task_id=mysql_task,
        python_callable=transfer,
        provide_context=True,
        dag=dag,
    )
    t4 = PythonOperator(
        task_id=meta_updt,
        python_callable=mx_update,
        provide_context=True,
        dag=dag,
    )

    start >> t1 >> t2 >> t3 >> t4
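To make the intent clear, this is the per-table chain the loop is supposed to build (task ids shown for an example value tab = 'Employee'):

# dummy_task1 >> Employee_maxsrc >> Employee_maxmeta >> Employee_mysql_task >> Employee_metaupdate

All the chains are created, one per table, but only the last table in the list actually produces results.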