I want to pass a date value (from a variable) as the partition column in an SQL file. The variable is stored in XCom, but it is not getting evaluated when the template is rendered. Is there a better way to do this?
Rendered template:
TO 's3://opsake/prc/deldb/ldp_pipeline_hist/partiton_key=''{{ ti.xcom_pull(task_ids='genExecParam', key='app_run_dt') }}''/'
iam_role 'arn:aws20xxx:role/Re11dccc3role'
allowoverwrite
format as parquet
maxfilesize 100 mb;, parameters: None
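In other words, the xcom_pull expression is carried through as a literal string instead of the date. What I expect the path to render to (illustrative, assuming app_run_dt resolves to the run date, e.g. 2022-07-02):

TO 's3://opsake/prc/deldb/ldp_pipeline_hist/partiton_key=2022-07-02/'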
Code: here I am using ds, or the input date passed in via the dag_run conf.
def genExecParam(**kwargs):
    # use the date passed in the trigger conf if present, otherwise the logical date (ds)
    if 'ds_date' in kwargs['dag_run'].conf:
        app_run_dt = kwargs['dag_run'].conf['ds_date']
    else:
        app_run_dt = kwargs['ds']
    var_dt = datetime.fromisoformat(kwargs['ds'])
    app_prev_run_dt = (var_dt + timedelta(days=-1)).strftime('%Y-%m-%d')
    # (datetime.now(timezone('UTC'))+timedelta(days=-1)).strftime(date_format)
    app_run_id = datetime.now(timezone('UTC')).strftime('%Y%m%d%H%M%S')
    kwargs['ti'].xcom_push(key='app_run_id', value=app_run_id)
    kwargs['ti'].xcom_push(key='app_run_dt', value=app_run_dt)
    # push the previous-run date as well, since the DAG pulls key='app_prev_run_dt'
    kwargs['ti'].xcom_push(key='app_prev_run_dt', value=app_prev_run_dt)
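To see what actually lands in XCom, I can add a throwaway task downstream of genExecParam (a sketch only, not part of the real DAG) and check its log:

def checkXcom(**kwargs):
    # sketch: print the value genExecParam pushed, to confirm it is stored correctly
    print(kwargs['ti'].xcom_pull(task_ids='genExecParam', key='app_run_dt'))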
DAG:
from datetime import datetime, timedelta
from airflow import DAG
from airflow.operators.dummy import DummyOperator
from airflow.operators.python import PythonOperator
from airflow.providers.amazon.aws.operators.redshift import RedshiftSQLOperator
from airflow.utils.trigger_rule import TriggerRule
from pytz import timezone
import pendulum
from utils.dagUtils import *
localtz = pendulum.timezone('America/Los_Angeles')
# default arguments to DAG
default_args = {
    "owner": "delta",
    "depends_on_past": False,
    "start_date": datetime(2022, 7, 2, tzinfo=localtz),
    "email": [emalgrp],
    "email_on_failure": True,
    "email_on_retry": True,
    "retries": 1,
    "retry_delay": timedelta(minutes=5),
    "max_active_runs": 1,
    "catchup": False,
}
var_dt = '{{ ds }}'
with DAG(
    'extrse',
    default_args=default_args,
    description='extse',
    max_active_runs=1,
    schedule_interval='0 10 * * *',
    catchup=False,
    tags=['env', 'prod'],
    on_success_callback=dag_success_callback,
    on_failure_callback=dag_failure_callback,
    template_searchpath='/usr/local/airflow/dags/modules/'
) as dag:
    AppStart = DummyOperator(task_id='start')

    # templated strings that should resolve to the values pushed by genExecParam
    var_ds_date = "{{ ti.xcom_pull(task_ids='genExecParam', key='app_run_dt') }}"
    var_ds_prev_date = "{{ ti.xcom_pull(task_ids='genExecParam', key='app_prev_run_dt') }}"
    app_run_id = "{{ ti.xcom_pull(task_ids='genExecParam', key='app_run_id') }}"

    genExecParam = PythonOperator(task_id='genExecParam', python_callable=genExecParam, provide_context=True)
    sourouse = RedshiftSQLOperator(
        task_id='sourouse',
        redshift_conn_id='deltn_id',
        sql=['prc_layer/sql/lighthract.sql'],
        params={"data_bucket_name": data_bucket_name, "prc_db_dir": prc_db_dir, "var_ds_date": var_ds_date})
    AppStart >> genExecParam >> sourouse
SQL file:
UNLOAD ('select * from test.sales_report')
TO 's3://{{ params.data_bucket_name }}/{{ params.prc_db_dir }}/ldp_pipeline_hist/partiton_key={{ params.var_ds_date }}/'
iam_role 'arn:awrole'
allowoverwrite
format as parquet
maxfilesize 100 mb;
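If the date rendered correctly, the unload would land under a date-partition folder, something like (illustrative; exact file names depend on Redshift's UNLOAD naming):

s3://opsake/prc/deldb/ldp_pipeline_hist/partiton_key=2022-07-02/0000_part_00.parquet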