I am currently trying to make the parallel execution of dynamically mapped tasks in Apache Airflow 2.3.x configurable.
When executing the DAG below, let's say 100 dynamic "sanitize_payment" tasks are created in "sanitize_payment_group". I have set the value of "max_active_tis_per_dag" to 16 for now, but I want to change the value depending on some condition. For example, if "batch_id" is 1 then max_active_tis_per_dag=10, and if "batch_id" is 2 then max_active_tis_per_dag=15.
# DAG factory for "data_transfer_dag".
#
# Pipeline shape (each stage is a task group holding one mapped task):
#   fetch_batch_user_to_process
#       -> create_contact_group
#       -> sanitize_payment_group
#       -> process_payment_group
#       -> end_processing
#
# The DAG has no schedule (schedule_interval=None) and reads "batch_id" from
# the run configuration (dag_run.conf).
@dag(
    dag_id="data_transfer_dag",
    schedule_interval=None,
    tags=["Data transfer"],
    default_args={
        "owner": "Open",
        "start_date": pendulum.today("UTC").add(days=-1),
        "depends_on_past": False,
        "email_on_failure": False,
        "email_on_retry": False,
        "email": "",
        "retries": 1,
        "retry_delay": timedelta(seconds=10),
    },
    description="Data transfer dag",
)
def data_transfer():
    # NOTE: every max_active_tis_per_dag value below is a decorator argument,
    # so it is evaluated once, while this function body runs to build the DAG.
    # dag_run.conf (and therefore batch_id) is only read later, inside
    # fetch_batch_user_to_process, so these limits cannot depend on batch_id
    # as written.

    @task()
    def fetch_batch_user_to_process(**kwargs):
        # Read the batch id supplied in the run configuration; .get() yields
        # None when the key is absent.
        batch_id = kwargs["dag_run"].conf.get("batch_id")
        # A one-element list: downstream tasks are expanded over this list.
        return [{"batch_id": batch_id}]

    @task_group(group_id="sanitize_payment_group")
    def sanitize_payment_group(users_data):
        # Expands sanitize_payment over users_data and returns the mapped
        # result so the next group can expand over it in turn.

        @task(max_active_tis_per_dag=16)
        def sanitize_payment(data):
            """Some operation"""
            correct_api_version_data = data  # Dummy operation
            return correct_api_version_data

        return sanitize_payment.expand(data=users_data)

    @task_group(group_id="process_payment_group")
    def process_payment_group(users_data):
        # Expands process_payment over users_data; the limit of 1 is the
        # lowest of the three stages.

        @task(max_active_tis_per_dag=1)
        def process_payment(payment_data):
            """Some operation"""
            data = payment_data  # Dummy operation
            return data

        return process_payment.expand(payment_data=users_data)

    @task_group(group_id="create_contact_group")
    def create_contact_group(user_data):
        # Expands create_contact over user_data and returns the mapped result.

        @task(max_active_tis_per_dag=16)
        def create_contact(user_info):
            """Some Operation"""
            if_contact_present = user_info  # Dummy operation
            return if_contact_present

        return create_contact.expand(user_info=user_data)

    @task()
    def end_processing():
        # Terminal marker task: only logs that the run reached the end.
        logging.info("ending the dag.")

    end = end_processing()
    batch_to_process = fetch_batch_user_to_process()

    # Calls nest inside-out: contacts first, then sanitising, then payment
    # processing; the final marker task is wired after the last group.
    contacts = create_contact_group(batch_to_process)
    sanitized = sanitize_payment_group(contacts)
    process_payment_group(sanitized) >> end
# Call the decorated factory once at import time and bind its result to a
# module-level name.
# NOTE(review): presumably this module-level binding is what lets Airflow's
# DAG discovery pick the DAG up — confirm against the deployment's Airflow version.
DAG = data_transfer()