2

I am currently trying to configure the parallel execution of dynamic tasks in Apache Airflow 2.3.x.

When executing the DAG below, let's say 100 dynamic "sanitize_payment" tasks are created in "sanitize_payment_group". I have set the value of "max_active_tis_per_dag" to 16 for now, but I want to change the value depending on some condition. For example, if "batch_id" is 1, then max_active_tis_per_dag=10, and if "batch_id" is 2, then max_active_tis_per_dag=15.

# DAG factory for "data_transfer_dag": a manually triggered pipeline
# (schedule_interval=None) that fans a batch descriptor out through three
# mapped task groups and then runs a final logging task.
@dag(
    dag_id="data_transfer_dag",
    description="Data transfer dag",
    schedule_interval=None,
    tags=["Data transfer"],
    default_args=dict(
        owner="Open",
        start_date=pendulum.today("UTC").add(days=-1),
        depends_on_past=False,
        email="",
        email_on_failure=False,
        email_on_retry=False,
        retries=1,
        retry_delay=timedelta(seconds=10),
    ),
)
def data_transfer():
    # NOTE: each max_active_tis_per_dag below is a literal passed to the
    # @task decorator, so it is fixed when this function body executes;
    # nothing here reads the run's conf to choose it.

    @task()
    def fetch_batch_user_to_process(**kwargs):
        # "batch_id" is read from the triggering run's conf; .get() yields
        # None when the key is absent.
        run_conf = kwargs['dag_run'].conf
        return [{"batch_id": run_conf.get('batch_id')}]

    @task_group(group_id="create_contact_group")
    def create_contact_group(user_data):
        @task(max_active_tis_per_dag=16)
        def create_contact(user_info):
            """Some Operation"""
            # Placeholder: the input is passed through unchanged.
            return user_info

        # One mapped task instance per element of user_data.
        return create_contact.expand(user_info=user_data)

    @task_group(group_id="sanitize_payment_group")
    def sanitize_payment_group(users_data):
        @task(max_active_tis_per_dag=16)
        def sanitize_payment(data):
            """Some operation"""
            # Placeholder: the input is passed through unchanged.
            return data

        # One mapped task instance per element of users_data.
        return sanitize_payment.expand(data=users_data)

    @task_group(group_id="process_payment_group")
    def process_payment_group(users_data):
        @task(max_active_tis_per_dag=1)
        def process_payment(payment_data):
            """Some operation"""
            # Placeholder: the input is passed through unchanged.
            return payment_data

        # One mapped task instance per element of users_data.
        return process_payment.expand(payment_data=users_data)

    @task()
    def end_processing():
        logging.info("ending the dag.")

    # Terminal task and batch fetch are instantiated first, in this order.
    end = end_processing()
    batch_to_process = fetch_batch_user_to_process()

    # Chain: contacts -> sanitized payments -> processed payments -> end.
    contacts = create_contact_group(batch_to_process)
    sanitized = sanitize_payment_group(contacts)
    process_payment_group(sanitized) >> end


# Call the decorated factory and bind its result to a module-level name
# (presumably so Airflow's DAG discovery can find it — confirm against the
# deployment's DAG-loading setup).
DAG = data_transfer()
Pratyush
  • 21
  • 2

0 Answers0