0

I am trying to pull the output of an Airflow task and dynamically spin up multiple task instances based on that output. But when I try to read the output of filter_backfill_info as filter_backfill_info.output, I get the error "XComArg object has no attribute output". The Airflow documentation here: https://airflow.apache.org/docs/apache-airflow/stable/_api/airflow/models/xcom_arg/index.html shows xcomarg = any_op.output, but it doesn't seem to work as expected. Or am I doing something wrong? The output of filter_backfill_info is a dict.

def filter_backfill_dates(task_id: str,
                          tableid: str,
                          source_checksum_query: List,
                          target_checksum_query: List
                          ):
    """Create the task that works out which dates need a backfill.

    The two checksum result sets are compared row by row; every row that
    appears on only one side marks its date (the row's first element) as
    needing a backfill.

    Args:
        task_id: Task id passed to the ``@task`` decorator.
        tableid: Table identifier stored in the resulting backfill details.
        source_checksum_query: Checksum rows from the source side.
        target_checksum_query: Checksum rows from the target side.

    Returns:
        The result of invoking the ``@task``-decorated function, for wiring
        into downstream tasks. At run time the task itself returns
        ``BackfillDetails(...).asdict()`` when the checksums differ and
        ``None`` when they match.
    """
    @task(task_id=task_id)
    def filter_backfill_dates_task(tableid, _source_checksum_query_output, _target_checksum_query_output):
        LOG.info(f"checksum query results: source={_source_checksum_query_output}  target={_target_checksum_query_output}")

        # Rows present on exactly one side (source-only first, then
        # target-only); None rows are ignored.
        source_only = [
            row for row in _source_checksum_query_output
            if row is not None and row not in _target_checksum_query_output
        ]
        target_only = [
            row for row in _target_checksum_query_output
            if row is not None and row not in _source_checksum_query_output
        ]
        checksum_diff = source_only + target_only
        LOG.info(f'checksum_diff is {checksum_diff}')

        # Nothing differs: the task returns None, exactly as before.
        if not checksum_diff:
            return None

        # The first element of each differing row is the date; de-duplicate
        # because the same date can show up on both sides.
        dates = list({row[0] for row in checksum_diff})
        return BackfillDetails(tableid=tableid, dates=dates).asdict()

    return filter_backfill_dates_task(tableid, source_checksum_query, target_checksum_query)


# Build the filter task, feeding it the `.output` of the two checksum query
# operators.
# NOTE(review): the last argument is spelled `target_checkum_query` (no "s"
# in "checksum") — verify it matches the operator's real name.
filter_backfill_info = filter_backfill_dates(task_names_constructor.filter_backfill_dates_task_name,
                                                    tableid,
                                                    source_checksum_query.output,
                                                    target_checkum_query.output)

# NOTE(review): this is the access reported to fail with
# "XComArg object has no attribute output". filter_backfill_dates() returns
# the result of calling the @task-decorated function, which (per that error)
# is already an XComArg, so there is no further `.output` on it.
# Presumably the dict only exists once the task has run, so a DAG-parse-time
# `for` loop over it likely cannot work as written — confirm; dynamic task
# mapping may be the intended tool here.
for dt in filter_backfill_info.output['dates']:
    # end_dt is the day after dt, formatted as YYYY-MM-DD.
    end_dt = datetime.strftime(datetime.strptime(str(dt), '%Y-%m-%d') + timedelta(days=1), '%Y-%m-%d')
    # NOTE(review): the command reads key 'table', but the filter task builds
    # its payload with `tableid=` — verify which key the dict really has.
    trigger_replicator_dag = BashOperator(task_id=f'trigger_replicator_dag_task_{dt}', \
                    bash_command=f"airflow tasks clear -d -s '{dt}' -e '{end_dt}' -y -t business_date_{filter_backfill_info['table']} importer_daily_replicator")


# This statement sits outside the loop, so only the BashOperator created in
# the final iteration is set downstream of the filter task.
filter_backfill_info >> trigger_replicator_dag
pyhotshot
  • 445
  • 8
  • 23

1 Answer

1

If you want a task that returns a dictionary to push each key as its own XCom key (rather than storing the whole dictionary under return_value), add multiple_outputs=True to the @task decorator.

# multiple_outputs=True is the only change to the decorator: per the answer,
# each key of the returned dict becomes its own XCom key instead of the whole
# dict being stored under return_value.
@task(task_id=task_id, multiple_outputs=True)
def filter_backfill_dates_task(tableid, _source_checksum_query_output, _target_checksum_query_output):
    # Body elided in the answer (presumably unchanged from the question).
    ...
ozs
  • 3,051
  • 1
  • 10
  • 19