I am running MWAA locally in Docker with Airflow 2.0.2. I am trying to pull data from Snowflake and store it in a local folder, but I keep getting a "missing 1 required positional argument" error. How can I rectify this error?

I have checked the arguments to snowflake_to_pandas and they match the required arguments, but I still get this error.
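From what I understand, PythonOperator passes each key in op_kwargs to the python_callable as a keyword argument, so the keys have to match the function's parameter names exactly. Here is a minimal sketch of that mapping as I understand it (the DAG, task, and function names here are just illustrative):

from datetime import datetime
from airflow import DAG
from airflow.operators.python import PythonOperator

def my_callable(query, snowflake_conn_id, **kwargs):
    # query and snowflake_conn_id arrive as keyword arguments;
    # the Airflow runtime context lands in **kwargs
    print(query, snowflake_conn_id)

with DAG('op_kwargs_demo', start_date=datetime(2021, 6, 14), schedule_interval=None) as demo_dag:
    demo_task = PythonOperator(
        task_id='demo_task',
        python_callable=my_callable,
        # each key here must match a parameter name of my_callable
        op_kwargs={'query': 'select 1;', 'snowflake_conn_id': 'my_conn'},
    )

My full DAG is below: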
from airflow import DAG
from contextlib import closing
from datetime import datetime, timedelta
import pandas as pd
from airflow.providers.snowflake.hooks.snowflake import SnowflakeHook
from airflow.providers.snowflake.operators.snowflake import SnowflakeOperator
from airflow.operators.python import PythonOperator

default_args = {
    'owner': 'Datexland',
    'depends_on_past': False,
    'start_date': datetime(2021, 6, 14),
    'email': ['ata.kmu@roer.com'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
}

# queries
query_test = """select * from DATA_CS_DEV.MING.CTY limit 1000;"""
query = """select current_date() as date;"""
def execute_snowflake(sql, snowflake_conn_id, with_cursor=False):
    """Execute snowflake query."""
    hook_connection = SnowflakeHook(
        snowflake_conn_id=snowflake_conn_id
    )
    with closing(hook_connection.get_conn()) as conn:
        with closing(conn.cursor()) as cur:
            cur.execute(sql)
            res = cur.fetchall()
            if with_cursor:
                return (res, cur)
            else:
                return res

def snowflake_to_pandas(query_test, snowflake_conn_id, **kwargs):
    """Convert the Snowflake result set to a DataFrame and save it as CSV."""
    result, cursor = execute_snowflake(query_test, snowflake_conn_id, True)
    headers = list(map(lambda t: t[0], cursor.description))
    df = pd.DataFrame(result)
    df.columns = headers
    # save the file locally before sending
    # NOTE: this is not recommended for multi-worker deployments
    print(df.head())  # pandas has no .show(); that is a Spark API
    df.to_csv('/usr/local/airflow/data.csv', header=True, mode='w', sep=',')
    return 'This File Sent Successfully'

dag = DAG(
    'SNOWFLAKE_TO_S3',
    default_args=default_args,
    schedule_interval=None,
    max_active_runs=1,
    catchup=False
)
# Connection Test
snowflake_dag = SnowflakeOperator(
    task_id='test_snowflake_connection',
    sql=query,
    snowflake_conn_id='snowflake_conn_id',
    dag=dag
)
# Data Upload Task
upload_stage = PythonOperator(
    task_id='UPLOAD_FILE_INTO_S3_BUCKET',
    python_callable=snowflake_to_pandas,
    op_kwargs={"query": query_test, "snowflake_conn_id": 'snowflake_conn_id'},
    dag=dag
)
snowflake_dag >> upload_stage
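As an aside, I was also wondering whether the manual cursor handling is necessary at all: if I am reading the provider docs correctly, SnowflakeHook inherits get_pandas_df from DbApiHook, so a sketch like the following (untested, same placeholder connection id) could replace snowflake_to_pandas entirely:

def snowflake_to_pandas_v2(query, snowflake_conn_id, **kwargs):
    """Sketch: let the hook build the DataFrame directly (untested)."""
    hook = SnowflakeHook(snowflake_conn_id=snowflake_conn_id)
    df = hook.get_pandas_df(query)  # inherited from DbApiHook
    df.to_csv('/usr/local/airflow/data.csv', header=True, mode='w', sep=',')
    return 'This File Sent Successfully'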