I'm trying to log which user triggered my Airflow DAG (the DAG terminates hanging Postgres queries), but it does not work: when I check the task log in Airflow, the user name shows up as 'None' everywhere. What could be wrong? What am I missing?
utils.py (where the session logic is defined):
import logging
from airflow.models.log import Log
from airflow.utils.db import create_session
from airflow.operators.python_operator import PythonOperator
from psycopg2.extras import RealDictCursor
from plugins.platform.kw_postgres_hook import KwPostgresHook
# To test this use this command:
# airflow tasks test killer_dag killer_query {date} -t '{"pid":"pid_value"}'
# Example :
# airflow tasks test killer_dag killer_query 20210803 -t '{"pid":"12345"}'
def kill_query(**kwargs):
    with create_session() as session:
        triggered_by = (
            session.query(Log.owner)
            .filter(
                Log.dag_id == "killer_dag",
                Log.event == "trigger",
                Log.execution_date == kwargs["execution_date"],
            )
            .limit(1)
            .scalar()
        )
    logging.info(
        f"'{triggered_by}' triggered the Killer_dag. Getting PID for the termination."
    )
    pid = kwargs["params"]["pid"]
    logging.info(f"This PID= '{pid}' is going to be terminated by '{triggered_by}'.")

    analdb_hook = KwPostgresHook(postgres_conn_id="anal_db")
    analdb_conn = analdb_hook.get_conn()
    analdb_cur = analdb_conn.cursor(cursor_factory=RealDictCursor)

    # Termination query receives pid as a parameter from cli
    killer_query = f"""
        select pg_terminate_backend('{pid}');
    """
    logging.info(killer_query)
    # Make sure the user provided an existing pid:
    # pg_terminate_backend returns a boolean, and if it is False we log
    # that the pid was not found.
    analdb_cur.execute(killer_query)
    result = analdb_cur.fetchone()
    exists = result["pg_terminate_backend"]
    if exists:
        logging.info(f"The pid = '{pid}' was terminated by '{triggered_by}'.")
    else:
        logging.info(f"The pid = '{pid}' was not found, check it again!")
    return exists
def kill_hanging_queries(killer_dag):
    PythonOperator(
        task_id="kill_query",
        python_callable=kill_query,
        dag=killer_dag,
        provide_context=True,
    )
killer_dag.py:
from datetime import datetime, timedelta
from airflow.models import DAG
from plugins.platform.utils import skyflow_email_list
from dags.utils.utils import kill_hanging_queries
killer_dag = DAG(
    dag_id="killer_dag",
    default_args={
        "owner": "Data Intelligence: Data Platform",
        "email": skyflow_email_list,
        "email_on_failure": True,
        "email_on_retry": False,
        "depends_on_past": False,
        "start_date": datetime(2021, 8, 8, 0, 0, 0),
        "retries": 0,
        "retry_delay": timedelta(minutes=1),
    },
)
kill_hanging_queries(killer_dag)