
I'm setting up an Airflow 2.3.0 (Python 3.9.13) cluster on 4 nodes (on-prem infrastructure):

  • node1, node2 run: web UI, scheduler, flower, and worker.
  • node3, node4 run: worker.

Now I'm facing a problem: the Web UI does not display any DAGs, even though they are listed. Due to lack of reputation, I'm not allowed to attach images directly; the screenshots I would have attached show:
  • Airflow Web UI
  • Airflow Celery Flower UI
  • Output of the airflow dags list command
  • Airflow directory
  • Airflow dags folder
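
Since the Airflow 2 web UI renders DAGs from the serialized_dag table in the metadata DB rather than from the .py files directly, one check I can run on node1 is roughly the following (a minimal diagnostic sketch; it only assumes the airflow package and the metadata DB connection from airflow.cfg are available):

# Sketch: compare the DAGs parsed from disk with the DAGs serialized in the
# metadata DB (the latter is what the web UI actually displays).
from airflow.models import DagBag
from airflow.models.serialized_dag import SerializedDagModel
from airflow.utils.session import create_session

dag_bag = DagBag()  # parses dags_folder from airflow.cfg
print("parsed from disk:", sorted(dag_bag.dag_ids))
print("import errors:", dag_bag.import_errors)

with create_session() as session:
    serialized_ids = [row.dag_id for row in session.query(SerializedDagModel.dag_id)]
print("serialized in DB:", sorted(serialized_ids))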

I created a user and group airflow to run the webserver, flower, scheduler, and worker as systemd services. Here are the service files and config files:

airflow-webserver.service

[Unit]
Description=Airflow webserver daemon
After=network.target

[Service]
EnvironmentFile=/etc/sysconfig/airflow
User=airflow
Group=airflow
Type=simple
ExecStart=/usr/local/bin/airflow webserver
Restart=always
RestartSec=15
PrivateTmp=true
KillMode=mixed
TimeoutSec=30
TimeoutStopSec=10

[Install]
WantedBy=multi-user.target

airflow-scheduler.service

[Unit]
Description=Airflow scheduler daemon
After=network.target

[Service]
EnvironmentFile=/etc/sysconfig/airflow
User=airflow
Group=airflow
Type=simple
ExecStart=/usr/local/bin/airflow scheduler
Restart=always
RestartSec=15
KillMode=mixed
TimeoutSec=30
TimeoutStopSec=10

[Install]
WantedBy=multi-user.target

airflow-flower.service

[Unit]
Description=Airflow Flower daemon
After=network.target

[Service]
EnvironmentFile=/etc/sysconfig/airflow
User=airflow
Group=airflow
Type=simple
ExecStart=/usr/local/bin/airflow celery flower
Restart=always
RestartSec=15
KillMode=mixed
TimeoutSec=30
TimeoutStopSec=10

[Install]
WantedBy=multi-user.target

airflow-worker.service

[Unit]
Description=Airflow celery worker daemon
After=network.target

[Service]
EnvironmentFile=/etc/sysconfig/airflow
User=airflow
Group=airflow
Type=simple
ExecStart=/usr/local/bin/airflow celery worker -q default,sys,geo,ip-log,imes,data-warehouse,<server-name>
Restart=always
RestartSec=30s

[Install]
WantedBy=multi-user.target
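
For context on the -q list in ExecStart above: tasks are routed to those Celery queues through the operator's queue argument. An illustrative snippet (the dag_id and task are made up, not one of the real DAGs):

from datetime import datetime

from airflow import DAG
from airflow.operators.bash import BashOperator

# Illustrative only: a task lands on one of the queues listed in the worker's
# -q option via the operator's queue argument.
with DAG(dag_id="queue_routing_demo", start_date=datetime(2022, 1, 1), schedule_interval=None) as dag:
    BashOperator(
        task_id="export_geo",
        bash_command="echo exporting geo data",
        queue="geo",  # picked up only by workers listening on the geo queue
    )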

airflow.cfg

[core]
dags_folder = /u01/soft_root/opt/airflow/dags

hostname_callable = socket.getfqdn

default_timezone = Asia/Ho_Chi_Minh

executor = CeleryExecutor

parallelism = 128

max_active_tasks_per_dag = 16

dags_are_paused_at_creation = True

max_active_runs_per_dag = 16

load_examples = False

plugins_folder = /u01/soft_root/opt/airflow/plugins

execute_tasks_new_python_interpreter = False

fernet_key = ***

donot_pickle = False

dagbag_import_timeout = 30.0

dagbag_import_error_tracebacks = True

dagbag_import_error_traceback_depth = 2

dag_file_processor_timeout = 50

task_runner = StandardTaskRunner

default_impersonation =

security =

unit_test_mode = False

enable_xcom_pickling = False

killed_task_cleanup_time = 60

dag_run_conf_overrides_params = True

dag_discovery_safe_mode = True

dag_ignore_file_syntax = regexp

default_task_retries = 0

default_task_weight_rule = downstream

default_task_execution_timeout =

min_serialized_dag_update_interval = 30

compress_serialized_dags = False

min_serialized_dag_fetch_interval = 10

max_num_rendered_ti_fields_per_task = 30

check_slas = True

xcom_backend = airflow.models.xcom.BaseXCom

lazy_load_plugins = True

lazy_discover_providers = True

hide_sensitive_var_conn_fields = True

sensitive_var_conn_names =

default_pool_task_slot_count = 128

max_map_length = 1024

[database]
sql_alchemy_conn = postgresql+psycopg2://***


sql_engine_encoding = utf-8


sql_alchemy_pool_enabled = True

sql_alchemy_pool_size = 2

sql_alchemy_max_overflow = 10

sql_alchemy_pool_recycle = 1800

sql_alchemy_pool_pre_ping = True

sql_alchemy_schema =


load_default_connections = False

max_db_retries = 3

[logging]
base_log_folder = /u01/soft_root/var/log/airflow

remote_logging = False

remote_log_conn_id =

google_key_path =

remote_base_log_folder =

encrypt_s3_logs = False

logging_level = INFO

celery_logging_level =

fab_logging_level = WARNING

logging_config_class = airflow_logging_settings.LOGGING_CONFIG

colored_console_log = True

colored_log_format = [%%(blue)s%%(asctime)s%%(reset)s] {{%%(blue)s%%(filename)s:%%(reset)s%%(lineno)d}} %%(log_color)s%%(levelname)s%%(reset)s - %%(log_color)s%%(message)s%%(reset)s
colored_formatter_class = airflow.utils.log.colored_log.CustomTTYColoredFormatter

log_format = [%%(asctime)s] {{%%(filename)s:%%(lineno)d}} %%(levelname)s - %%(message)s
simple_log_format = %%(asctime)s %%(levelname)s - %%(message)s
task_log_prefix_template =
log_filename_template = dag_id={{ ti.dag_id }}/run_id={{ ti.run_id }}/task_id={{ ti.task_id }}/{%% if ti.map_index >= 0 %%}map_index={{ ti.map_index }}/{%% endif %%}attempt={{ try_number }}.log

log_processor_filename_template = {{ filename }}.log
dag_processor_manager_log_location = /u01/soft_root/var/log/airflow/dag_processor_manager/dag_processor_manager.log

task_log_reader = task

extra_logger_names =

worker_log_server_port = 8793

[metrics]

statsd_on = True
statsd_host = ***
statsd_port = 8125
statsd_prefix = airflow

statsd_allow_list =

stat_name_handler =

statsd_datadog_enabled = False

statsd_datadog_tags =


[secrets]
backend =

backend_kwargs =

[cli]
api_client = airflow.api.client.local_client

endpoint_url = http://***:8328

[debug]
fail_fast = False

[api]
enable_experimental_api = False

auth_backends = airflow.api.auth.backend.basic_auth,airflow.api.auth.backend.session

maximum_page_limit = 100

fallback_page_limit = 100

google_oauth2_audience =

google_key_path =

access_control_allow_headers =

access_control_allow_methods =

access_control_allow_origins =

[lineage]
backend =

[atlas]
sasl_enabled = False
host =
port = 21000
username =
password =

[operators]
default_owner = airflow
default_cpus = 1
default_ram = 512
default_disk = 512
default_gpus = 0

default_queue = default

allow_illegal_arguments = False

[hive]
default_hive_mapred_queue =


[webserver]
base_url = http://***:8328

default_ui_timezone = Asia/Ho_Chi_Minh

web_server_host = 0.0.0.0

web_server_port = 8328

web_server_ssl_cert =

web_server_ssl_key =

session_backend = database

web_server_master_timeout = 120

web_server_worker_timeout = 120

worker_refresh_batch_size = 1

worker_refresh_interval = 60

reload_on_plugin_change = False

secret_key = ***

workers = 4

worker_class = sync

access_logfile = -

error_logfile = -

access_logformat =

expose_config = False

expose_hostname = True

expose_stacktrace = True

dag_default_view = graph

dag_orientation = LR

log_fetch_timeout_sec = 5

log_fetch_delay_sec = 2

log_auto_tailing_offset = 30

log_animation_speed = 1000

hide_paused_dags_by_default = False

page_size = 100


default_dag_run_display_number = 25

enable_proxy_fix = False

proxy_fix_x_for = 1

proxy_fix_x_proto = 1

proxy_fix_x_host = 1

proxy_fix_x_port = 1

proxy_fix_x_prefix = 1

cookie_secure = False

cookie_samesite = Lax

default_wrap = False

x_frame_enabled = True



show_recent_stats_for_completed_runs = True

update_fab_perms = True

session_lifetime_minutes = 43200


instance_name_has_markup = False

auto_refresh_interval = 3

warn_deployment_exposure = True

audit_view_excluded_events = gantt,landing_times,tries,duration,calendar,graph,grid,tree,tree_data


[email]

email_backend = airflow.utils.email.send_email_smtp

email_conn_id = smtp_default

default_email_on_retry = False

default_email_on_failure = True



from_email = ***

[smtp]

smtp_host = 
smtp_starttls = True
smtp_ssl = False
smtp_user = 
smtp_password = 
smtp_port = 
smtp_mail_from = 
smtp_timeout = 30
smtp_retry_limit = 5

[sentry]

sentry_on = false
sentry_dsn =


[local_kubernetes_executor]

kubernetes_queue = kubernetes

[celery_kubernetes_executor]

kubernetes_queue = kubernetes

[celery]

celery_app_name = airflow.executors.celery_executor

worker_concurrency = 32


worker_prefetch_multiplier = 1

worker_enable_remote_control = true

worker_umask = 0o077

broker_url = amqp://***

result_backend = db+postgresql+psycopg2://***

flower_host = 0.0.0.0

flower_url_prefix =

flower_port = 8555

flower_basic_auth =

sync_parallelism = 0

celery_config_options = airflow.config_templates.default_celery.DEFAULT_CELERY_CONFIG
ssl_active = False
ssl_key =
ssl_cert =
ssl_cacert =

pool = prefork

operation_timeout = 2.0

task_track_started = True

task_adoption_timeout = 600

task_publish_max_retries = 3

worker_precheck = False

[celery_broker_transport_options]

visibility_timeout = 21600

[dask]

cluster_address = 127.0.0.1:8786

tls_ca =
tls_cert =
tls_key =

[scheduler]
job_heartbeat_sec = 5

scheduler_heartbeat_sec = 15

num_runs = -1

scheduler_idle_sleep_time = 1

min_file_process_interval = 10

deactivate_stale_dags_interval = 60

dag_dir_list_interval = 20

print_stats_interval = 30

pool_metrics_interval = 5.0

scheduler_health_check_threshold = 60

orphaned_tasks_check_interval = 300.0
child_process_log_directory = /u01/soft_root/var/log/airflow/scheduler

scheduler_zombie_task_threshold = 300

zombie_detection_interval = 10.0

catchup_by_default = True

ignore_first_depends_on_past_by_default = True

max_tis_per_query = 512

use_row_level_locking = True

max_dagruns_to_create_per_loop = 10

max_dagruns_per_loop_to_schedule = 20

schedule_after_task_execution = True

parsing_processes = 2

file_parsing_sort_mode = modified_time

standalone_dag_processor = False

max_callbacks_per_loop = 20

use_job_schedule = True

allow_trigger_in_future = False

dependency_detector = airflow.serialization.serialized_objects.DependencyDetector

trigger_timeout_check_interval = 15

[triggerer]
default_capacity = 1000

[kerberos]
ccache = /tmp/krb5cc_airflow_new

principal = ***
reinit_frequency = 3600
kinit_path = kinit
keytab = /etc/security/keytabs/airflow_new.keytab

forwardable = True

include_ip = True

[github_enterprise]
api_rev = v3

[elasticsearch]
host =

log_id_template = {dag_id}-{task_id}-{run_id}-{map_index}-{try_number}

end_of_log_mark = end_of_log

frontend =

write_stdout = False

json_format = False

json_fields = asctime, filename, lineno, levelname, message

host_field = host

offset_field = offset

[elasticsearch_configs]
use_ssl = False
verify_certs = True

[kubernetes]
pod_template_file =

worker_container_repository =

worker_container_tag =

namespace = default

delete_worker_pods = True

delete_worker_pods_on_failure = False

worker_pods_creation_batch_size = 1

multi_namespace_mode = False

in_cluster = True



kube_client_request_args =

delete_option_kwargs =

enable_tcp_keepalive = True

tcp_keep_idle = 120

tcp_keep_intvl = 30

tcp_keep_cnt = 6

verify_ssl = True

worker_pods_pending_timeout = 300

worker_pods_pending_timeout_check_interval = 120

worker_pods_queued_check_interval = 60

worker_pods_pending_timeout_batch_size = 100

[sensors]
default_timeout = 604800

[smart_sensor]
use_smart_sensor = False

shard_code_upper_limit = 10000

shards = 5

sensors_enabled = NamedHivePartitionSensor
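
One non-stock entry in the [logging] section above is logging_config_class = airflow_logging_settings.LOGGING_CONFIG. That module follows the usual pattern of copying Airflow's default logging config and overriding a few paths; a trimmed sketch of the shape of such a module (the real file differs in its details):

# airflow_logging_settings.py - must be importable (on PYTHONPATH) so that
# logging_config_class = airflow_logging_settings.LOGGING_CONFIG resolves.
# Sketch only: start from Airflow's stock logging config and override paths.
from copy import deepcopy

from airflow.config_templates.airflow_local_settings import DEFAULT_LOGGING_CONFIG

LOGGING_CONFIG = deepcopy(DEFAULT_LOGGING_CONFIG)
LOGGING_CONFIG["handlers"]["task"]["base_log_folder"] = "/u01/soft_root/var/log/airflow"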

airflow (env file used by the services)

# Path configuration for airflow
PATH=/usr/local/sbin:/sbin:/bin:/usr/sbin:/usr/bin:/usr/local/bin

# For SSL connection to our IPA-signed certificate
REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-bundle.crt

# Airflow configuration
AIRFLOW_CONFIG=/etc/airflow/airflow.cfg
AIRFLOW_HOME=/u01/soft_root/opt/airflow
# Prevent some invalid airflow worker warnings
AIRFLOW__CORE__BASE_LOG_FOLDER=/u01/soft_root/var/log/airflow
AIRFLOW__CORE__DAG_PROCESSOR_MANAGER_LOG_LOCATION=/u01/soft_root/var/log/airflow/dag_processor_manager/dag_processor_manager.log
AIRFLOW__SCHEDULER__CHILD_PROCESS_LOG_DIRECTORY=/u01/soft_root/var/log/airflow/scheduler

LD_LIBRARY_PATH=/usr/local/lib
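
Because both AIRFLOW_CONFIG and AIRFLOW_HOME are set here, a quick way to confirm that every node resolves the same dags_folder and metadata DB is to run something like the following under the same environment file (a minimal sketch):

# Sketch: print the effective settings an airflow process started with this
# environment file would actually use.
from airflow.configuration import conf

print("dags_folder:", conf.get("core", "dags_folder"))
print("sql_alchemy_conn:", conf.get("database", "sql_alchemy_conn"))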

I tried restarting the services many times, but it didn't work. I also tried changing the permissions of the dags folder to 777, which didn't work either. I don't know why the DAGs won't appear in the Airflow Web UI, or how to make them appear. Could anyone help me figure out what I missed or did wrong? Thank you for your support.

  • Hi, I'm experiencing a similar problem. Did you find a solution back then? I also have a list of DAGs in the scheduler container (Kubernetes), but they are not shown in the webserver UI... until I execute "airflow dags reserialize" in the scheduler container. Then they are shown in the UI. Somehow the serialization does not happen on a git sync of the DAGs. – mcrot Sep 15 '22 at 14:04
  • For all who may come across this: in my case, I'm using Airflow on Kubernetes, and the default setting of the Helm chart was to activate the standalone DAG processor. I'm not sure whether this processor had an issue in my setup, but when I deactivated it, the DAGs were automatically serialized and visible in the UI. You can check in `airflow.cfg`, in the `[scheduler]` section, whether the standalone DAG processor is activated or not. – mcrot Sep 15 '22 at 14:35
