I'm setting up an Airflow 2.3.0 (Python 3.9.13) cluster on 4 nodes (on-prem infrastructure):
- node1, node2 run: web UI, scheduler, flower, and worker.
- node3, node4 run: worker.
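For context, every node is expected to have the same DAG files under the dags folder configured in airflow.cfg below; a rough consistency check across nodes (node names as above, assuming ssh access between them) could look like this:
# hash the DAG files on each node to confirm the dags folders are in sync (path taken from airflow.cfg)
for h in node1 node2 node3 node4; do
  ssh "$h" 'find /u01/soft_root/opt/airflow/dags -type f -name "*.py" | sort | xargs md5sum'
done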
Now I'm facing a problem: the Web UI does not display any DAGs, even though they are listed by the airflow dags list command. (Because of my lack of reputation I'm not allowed to attach images directly; one screenshot shows the empty Airflow Web UI, the other shows the DAGs returned by airflow dags list.)
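In case it is useful, these are the kinds of checks I can run on node1 to compare what the CLI sees with what the UI shows (I believe the list-import-errors subcommand is available in 2.3, but I'm not certain it exists in every 2.x release):
# DAGs found by the DagBag
airflow dags list
# import errors that would keep a DAG out of the UI
airflow dags list-import-errors
# which config file and dags_folder are actually in use
airflow info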
I created the user and group airflow to run the webserver, flower, scheduler, and worker as systemd services. Here are the service files and config files:
airflow-webserver.service
[Unit]
Description=Airflow webserver daemon
After=network.target
[Service]
EnvironmentFile=/etc/sysconfig/airflow
User=airflow
Group=airflow
Type=simple
ExecStart=/usr/local/bin/airflow webserver
Restart=always
RestartSec=15
PrivateTmp=true
KillMode=mixed
TimeoutSec=30
TimeoutStopSec=10
[Install]
WantedBy=multi-user.target
airflow-scheduler.service
[Unit]
Description=Airflow scheduler daemon
After=network.target
[Service]
EnvironmentFile=/etc/sysconfig/airflow
User=airflow
Group=airflow
Type=simple
ExecStart=/usr/local/bin/airflow scheduler
Restart=always
RestartSec=15
KillMode=mixed
TimeoutSec=30
TimeoutStopSec=10
[Install]
WantedBy=multi-user.target
airflow-flower.service
[Unit]
Description=Airflow Flower daemon
After=network.target
[Service]
EnvironmentFile=/etc/sysconfig/airflow
User=airflow
Group=airflow
Type=simple
ExecStart=/usr/local/bin/airflow celery flower
Restart=always
RestartSec=15
KillMode=mixed
TimeoutSec=30
TimeoutStopSec=10
[Install]
WantedBy=multi-user.target
airflow-worker.service
[Unit]
Description=Airflow celery worker daemon
After=network.target
[Service]
EnvironmentFile=/etc/sysconfig/airflow
User=airflow
Group=airflow
Type=simple
ExecStart=/usr/local/bin/airflow celery worker -q default,sys,geo,ip-log,imes,data-warehouse,<server-name>
Restart=always
RestartSec=30s
[Install]
WantedBy=multi-user.target
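For completeness, this is roughly how the units were installed and started on each node (the unit file destination is an assumption; the service names match the files above):
# copy the unit files and enable the services (destination path assumed)
sudo cp airflow-*.service /etc/systemd/system/
sudo systemctl daemon-reload
sudo systemctl enable --now airflow-webserver airflow-scheduler airflow-flower airflow-worker
# check whether the scheduler is parsing DAG files at all
sudo journalctl -u airflow-scheduler --since "15 min ago" | grep -i dag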
airflow.cfg
[core]
dags_folder = /u01/soft_root/opt/airflow/dags
hostname_callable = socket.getfqdn
default_timezone = Asia/Ho_Chi_Minh
executor = CeleryExecutor
parallelism = 128
max_active_tasks_per_dag = 16
dags_are_paused_at_creation = True
max_active_runs_per_dag = 16
load_examples = False
plugins_folder = /u01/soft_root/opt/airflow/plugins
execute_tasks_new_python_interpreter = False
fernet_key = ***
donot_pickle = False
dagbag_import_timeout = 30.0
dagbag_import_error_tracebacks = True
dagbag_import_error_traceback_depth = 2
dag_file_processor_timeout = 50
task_runner = StandardTaskRunner
default_impersonation =
security =
unit_test_mode = False
enable_xcom_pickling = False
killed_task_cleanup_time = 60
dag_run_conf_overrides_params = True
dag_discovery_safe_mode = True
dag_ignore_file_syntax = regexp
default_task_retries = 0
default_task_weight_rule = downstream
default_task_execution_timeout =
min_serialized_dag_update_interval = 30
compress_serialized_dags = False
min_serialized_dag_fetch_interval = 10
max_num_rendered_ti_fields_per_task = 30
check_slas = True
xcom_backend = airflow.models.xcom.BaseXCom
lazy_load_plugins = True
lazy_discover_providers = True
hide_sensitive_var_conn_fields = True
sensitive_var_conn_names =
default_pool_task_slot_count = 128
max_map_length = 1024
[database]
sql_alchemy_conn = postgresql+psycopg2://***
sql_engine_encoding = utf-8
sql_alchemy_pool_enabled = True
sql_alchemy_pool_size = 2
sql_alchemy_max_overflow = 10
sql_alchemy_pool_recycle = 1800
sql_alchemy_pool_pre_ping = True
sql_alchemy_schema =
load_default_connections = False
max_db_retries = 3
[logging]
base_log_folder = /u01/soft_root/var/log/airflow
remote_logging = False
remote_log_conn_id =
google_key_path =
remote_base_log_folder =
encrypt_s3_logs = False
logging_level = INFO
celery_logging_level =
fab_logging_level = WARNING
logging_config_class = airflow_logging_settings.LOGGING_CONFIG
colored_console_log = True
colored_log_format = [%%(blue)s%%(asctime)s%%(reset)s] {{%%(blue)s%%(filename)s:%%(reset)s%%(lineno)d}} %%(log_color)s%%(levelname)s%%(reset)s - %%(log_color)s%%(message)s%%(reset)s
colored_formatter_class = airflow.utils.log.colored_log.CustomTTYColoredFormatter
log_format = [%%(asctime)s] {{%%(filename)s:%%(lineno)d}} %%(levelname)s - %%(message)s
simple_log_format = %%(asctime)s %%(levelname)s - %%(message)s
task_log_prefix_template =
log_filename_template = dag_id={{ ti.dag_id }}/run_id={{ ti.run_id }}/task_id={{ ti.task_id }}/{%% if ti.map_index >= 0 %%}map_index={{ ti.map_index }}/{%% endif %%}attempt={{ try_number }}.log
log_processor_filename_template = {{ filename }}.log
dag_processor_manager_log_location = /u01/soft_root/var/log/airflow/dag_processor_manager/dag_processor_manager.log
task_log_reader = task
extra_logger_names =
worker_log_server_port = 8793
[metrics]
statsd_on = True
statsd_host = ***
statsd_port = 8125
statsd_prefix = airflow
statsd_allow_list =
stat_name_handler =
statsd_datadog_enabled = False
statsd_datadog_tags =
[secrets]
backend =
backend_kwargs =
[cli]
api_client = airflow.api.client.local_client
endpoint_url = http://***:8328
[debug]
fail_fast = False
[api]
enable_experimental_api = False
auth_backends = airflow.api.auth.backend.basic_auth,airflow.api.auth.backend.session
maximum_page_limit = 100
fallback_page_limit = 100
google_oauth2_audience =
google_key_path =
access_control_allow_headers =
access_control_allow_methods =
access_control_allow_origins =
[lineage]
backend =
[atlas]
sasl_enabled = False
host =
port = 21000
username =
password =
[operators]
default_owner = airflow
default_cpus = 1
default_ram = 512
default_disk = 512
default_gpus = 0
default_queue = default
allow_illegal_arguments = False
[hive]
default_hive_mapred_queue =
[webserver]
base_url = http://***:8328
default_ui_timezone = Asia/Ho_Chi_Minh
web_server_host = 0.0.0.0
web_server_port = 8328
web_server_ssl_cert =
web_server_ssl_key =
session_backend = database
web_server_master_timeout = 120
web_server_worker_timeout = 120
worker_refresh_batch_size = 1
worker_refresh_interval = 60
reload_on_plugin_change = False
secret_key = ***
workers = 4
worker_class = sync
access_logfile = -
error_logfile = -
access_logformat =
expose_config = False
expose_hostname = True
expose_stacktrace = True
dag_default_view = graph
dag_orientation = LR
log_fetch_timeout_sec = 5
log_fetch_delay_sec = 2
log_auto_tailing_offset = 30
log_animation_speed = 1000
hide_paused_dags_by_default = False
page_size = 100
default_dag_run_display_number = 25
enable_proxy_fix = False
proxy_fix_x_for = 1
proxy_fix_x_proto = 1
proxy_fix_x_host = 1
proxy_fix_x_port = 1
proxy_fix_x_prefix = 1
cookie_secure = False
cookie_samesite = Lax
default_wrap = False
x_frame_enabled = True
show_recent_stats_for_completed_runs = True
update_fab_perms = True
session_lifetime_minutes = 43200
instance_name_has_markup = False
auto_refresh_interval = 3
warn_deployment_exposure = True
audit_view_excluded_events = gantt,landing_times,tries,duration,calendar,graph,grid,tree,tree_data
[email]
email_backend = airflow.utils.email.send_email_smtp
email_conn_id = smtp_default
default_email_on_retry = False
default_email_on_failure = True
from_email = ***
[smtp]
smtp_host =
smtp_starttls = True
smtp_ssl = False
smtp_user =
smtp_password =
smtp_port =
smtp_mail_from =
smtp_timeout = 30
smtp_retry_limit = 5
[sentry]
sentry_on = false
sentry_dsn =
[local_kubernetes_executor]
kubernetes_queue = kubernetes
[celery_kubernetes_executor]
kubernetes_queue = kubernetes
[celery]
celery_app_name = airflow.executors.celery_executor
worker_concurrency = 32
worker_prefetch_multiplier = 1
worker_enable_remote_control = true
worker_umask = 0o077
broker_url = amqp://***
result_backend = db+postgresql+psycopg2://***
flower_host = 0.0.0.0
flower_url_prefix =
flower_port = 8555
flower_basic_auth =
sync_parallelism = 0
celery_config_options = airflow.config_templates.default_celery.DEFAULT_CELERY_CONFIG
ssl_active = False
ssl_key =
ssl_cert =
ssl_cacert =
pool = prefork
operation_timeout = 2.0
task_track_started = True
task_adoption_timeout = 600
task_publish_max_retries = 3
worker_precheck = False
[celery_broker_transport_options]
visibility_timeout = 21600
[dask]
cluster_address = 127.0.0.1:8786
tls_ca =
tls_cert =
tls_key =
[scheduler]
job_heartbeat_sec = 5
scheduler_heartbeat_sec = 15
num_runs = -1
scheduler_idle_sleep_time = 1
min_file_process_interval = 10
deactivate_stale_dags_interval = 60
dag_dir_list_interval = 20
print_stats_interval = 30
pool_metrics_interval = 5.0
scheduler_health_check_threshold = 60
orphaned_tasks_check_interval = 300.0
child_process_log_directory = /u01/soft_root/var/log/airflow/scheduler
scheduler_zombie_task_threshold = 300
zombie_detection_interval = 10.0
catchup_by_default = True
ignore_first_depends_on_past_by_default = True
max_tis_per_query = 512
use_row_level_locking = True
max_dagruns_to_create_per_loop = 10
max_dagruns_per_loop_to_schedule = 20
schedule_after_task_execution = True
parsing_processes = 2
file_parsing_sort_mode = modified_time
standalone_dag_processor = False
max_callbacks_per_loop = 20
use_job_schedule = True
allow_trigger_in_future = False
dependency_detector = airflow.serialization.serialized_objects.DependencyDetector
trigger_timeout_check_interval = 15
[triggerer]
default_capacity = 1000
[kerberos]
ccache = /tmp/krb5cc_airflow_new
principal = ***
reinit_frequency = 3600
kinit_path = kinit
keytab = /etc/security/keytabs/airflow_new.keytab
forwardable = True
include_ip = True
[github_enterprise]
api_rev = v3
[elasticsearch]
host =
log_id_template = {dag_id}-{task_id}-{run_id}-{map_index}-{try_number}
end_of_log_mark = end_of_log
frontend =
write_stdout = False
json_format = False
json_fields = asctime, filename, lineno, levelname, message
host_field = host
offset_field = offset
[elasticsearch_configs]
use_ssl = False
verify_certs = True
[kubernetes]
pod_template_file =
worker_container_repository =
worker_container_tag =
namespace = default
delete_worker_pods = True
delete_worker_pods_on_failure = False
worker_pods_creation_batch_size = 1
multi_namespace_mode = False
in_cluster = True
kube_client_request_args =
delete_option_kwargs =
enable_tcp_keepalive = True
tcp_keep_idle = 120
tcp_keep_intvl = 30
tcp_keep_cnt = 6
verify_ssl = True
worker_pods_pending_timeout = 300
worker_pods_pending_timeout_check_interval = 120
worker_pods_queued_check_interval = 60
worker_pods_pending_timeout_batch_size = 100
[sensors]
default_timeout = 604800
[smart_sensor]
use_smart_sensor = False
shard_code_upper_limit = 10000
shards = 5
sensors_enabled = NamedHivePartitionSensor
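Since the webserver in Airflow 2.3 renders DAGs from the serialized copies stored in the metadata database rather than parsing files itself, a sanity check against that database may also help (the psql connection string is a placeholder for the one in [database] above; config get-value should be available in 2.3):
# check whether the scheduler has serialized any DAGs into the metadata DB (connection redacted)
psql "postgresql://***" -c "SELECT dag_id, last_updated FROM serialized_dag ORDER BY last_updated DESC;"
# confirm the dags_folder that the current configuration resolves to
airflow config get-value core dags_folder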
airflow (environment file used by the services)
# Path configuration for airflow
PATH=/usr/local/sbin:/sbin:/bin:/usr/sbin:/usr/bin:/usr/local/bin
# For SSL connection to our IPA-signed certificate
REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-bundle.crt
# Airflow configuration
AIRFLOW_CONFIG=/etc/airflow/airflow.cfg
AIRFLOW_HOME=/u01/soft_root/opt/airflow
# Prevent some airflow worker invalid warning
AIRFLOW__CORE__BASE_LOG_FOLDER=/u01/soft_root/var/log/airflow
AIRFLOW__CORE__DAG_PROCESSOR_MANAGER_LOG_LOCATION=/u01/soft_root/var/log/airflow/dag_processor_manager/dag_processor_manager.log
AIRFLOW__SCHEDULER__CHILD_PROCESS_LOG_DIRECTORY=/u01/soft_root/var/log/airflow/scheduler
LD_LIBRARY_PATH=/usr/local/lib
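To rule out the systemd services seeing a different environment than my interactive shell, one check is to run airflow info as the airflow user under the same environment file the units load (path as in the unit files; the values above contain no spaces, so the grep/xargs trick is enough):
# run airflow info under the exact environment the systemd units use
sudo -u airflow env $(grep -v '^#' /etc/sysconfig/airflow | xargs) /usr/local/bin/airflow info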
I tried restarting the services many times, but it didn't help. I also tried changing the permissions on the dags folder to 777, which didn't work either. I don't know why the DAGs don't appear on the Airflow Web UI or how to make them appear. Could anyone help me figure out what I missed or did wrong? Thank you for your support.