Option 1 (recommended): kill the child processes (the process tree) first, then the main process.
For a graceful exit, terminate the process tree first, then terminate the root process.
The two steps are shown below; you can modify them, or wrap them in a custom kill operator for your own implementation based on Airflow's kill commands.
Step 1a
def kill_process_tree(logger, pid, timeout=DEFAULT_TIME_TO_WAIT_AFTER_SIGTERM):
    """
    Terminate a process together with all of its descendants.

    The running descendants are sent SIGTERM first, then the root process.
    Anything still running once the wait has elapsed is sent SIGKILL.
    Signalling and waiting are delegated to ``kill_processes``. Note:
    killing via PIDs has the potential to kill the wrong process if the
    process dies and the PID gets recycled in a narrow time window.

    :param logger: logger
    :type logger: logging.Logger
    :param pid: process id of the root process
    :type pid: int
    :param timeout: time (seconds) to wait on a process to terminate before
        attempting to SIGKILL; the same bound is applied to the wait that
        follows SIGKILL
    :type timeout: int
    """
    try:
        root_process = psutil.Process(pid)
        # The root can exit between the lookup and the child enumeration;
        # treat that the same as "never existed" instead of crashing.
        descendants = root_process.children(recursive=True)
    except psutil.NoSuchProcess:
        logger.warning("PID: {} does not exist".format(pid))
        return

    # Check child processes to reduce cases where a child process died but
    # the PID got reused
    running_descendants = [x for x in descendants if x.is_running()]

    if running_descendants:
        # Reading the command line is only for the log message; a failure
        # here must not abort the cleanup.
        try:
            root_cmdline = root_process.cmdline()
        except psutil.Error:
            root_cmdline = "<unavailable>"
        logger.info("Terminating descendant processes of {} PID: {}"
                    .format(root_cmdline, root_process.pid))
        _, running_descendants = kill_processes(logger, running_descendants,
                                                sig=signal.SIGTERM, timeout=timeout)
    else:
        logger.debug("There are no descendant processes to terminate.")

    logger.info("Terminating root process PID: {}.".format(root_process.pid))
    _, running_root = kill_processes(logger, [root_process],
                                     sig=signal.SIGTERM, timeout=timeout)

    # Escalate to SIGKILL for everything that ignored SIGTERM. The wait is
    # bounded by the same timeout so an unkillable process cannot block the
    # caller, and any survivor is reported.
    survivors = running_root + running_descendants
    if survivors:
        _, still_alive = kill_processes(logger, survivors,
                                        sig=signal.SIGKILL, timeout=timeout)
        if still_alive:
            logger.error("Processes still running after SIGKILL: {}"
                         .format([p.pid for p in still_alive]))
Step 1b: the helper that sends the signal to each process and waits for it to exit
def _cmdline_for_log(process):
    """Return the process command line for logging, or a placeholder if it cannot be read."""
    try:
        return process.cmdline()
    except psutil.Error:
        return "<unavailable>"


def kill_processes(logger, processes, sig=signal.SIGTERM, timeout=None):
    """
    Send ``sig`` to every process in ``processes``, then wait for them to exit.

    A failure to signal one process is logged and does not stop the
    remaining processes from being signalled.

    :param logger: logger
    :type logger: logging.Logger
    :param processes: processes to signal
    :type processes: list[psutil.Process]
    :param sig: signal to send; SIGTERM and SIGKILL use ``terminate()`` and
        ``kill()``, any other value is delivered with ``send_signal()``
    :type sig: int
    :param timeout: time (seconds) handed to ``psutil.wait_procs``; ``None``
        sets no limit on the wait
    :type timeout: int or None
    :return: the ``(gone, alive)`` result of ``psutil.wait_procs``
    :rtype: tuple
    """
    for p in processes:
        try:
            # The command line is looked up through a helper that never
            # raises, so a logging problem cannot prevent the signal from
            # being sent.
            if sig == signal.SIGTERM:
                logger.info("Terminating process {} PID: {}".format(_cmdline_for_log(p), p.pid))
                p.terminate()
            elif sig == signal.SIGKILL:
                logger.info("Killing process {} PID: {}".format(_cmdline_for_log(p), p.pid))
                p.kill()
            else:
                # Previously any other signal was silently dropped.
                logger.info("Sending signal {} to process {} PID: {}"
                            .format(sig, _cmdline_for_log(p), p.pid))
                p.send_signal(sig)
        except psutil.NoSuchProcess:
            logger.warning("Process {} no longer exists".format(p.pid))
        except Exception as e:
            # Best effort: report the failure and move on to the next process.
            logger.error("Encountered exception while attempting to kill pid "
                         "{} with signal {}:\n{}".format(p.pid, sig, e))

    if timeout is None:
        logger.info("Waiting for processes to exit...")
    else:
        logger.info("Waiting up to {}s for processes to exit...".format(timeout))
    return psutil.wait_procs(processes, timeout)
Option 2:
Kill the container/worker machine
// From a worker machine
airflow celery stop