mirror of
https://github.com/ansible-collections/community.general.git
synced 2024-09-14 20:13:21 +02:00
correctly handle term signals
- adhoc now terminates gracefully
- avoid a race condition on termination by ignoring errors if the worker might have been reaped between checking whether it is active and the termination call
- ansible-playbook now properly exits on SIGINT/SIGTERM
- adhoc and playbook now raise exceptions that we should not normally capture, and rely on the top-level finally to reap children
- handle SystemExit breaks in workers
- added debug to see at which frame we exit

partial fix for #14346
This commit is contained in:
parent
45755bc0e5
commit
5a1887cc76
5 changed files with 21 additions and 8 deletions
|
@ -21,6 +21,7 @@ __metaclass__ = type
|
|||
########################################################
|
||||
|
||||
import os
|
||||
import signal
|
||||
|
||||
from ansible import constants as C
|
||||
from ansible.cli import CLI
|
||||
|
@ -88,6 +89,10 @@ class AdHocCLI(CLI):
|
|||
tasks = [ dict(action=dict(module=self.options.module_name, args=parse_kv(self.options.module_args)), async=async, poll=poll) ]
|
||||
)
|
||||
|
||||
def _terminate(self, signum=None, framenum=None):
|
||||
if signum is not None:
|
||||
raise SystemExit("Interrupt detected, shutting down gracefully")
|
||||
|
||||
def run(self):
|
||||
''' use Runner lib to do SSH things '''
|
||||
|
||||
|
@ -170,6 +175,9 @@ class AdHocCLI(CLI):
|
|||
# now create a task queue manager to execute the play
|
||||
self._tqm = None
|
||||
try:
|
||||
# Manage user interruptions
|
||||
signal.signal(signal.SIGTERM, self._terminate)
|
||||
|
||||
self._tqm = TaskQueueManager(
|
||||
inventory=inventory,
|
||||
variable_manager=variable_manager,
|
||||
|
@ -180,6 +188,7 @@ class AdHocCLI(CLI):
|
|||
run_additional_callbacks=C.DEFAULT_LOAD_CALLBACK_PLUGINS,
|
||||
run_tree=run_tree,
|
||||
)
|
||||
|
||||
result = self._tqm.run(play)
|
||||
finally:
|
||||
if self._tqm:
|
||||
|
|
|
@ -69,7 +69,7 @@ class PlaybookExecutor:
|
|||
may limit the runs to serialized groups, etc.
|
||||
'''
|
||||
|
||||
signal.signal(signal.SIGINT, self._cleanup)
|
||||
signal.signal(signal.SIGTERM, self._terminate)
|
||||
|
||||
result = 0
|
||||
entrylist = []
|
||||
|
@ -199,7 +199,7 @@ class PlaybookExecutor:
|
|||
|
||||
finally:
|
||||
if self._tqm is not None:
|
||||
self._cleanup()
|
||||
self._tqm.cleanup()
|
||||
|
||||
if self._options.syntax:
|
||||
display.display("No issues encountered")
|
||||
|
@ -207,8 +207,9 @@ class PlaybookExecutor:
|
|||
|
||||
return result
|
||||
|
||||
def _cleanup(self, signum=None, framenum=None):
|
||||
return self._tqm.cleanup()
|
||||
def _terminate(self, signum=None, framenum=None):
|
||||
display.debug(framenum)
|
||||
raise SystemExit("Terminating run due to external signal")
|
||||
|
||||
def _get_serialized_batches(self, play):
|
||||
'''
|
||||
|
|
|
@ -163,7 +163,7 @@ class ResultProcess(multiprocessing.Process):
|
|||
|
||||
except queue.Empty:
|
||||
pass
|
||||
except (KeyboardInterrupt, IOError, EOFError):
|
||||
except (KeyboardInterrupt, SystemExit, IOError, EOFError):
|
||||
break
|
||||
except:
|
||||
# TODO: we should probably send a proper callback here instead of
|
||||
|
|
|
@ -132,7 +132,7 @@ class WorkerProcess(multiprocessing.Process):
|
|||
self._rslt_q.put(task_result, block=False)
|
||||
|
||||
except Exception as e:
|
||||
if not isinstance(e, (IOError, EOFError, KeyboardInterrupt)) or isinstance(e, TemplateNotFound):
|
||||
if not isinstance(e, (IOError, EOFError, KeyboardInterrupt, SystemExit)) or isinstance(e, TemplateNotFound):
|
||||
try:
|
||||
self._host.vars = dict()
|
||||
self._host.groups = []
|
||||
|
@ -140,7 +140,7 @@ class WorkerProcess(multiprocessing.Process):
|
|||
self._rslt_q.put(task_result, block=False)
|
||||
except:
|
||||
debug(u"WORKER EXCEPTION: %s" % to_unicode(e))
|
||||
debug(u"WORKER EXCEPTION: %s" % to_unicode(traceback.format_exc()))
|
||||
debug(u"WORKER TRACEBACK: %s" % to_unicode(traceback.format_exc()))
|
||||
|
||||
debug("WORKER PROCESS EXITING")
|
||||
|
||||
|
|
|
@ -253,7 +253,10 @@ class TaskQueueManager:
|
|||
rslt_q.close()
|
||||
main_q.close()
|
||||
if worker_prc and worker_prc.is_alive():
|
||||
try:
|
||||
worker_prc.terminate()
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
def clear_failed_hosts(self):
|
||||
self._failed_hosts = dict()
|
||||
|
|
Loading…
Reference in a new issue