From 4fb9e54c5045098e9908a1487dd18cb1eb0d15e0 Mon Sep 17 00:00:00 2001 From: Brian Coca Date: Mon, 30 Oct 2017 16:39:15 -0400 Subject: [PATCH] calculate max fail against all hosts in batch currently it is doing only from the 'active' hosts in the batch which means the percentage goes up as hosts fail instead of staying the same. added debug info for max fail fixes #32255 --- lib/ansible/executor/play_iterator.py | 4 ++- lib/ansible/executor/task_queue_manager.py | 29 ++++++---------------- lib/ansible/plugins/strategy/linear.py | 3 ++- 3 files changed, 13 insertions(+), 23 deletions(-) diff --git a/lib/ansible/executor/play_iterator.py b/lib/ansible/executor/play_iterator.py index 329a9f018c..158ed0d9b9 100644 --- a/lib/ansible/executor/play_iterator.py +++ b/lib/ansible/executor/play_iterator.py @@ -203,7 +203,9 @@ class PlayIterator: self._host_states = {} start_at_matched = False - for host in inventory.get_hosts(self._play.hosts): + batch = inventory.get_hosts(self._play.hosts) + self.batch_size = len(batch) + for host in batch: self._host_states[host.name] = HostState(blocks=self._blocks) # if we're looking to start at a specific task, iterate through # the tasks for this host until we find the specified task diff --git a/lib/ansible/executor/task_queue_manager.py b/lib/ansible/executor/task_queue_manager.py index d8f41ccf65..9270d16aab 100644 --- a/lib/ansible/executor/task_queue_manager.py +++ b/lib/ansible/executor/task_queue_manager.py @@ -242,22 +242,6 @@ class TaskQueueManager: loader=self._loader, ) - # Fork # of forks, # of hosts or serial, whichever is lowest - num_hosts = len(self._inventory.get_hosts(new_play.hosts, ignore_restrictions=True)) - - max_serial = 0 - if new_play.serial: - # the play has not been post_validated here, so we may need - # to convert the scalar value to a list at this point - serial_items = new_play.serial - if not isinstance(serial_items, list): - serial_items = [serial_items] - max_serial = max([pct_to_int(x, num_hosts) for x in serial_items]) - - contenders = [self._options.forks, max_serial, num_hosts] - contenders = [v for v in contenders if v is not None and v > 0] - self._initialize_processes(min(contenders)) - play_context = PlayContext(new_play, self._options, self.passwords, self._connection_lockfile.fileno()) for callback_plugin in self._callback_plugins: if hasattr(callback_plugin, 'set_play_context'): @@ -268,11 +252,6 @@ class TaskQueueManager: # initialize the shared dictionary containing the notified handlers self._initialize_notified_handlers(new_play) - # load the specified strategy (or the default linear one) - strategy = strategy_loader.get(new_play.strategy, self) - if strategy is None: - raise AnsibleError("Invalid play strategy specified: %s" % new_play.strategy, obj=play._ds) - # build the iterator iterator = PlayIterator( inventory=self._inventory, @@ -283,6 +262,14 @@ class TaskQueueManager: start_at_done=self._start_at_done, ) + # adjust to # of workers to configured forks or size of batch, whatever is lower + self._initialize_processes(min(self._options.forks, iterator.batch_size)) + + # load the specified strategy (or the default linear one) + strategy = strategy_loader.get(new_play.strategy, self) + if strategy is None: + raise AnsibleError("Invalid play strategy specified: %s" % new_play.strategy, obj=play._ds) + # Because the TQM may survive multiple play runs, we start by marking # any hosts as failed in the iterator here which may have been marked # as failed in previous runs. Then we clear the internal list of failed diff --git a/lib/ansible/plugins/strategy/linear.py b/lib/ansible/plugins/strategy/linear.py index 3db70d2c1c..9beba1f283 100644 --- a/lib/ansible/plugins/strategy/linear.py +++ b/lib/ansible/plugins/strategy/linear.py @@ -401,7 +401,7 @@ class StrategyModule(StrategyBase): if iterator._play.max_fail_percentage is not None and len(results) > 0: percentage = iterator._play.max_fail_percentage / 100.0 - if (len(self._tqm._failed_hosts) / len(results)) > percentage: + if (len(self._tqm._failed_hosts) / iterator.batch_size) > percentage: for host in hosts_left: # don't double-mark hosts, or the iterator will potentially # fail them out of the rescue/always states @@ -410,6 +410,7 @@ class StrategyModule(StrategyBase): iterator.mark_host_failed(host) self._tqm.send_callback('v2_playbook_on_no_hosts_remaining') result |= self._tqm.RUN_FAILED_BREAK_PLAY + display.debug('(%s failed / %s total )> %s max fail' % (len(self._tqm._failed_hosts), iterator.batch_size, percentage)) display.debug("done checking for max_fail_percentage") display.debug("checking to see if all hosts have failed and the running result is not ok")