From e8ad36c8d4127efdd179804c032b5fa73c047f7c Mon Sep 17 00:00:00 2001 From: Jesse Keating Date: Mon, 6 Jan 2014 18:16:43 -0800 Subject: [PATCH 1/2] Store hosts for a play as a play attribute Operate on that play attribute to make things faster for larger inventories. Instead of making a round trip through inventory.list_hosts and working through some lengthy list comprehensions over and over again, calculate the potenital hosts for a play once, then reduce from it the unavailable hosts when necessary. Also moves how the %fail is done. The host count is a play level count of available hosts, which then is compared after each task to the current number of available hosts for the play. This used to get a new count every task which was also time expensive. --- lib/ansible/playbook/__init__.py | 26 ++++++++++++++++---------- lib/ansible/playbook/play.py | 4 +++- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/lib/ansible/playbook/__init__.py b/lib/ansible/playbook/__init__.py index dc7991aaf7..af234a1f77 100644 --- a/lib/ansible/playbook/__init__.py +++ b/lib/ansible/playbook/__init__.py @@ -289,17 +289,17 @@ class PlayBook(object): # ***************************************************** - def _list_available_hosts(self, *args): + def _trim_unavailable_hosts(self, hostlist=[]): ''' returns a list of hosts that haven't failed and aren't dark ''' - return [ h for h in self.inventory.list_hosts(*args) if (h not in self.stats.failures) and (h not in self.stats.dark)] + return [ h for h in hostlist if (h not in self.stats.failures) and (h not in self.stats.dark)] # ***************************************************** def _run_task_internal(self, task): ''' run a particular module step in a playbook ''' - hosts = self._list_available_hosts() + hosts = self._trim_unavailable_hosts(task.play._play_hosts) self.inventory.restrict_to(hosts) runner = ansible.runner.Runner( @@ -436,7 +436,7 @@ class PlayBook(object): if play.gather_facts is False: return {} - host_list = self._list_available_hosts(play.hosts) + host_list = self._trim_unavailable_hosts(play._play_hosts) self.callbacks.on_setup() self.inventory.restrict_to(host_list) @@ -499,8 +499,10 @@ class PlayBook(object): ''' run a list of tasks for a given pattern, in order ''' self.callbacks.on_play_start(play.name) + # Get the hosts for this play + play._play_hosts = self.inventory.list_hosts(play.hosts) # if no hosts matches this play, drop out - if not self.inventory.list_hosts(play.hosts): + if not play._play_hosts: self.callbacks.on_no_hosts_matched() return True @@ -509,8 +511,9 @@ class PlayBook(object): # now with that data, handle contentional variable file imports! - all_hosts = self._list_available_hosts(play.hosts) + all_hosts = self._trim_unavailable_hosts(play._play_hosts) play.update_vars_files(all_hosts) + hosts_count = len(all_hosts) serialized_batch = [] if play.serial <= 0: @@ -526,6 +529,9 @@ class PlayBook(object): for on_hosts in serialized_batch: + # restrict the play to just the hosts we have in our on_hosts block that are + # available. + play._play_hosts = self._trim_unavailable_hosts(on_hosts) self.inventory.also_restrict_to(on_hosts) for task in play.tasks(): @@ -548,7 +554,7 @@ class PlayBook(object): # prevent duplicate handler includes from running more than once fired_names[handler_name] = 1 - host_list = self._list_available_hosts(play.hosts) + host_list = self._trim_unavailable_hosts(play._play_hosts) if handler.any_errors_fatal and len(host_list) < hosts_count: play.max_fail_pct = 0 if (hosts_count - len(host_list)) > int((play.max_fail_pct)/100.0 * hosts_count): @@ -567,8 +573,6 @@ class PlayBook(object): continue - hosts_count = len(self._list_available_hosts(play.hosts)) - # only run the task if the requested tags match should_run = False for x in self.only_tags: @@ -591,7 +595,9 @@ class PlayBook(object): # just didn't match anything and that's ok return False - host_list = self._list_available_hosts(play.hosts) + # Get a new list of what hosts are left as available, the ones that + # did not go fail/dark during the task + host_list = self._trim_unavailable_hosts(play._play_hosts) # Set max_fail_pct to 0, So if any hosts fails, bail out if task.any_errors_fatal and len(host_list) < hosts_count: diff --git a/lib/ansible/playbook/play.py b/lib/ansible/playbook/play.py index 92a40ba6ec..87af906e5f 100644 --- a/lib/ansible/playbook/play.py +++ b/lib/ansible/playbook/play.py @@ -34,7 +34,7 @@ class Play(object): 'handlers', 'remote_user', 'remote_port', 'included_roles', 'accelerate', 'accelerate_port', 'accelerate_ipv6', 'sudo', 'sudo_user', 'transport', 'playbook', 'tags', 'gather_facts', 'serial', '_ds', '_handlers', '_tasks', - 'basedir', 'any_errors_fatal', 'roles', 'max_fail_pct' + 'basedir', 'any_errors_fatal', 'roles', 'max_fail_pct', '_play_hosts' ] # to catch typos and so forth -- these are userland names @@ -134,6 +134,8 @@ class Play(object): if self.sudo_user != 'root': self.sudo = True + # place holder for the discovered hosts to be used in this play + self._play_hosts = None # ************************************************* From 6013f0738eed9a396414663cc55528d1e9ace2a7 Mon Sep 17 00:00:00 2001 From: Jesse Keating Date: Mon, 6 Jan 2014 20:13:39 -0800 Subject: [PATCH 2/2] Store the list of hosts to run on in runner object This reduces the number of times inventory.list_hosts is called, which can be costly. When coming from a playbook that data is already known. --- lib/ansible/playbook/__init__.py | 3 ++- lib/ansible/runner/__init__.py | 11 +++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/lib/ansible/playbook/__init__.py b/lib/ansible/playbook/__init__.py index af234a1f77..5dd81b094c 100644 --- a/lib/ansible/playbook/__init__.py +++ b/lib/ansible/playbook/__init__.py @@ -316,7 +316,8 @@ class PlayBook(object): check=self.check, diff=self.diff, environment=task.environment, complex_args=task.args, accelerate=task.play.accelerate, accelerate_port=task.play.accelerate_port, accelerate_ipv6=task.play.accelerate_ipv6, - error_on_undefined_vars=C.DEFAULT_UNDEFINED_VAR_BEHAVIOR + error_on_undefined_vars=C.DEFAULT_UNDEFINED_VAR_BEHAVIOR, + run_hosts=hosts ) if task.async_seconds == 0: diff --git a/lib/ansible/runner/__init__.py b/lib/ansible/runner/__init__.py index ef8d4aac21..c195cefe24 100644 --- a/lib/ansible/runner/__init__.py +++ b/lib/ansible/runner/__init__.py @@ -141,6 +141,7 @@ class Runner(object): accelerate=False, # use accelerated connection accelerate_ipv6=False, # accelerated connection w/ IPv6 accelerate_port=None, # port to use with accelerated connection + run_hosts=None, # an optional list of pre-calculated hosts to run on ): # used to lock multiprocess inputs and outputs at various levels @@ -205,6 +206,10 @@ class Runner(object): # don't override subset when passed from playbook self.inventory.subset(subset) + # If we get a pre-built list of hosts to run on, from say a playbook, use them. + # Also where we will store the hosts to run on once discovered + self.run_hosts = run_hosts + if self.transport == 'local': self.remote_user = pwd.getpwuid(os.geteuid())[0] @@ -1002,7 +1007,7 @@ class Runner(object): results2["dark"][host] = result.result # hosts which were contacted but never got a chance to return - for host in self.inventory.list_hosts(self.pattern): + for host in self.run_hosts: if not (host in results2['dark'] or host in results2['contacted']): results2["dark"][host] = {} return results2 @@ -1013,7 +1018,9 @@ class Runner(object): ''' xfer & run module on all matched hosts ''' # find hosts that match the pattern - hosts = self.inventory.list_hosts(self.pattern) + if not self.run_hosts: + self.run_hosts = self.inventory.list_hosts(self.pattern) + hosts = self.run_hosts if len(hosts) == 0: self.callbacks.on_no_hosts() return dict(contacted={}, dark={})