From e8ad36c8d4127efdd179804c032b5fa73c047f7c Mon Sep 17 00:00:00 2001
From: Jesse Keating <jesse.keating@rackspace.com>
Date: Mon, 6 Jan 2014 18:16:43 -0800
Subject: [PATCH 1/2] Store hosts for a play as a play attribute

Operate on that play attribute to make things faster for larger
inventories. Instead of making a round trip through inventory.list_hosts
and working through some lengthy list comprehensions over and over
again, calculate the potenital hosts for a play once, then reduce from
it the unavailable hosts when necessary.

Also moves how the %fail is done. The host count is a play level count
of available hosts, which then is compared after each task to the
current number of available hosts for the play. This used to get a new
count every task which was also time expensive.
---
 lib/ansible/playbook/__init__.py | 26 ++++++++++++++++----------
 lib/ansible/playbook/play.py     |  4 +++-
 2 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/lib/ansible/playbook/__init__.py b/lib/ansible/playbook/__init__.py
index dc7991aaf7..af234a1f77 100644
--- a/lib/ansible/playbook/__init__.py
+++ b/lib/ansible/playbook/__init__.py
@@ -289,17 +289,17 @@ class PlayBook(object):
 
     # *****************************************************
 
-    def _list_available_hosts(self, *args):
+    def _trim_unavailable_hosts(self, hostlist=[]):
         ''' returns a list of hosts that haven't failed and aren't dark '''
 
-        return [ h for h in self.inventory.list_hosts(*args) if (h not in self.stats.failures) and (h not in self.stats.dark)]
+        return [ h for h in hostlist if (h not in self.stats.failures) and (h not in self.stats.dark)]
 
     # *****************************************************
 
     def _run_task_internal(self, task):
         ''' run a particular module step in a playbook '''
 
-        hosts = self._list_available_hosts()
+        hosts = self._trim_unavailable_hosts(task.play._play_hosts)
         self.inventory.restrict_to(hosts)
 
         runner = ansible.runner.Runner(
@@ -436,7 +436,7 @@ class PlayBook(object):
         if play.gather_facts is False:
             return {}
 
-        host_list = self._list_available_hosts(play.hosts)
+        host_list = self._trim_unavailable_hosts(play._play_hosts)
 
         self.callbacks.on_setup()
         self.inventory.restrict_to(host_list)
@@ -499,8 +499,10 @@ class PlayBook(object):
         ''' run a list of tasks for a given pattern, in order '''
 
         self.callbacks.on_play_start(play.name)
+        # Get the hosts for this play
+        play._play_hosts = self.inventory.list_hosts(play.hosts)
         # if no hosts matches this play, drop out
-        if not self.inventory.list_hosts(play.hosts):
+        if not play._play_hosts:
             self.callbacks.on_no_hosts_matched()
             return True
 
@@ -509,8 +511,9 @@ class PlayBook(object):
 
         # now with that data, handle contentional variable file imports!
 
-        all_hosts = self._list_available_hosts(play.hosts)
+        all_hosts = self._trim_unavailable_hosts(play._play_hosts)
         play.update_vars_files(all_hosts)
+        hosts_count = len(all_hosts)
 
         serialized_batch = []
         if play.serial <= 0:
@@ -526,6 +529,9 @@ class PlayBook(object):
 
         for on_hosts in serialized_batch:
 
+            # restrict the play to just the hosts we have in our on_hosts block that are
+            # available.
+            play._play_hosts = self._trim_unavailable_hosts(on_hosts)
             self.inventory.also_restrict_to(on_hosts)
 
             for task in play.tasks():
@@ -548,7 +554,7 @@ class PlayBook(object):
                                 # prevent duplicate handler includes from running more than once
                                 fired_names[handler_name] = 1
 
-                                host_list = self._list_available_hosts(play.hosts)
+                                host_list = self._trim_unavailable_hosts(play._play_hosts)
                                 if handler.any_errors_fatal and len(host_list) < hosts_count:
                                     play.max_fail_pct = 0
                                 if (hosts_count - len(host_list)) > int((play.max_fail_pct)/100.0 * hosts_count):
@@ -567,8 +573,6 @@ class PlayBook(object):
 
                         continue
 
-                hosts_count = len(self._list_available_hosts(play.hosts))
-
                 # only run the task if the requested tags match
                 should_run = False
                 for x in self.only_tags:
@@ -591,7 +595,9 @@ class PlayBook(object):
                         # just didn't match anything and that's ok
                         return False
 
-                host_list = self._list_available_hosts(play.hosts)
+                # Get a new list of what hosts are left as available, the ones that
+                # did not go fail/dark during the task
+                host_list = self._trim_unavailable_hosts(play._play_hosts)
 
                 # Set max_fail_pct to 0, So if any hosts fails, bail out
                 if task.any_errors_fatal and len(host_list) < hosts_count:
diff --git a/lib/ansible/playbook/play.py b/lib/ansible/playbook/play.py
index 92a40ba6ec..87af906e5f 100644
--- a/lib/ansible/playbook/play.py
+++ b/lib/ansible/playbook/play.py
@@ -34,7 +34,7 @@ class Play(object):
        'handlers', 'remote_user', 'remote_port', 'included_roles', 'accelerate',
        'accelerate_port', 'accelerate_ipv6', 'sudo', 'sudo_user', 'transport', 'playbook',
        'tags', 'gather_facts', 'serial', '_ds', '_handlers', '_tasks',
-       'basedir', 'any_errors_fatal', 'roles', 'max_fail_pct'
+       'basedir', 'any_errors_fatal', 'roles', 'max_fail_pct', '_play_hosts'
     ]
 
     # to catch typos and so forth -- these are userland names
@@ -134,6 +134,8 @@ class Play(object):
         if self.sudo_user != 'root':
             self.sudo = True
 
+        # place holder for the discovered hosts to be used in this play
+        self._play_hosts = None
 
     # *************************************************
 

From 6013f0738eed9a396414663cc55528d1e9ace2a7 Mon Sep 17 00:00:00 2001
From: Jesse Keating <jesse.keating@rackspace.com>
Date: Mon, 6 Jan 2014 20:13:39 -0800
Subject: [PATCH 2/2] Store the list of hosts to run on in runner object

This reduces the number of times inventory.list_hosts is called, which
can be costly. When coming from a playbook that data is already known.
---
 lib/ansible/playbook/__init__.py |  3 ++-
 lib/ansible/runner/__init__.py   | 11 +++++++++--
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/lib/ansible/playbook/__init__.py b/lib/ansible/playbook/__init__.py
index af234a1f77..5dd81b094c 100644
--- a/lib/ansible/playbook/__init__.py
+++ b/lib/ansible/playbook/__init__.py
@@ -316,7 +316,8 @@ class PlayBook(object):
             check=self.check, diff=self.diff, environment=task.environment, complex_args=task.args,
             accelerate=task.play.accelerate, accelerate_port=task.play.accelerate_port,
             accelerate_ipv6=task.play.accelerate_ipv6,
-            error_on_undefined_vars=C.DEFAULT_UNDEFINED_VAR_BEHAVIOR
+            error_on_undefined_vars=C.DEFAULT_UNDEFINED_VAR_BEHAVIOR,
+            run_hosts=hosts
         )
 
         if task.async_seconds == 0:
diff --git a/lib/ansible/runner/__init__.py b/lib/ansible/runner/__init__.py
index ef8d4aac21..c195cefe24 100644
--- a/lib/ansible/runner/__init__.py
+++ b/lib/ansible/runner/__init__.py
@@ -141,6 +141,7 @@ class Runner(object):
         accelerate=False,                   # use accelerated connection
         accelerate_ipv6=False,              # accelerated connection w/ IPv6
         accelerate_port=None,               # port to use with accelerated connection
+        run_hosts=None,                     # an optional list of pre-calculated hosts to run on
         ):
 
         # used to lock multiprocess inputs and outputs at various levels
@@ -205,6 +206,10 @@ class Runner(object):
             # don't override subset when passed from playbook
             self.inventory.subset(subset)
 
+        # If we get a pre-built list of hosts to run on, from say a playbook, use them.
+        # Also where we will store the hosts to run on once discovered
+        self.run_hosts = run_hosts
+
         if self.transport == 'local':
             self.remote_user = pwd.getpwuid(os.geteuid())[0]
 
@@ -1002,7 +1007,7 @@ class Runner(object):
                 results2["dark"][host] = result.result
 
         # hosts which were contacted but never got a chance to return
-        for host in self.inventory.list_hosts(self.pattern):
+        for host in self.run_hosts:
             if not (host in results2['dark'] or host in results2['contacted']):
                 results2["dark"][host] = {}
         return results2
@@ -1013,7 +1018,9 @@ class Runner(object):
         ''' xfer & run module on all matched hosts '''
 
         # find hosts that match the pattern
-        hosts = self.inventory.list_hosts(self.pattern)
+        if not self.run_hosts:
+            self.run_hosts = self.inventory.list_hosts(self.pattern)
+        hosts = self.run_hosts
         if len(hosts) == 0:
             self.callbacks.on_no_hosts()
             return dict(contacted={}, dark={})