From dcc5dfdf811f24c3adbb879e77a1bf8789bf1738 Mon Sep 17 00:00:00 2001 From: Toshio Kuratomi Date: Tue, 5 Apr 2016 23:48:37 -0700 Subject: [PATCH] Controller-side module caching. This makes our recursive, ast.parse performance measures as fast as pre-ziploader baseline. Since this unittest isn't testing that the returned module data is correct we don't need to worry about os.rename not having any module data. Should devise a separate test for the module and caching code --- bin/ansible | 23 ++++--- docsite/rst/intro_configuration.rst | 16 +++++ examples/ansible.cfg | 1 + hacking/test-module | 8 ++- lib/ansible/constants.py | 9 ++- lib/ansible/executor/module_common.py | 80 +++++++++++++++++------- lib/ansible/plugins/strategy/__init__.py | 23 ++++++- test/units/plugins/action/test_action.py | 15 ++--- 8 files changed, 130 insertions(+), 45 deletions(-) diff --git a/bin/ansible b/bin/ansible index ccd6bb8e17..59b4fec305 100755 --- a/bin/ansible +++ b/bin/ansible @@ -33,6 +33,7 @@ except Exception: pass import os +import shutil import sys import traceback @@ -40,6 +41,7 @@ import traceback from multiprocessing import Lock debug_lock = Lock() +import ansible.constants as C from ansible.errors import AnsibleError, AnsibleOptionsError, AnsibleParserError from ansible.utils.display import Display from ansible.utils.unicode import to_unicode @@ -87,28 +89,28 @@ if __name__ == '__main__': cli = mycli(sys.argv) cli.parse() - sys.exit(cli.run()) + exit_code = cli.run() except AnsibleOptionsError as e: cli.parser.print_help() display.error(to_unicode(e), wrap_text=False) - sys.exit(5) + exit_code = 5 except AnsibleParserError as e: display.error(to_unicode(e), wrap_text=False) - sys.exit(4) + exit_code = 4 # TQM takes care of these, but leaving comment to reserve the exit codes # except AnsibleHostUnreachable as e: # display.error(str(e)) -# sys.exit(3) +# exit_code = 3 # except AnsibleHostFailed as e: # display.error(str(e)) -# sys.exit(2) +# exit_code = 2 except AnsibleError as e: display.error(to_unicode(e), wrap_text=False) - sys.exit(1) + exit_code = 1 except KeyboardInterrupt: display.error("User interrupted execution") - sys.exit(99) + exit_code = 99 except Exception as e: have_cli_options = cli is not None and cli.options is not None display.error("Unexpected Exception: %s" % to_unicode(e), wrap_text=False) @@ -116,4 +118,9 @@ if __name__ == '__main__': display.display(u"the full traceback was:\n\n%s" % to_unicode(traceback.format_exc())) else: display.display("to see the full traceback, use -vvv") - sys.exit(250) + exit_code = 250 + finally: + # Remove ansible tempdir + shutil.rmtree(C.DEFAULT_LOCAL_TMP, True) + + sys.exit(exit_code) diff --git a/docsite/rst/intro_configuration.rst b/docsite/rst/intro_configuration.rst index d5820a5b5f..9628735087 100644 --- a/docsite/rst/intro_configuration.rst +++ b/docsite/rst/intro_configuration.rst @@ -452,6 +452,22 @@ This is the default location Ansible looks to find modules:: Ansible knows how to look in multiple locations if you feed it a colon separated path, and it also will look for modules in the "./library" directory alongside a playbook. +.. _local_tmp: + +local_tmp +========= + +When Ansible gets ready to send a module to a remote machine it usually has to +add a few things to the module: Some boilerplate code, the module's +parameters, and a few constants from the config file. This combination of +things gets stored in a temporary file until ansible exits and cleans up after +itself. The default location is a subdirectory of the user's home directory. +If you'd like to change that, you can do so by altering this setting:: + + local_tmp = $HOME/.ansible/tmp + +Ansible will then choose a random directory name inside this location. + .. _log_path: log_path diff --git a/examples/ansible.cfg b/examples/ansible.cfg index 672c99dfc3..2b50d71e24 100644 --- a/examples/ansible.cfg +++ b/examples/ansible.cfg @@ -14,6 +14,7 @@ #inventory = /etc/ansible/hosts #library = /usr/share/my_modules/ #remote_tmp = $HOME/.ansible/tmp +#local_tmp = $HOME/.ansible/tmp #forks = 5 #poll_interval = 15 #sudo_user = root diff --git a/hacking/test-module b/hacking/test-module index a3a6aa6fab..5d852f1eae 100755 --- a/hacking/test-module +++ b/hacking/test-module @@ -29,12 +29,14 @@ # test-module -m ../library/file/lineinfile -a "dest=/etc/exports line='/srv/home hostname1(rw,sync)'" --check # test-module -m ../library/commands/command -a "echo hello" -n -o "test_hello" -import sys import base64 +from multiprocessing import Lock +import optparse import os import subprocess +import sys import traceback -import optparse + import ansible.utils.vars as utils_vars from ansible.parsing.dataloader import DataLoader from ansible.parsing.utils.jsonify import jsonify @@ -133,10 +135,12 @@ def boilerplate_module(modfile, args, interpreter, check, destfile): modname = os.path.basename(modfile) modname = os.path.splitext(modname)[0] + action_write_lock = Lock() (module_data, module_style, shebang) = module_common.modify_module( modname, modfile, complex_args, + action_write_lock, task_vars=task_vars ) diff --git a/lib/ansible/constants.py b/lib/ansible/constants.py index cd7659a0c6..514fda8160 100644 --- a/lib/ansible/constants.py +++ b/lib/ansible/constants.py @@ -20,6 +20,7 @@ from __future__ import (absolute_import, division, print_function) __metaclass__ = type import os +import tempfile from string import ascii_letters, digits from ansible.compat.six import string_types @@ -47,7 +48,7 @@ def shell_expand(path): path = os.path.expanduser(os.path.expandvars(path)) return path -def get_config(p, section, key, env_var, default, boolean=False, integer=False, floating=False, islist=False, isnone=False, ispath=False, ispathlist=False): +def get_config(p, section, key, env_var, default, boolean=False, integer=False, floating=False, islist=False, isnone=False, ispath=False, ispathlist=False, istmppath=False): ''' return a configuration variable with casting ''' value = _get_config(p, section, key, env_var, default) if boolean: @@ -65,6 +66,11 @@ def get_config(p, section, key, env_var, default, boolean=False, integer=False, value = None elif ispath: value = shell_expand(value) + elif istmppath: + value = shell_expand(value) + if not os.path.exists(value): + os.makedirs(value, 0o700) + value = tempfile.mkdtemp(prefix='ansible-local-tmp', dir=value) elif ispathlist: if isinstance(value, string_types): value = [shell_expand(x) for x in value.split(os.pathsep)] @@ -136,6 +142,7 @@ DEFAULT_HOST_LIST = get_config(p, DEFAULTS,'inventory', 'ANSIBLE_INVENTO DEFAULT_MODULE_PATH = get_config(p, DEFAULTS, 'library', 'ANSIBLE_LIBRARY', None, ispathlist=True) DEFAULT_ROLES_PATH = get_config(p, DEFAULTS, 'roles_path', 'ANSIBLE_ROLES_PATH', '/etc/ansible/roles', ispathlist=True) DEFAULT_REMOTE_TMP = get_config(p, DEFAULTS, 'remote_tmp', 'ANSIBLE_REMOTE_TEMP', '$HOME/.ansible/tmp') +DEFAULT_LOCAL_TMP = get_config(p, DEFAULTS, 'local_tmp', 'ANSIBLE_LOCAL_TEMP', '$HOME/.ansible/tmp', istmppath=True) DEFAULT_MODULE_NAME = get_config(p, DEFAULTS, 'module_name', None, 'command') DEFAULT_FORKS = get_config(p, DEFAULTS, 'forks', 'ANSIBLE_FORKS', 5, integer=True) DEFAULT_MODULE_ARGS = get_config(p, DEFAULTS, 'module_args', 'ANSIBLE_MODULE_ARGS', '') diff --git a/lib/ansible/executor/module_common.py b/lib/ansible/executor/module_common.py index 9685411dc7..144af8c100 100644 --- a/lib/ansible/executor/module_common.py +++ b/lib/ansible/executor/module_common.py @@ -33,6 +33,7 @@ from ansible import __version__ from ansible import constants as C from ansible.errors import AnsibleError from ansible.utils.unicode import to_bytes, to_unicode +from ansible.plugins.strategy import action_write_locks try: from __main__ import display @@ -275,7 +276,11 @@ def _get_facility(task_vars): facility = task_vars['ansible_syslog_facility'] return facility -def meta_finder(data, snippet_names, snippet_data, zf): +def recursive_finder(data, snippet_names, snippet_data, zf): + """ + Using ModuleDepFinder, make sure we have all of the module_utils files that + the module its module_utils files needs. + """ tree = ast.parse(data) finder = ModuleDepFinder() finder.visit(tree) @@ -290,7 +295,7 @@ def meta_finder(data, snippet_names, snippet_data, zf): snippet_names.update(new_snippets) for snippet_name in tuple(new_snippets): - meta_finder(snippet_data[snippet_name], snippet_names, snippet_data, zf) + recursive_finder(snippet_data[snippet_name], snippet_names, snippet_data, zf) del snippet_data[snippet_name] def _find_snippet_imports(module_name, module_data, module_path, module_args, task_vars, module_compression): @@ -350,23 +355,61 @@ def _find_snippet_imports(module_name, module_data, module_path, module_args, ta except AttributeError: display.warning(u'Bad module compression string specified: %s. Using ZIP_STORED (no compression)' % module_compression) compression_method = zipfile.ZIP_STORED - zipoutput = BytesIO() - zf = zipfile.ZipFile(zipoutput, mode='w', compression=compression_method) - zf.writestr('ansible/__init__.py', b''.join((b"__version__ = '", to_bytes(__version__), b"'\n"))) - zf.writestr('ansible/module_utils/__init__.py', b'') - zf.writestr('ansible/module_exec/__init__.py', b'') - zf.writestr('ansible/module_exec/%s/__init__.py' % module_name, b"") - zf.writestr('ansible/module_exec/%s/__main__.py' % module_name, module_data) + lookup_path = os.path.join(C.DEFAULT_LOCAL_TMP, 'ziploader_cache') + if not os.path.exists(lookup_path): + os.mkdir(lookup_path) + cached_module_filename = os.path.join(lookup_path, "%s-%s" % (module_name, module_compression)) - snippet_data = dict() - meta_finder(module_data, snippet_names, snippet_data, zf) - zf.close() + zipdata = None + # Optimization -- don't lock if the module has already been cached + if os.path.exists(cached_module_filename): + zipdata = open(cached_module_filename, 'rb').read() + # Fool the check later... I think we should just remove the check + snippet_names.add('basic') + else: + with action_write_locks[module_name]: + # Check that no other process has created this while we were + # waiting for the lock + if not os.path.exists(cached_module_filename): + # Create the module zip data + zipoutput = BytesIO() + zf = zipfile.ZipFile(zipoutput, mode='w', compression=compression_method) + zf.writestr('ansible/__init__.py', b''.join((b"__version__ = '", to_bytes(__version__), b"'\n"))) + zf.writestr('ansible/module_utils/__init__.py', b'') + zf.writestr('ansible/module_exec/__init__.py', b'') + + zf.writestr('ansible/module_exec/%s/__init__.py' % module_name, b"") + zf.writestr('ansible/module_exec/%s/__main__.py' % module_name, module_data) + + snippet_data = dict() + recursive_finder(module_data, snippet_names, snippet_data, zf) + zf.close() + zipdata = base64.b64encode(zipoutput.getvalue()) + + # Write the assembled module to a temp file (write to temp + # so that no one looking for the file reads a partially + # written file) + with open(cached_module_filename + '-part', 'w') as f: + f.write(zipdata) + + # Rename the file into its final position in the cache so + # future users of this module can read it off the + # filesystem instead of constructing from scratch. + os.rename(cached_module_filename + '-part', cached_module_filename) + + if zipdata is None: + # Another process wrote the file while we were waiting for + # the write lock. Go ahead and read the data from disk + # instead of re-creating it. + zipdata = open(cached_module_filename, 'rb').read() + # Fool the check later... I think we should just remove the check + snippet_names.add('basic') shebang, interpreter = _get_shebang(u'/usr/bin/python', task_vars) if shebang is None: shebang = u'#!/usr/bin/python' output.write(to_bytes(STRIPPED_ZIPLOADER_TEMPLATE % dict( - zipdata=base64.b64encode(zipoutput.getvalue()), + zipdata=zipdata, ansible_module=module_name, args=python_repred_args, constants=python_repred_constants, @@ -450,17 +493,6 @@ def modify_module(module_name, module_path, module_args, task_vars=dict(), modul which results in the inclusion of the common code from powershell.ps1 """ - ### TODO: Optimization ideas if this code is actually a source of slowness: - # * Fix comment stripping: Currently doesn't preserve shebangs and encoding info (but we unconditionally add encoding info) - # * Use pyminifier if installed - # * comment stripping/pyminifier needs to have config setting to turn it - # off for debugging purposes (goes along with keep remote but should be - # separate otherwise users wouldn't be able to get info on what the - # minifier output) - # * Only split into lines and recombine into strings once - # * Cache the modified module? If only the args are different and we do - # that as the last step we could cache all the work up to that point. - with open(module_path, 'rb') as f: # read in the module source diff --git a/lib/ansible/plugins/strategy/__init__.py b/lib/ansible/plugins/strategy/__init__.py index 7b2a40a71a..f06d4f6f75 100644 --- a/lib/ansible/plugins/strategy/__init__.py +++ b/lib/ansible/plugins/strategy/__init__.py @@ -19,15 +19,16 @@ from __future__ import (absolute_import, division, print_function) __metaclass__ = type -from ansible.compat.six.moves import queue as Queue -from ansible.compat.six import iteritems, text_type, string_types - import json import time import zlib +from collections import defaultdict +from multiprocessing import Lock from jinja2.exceptions import UndefinedError +from ansible.compat.six.moves import queue as Queue +from ansible.compat.six import iteritems, text_type, string_types from ansible import constants as C from ansible.errors import AnsibleError, AnsibleParserError, AnsibleUndefinedVariable from ansible.executor.play_iterator import PlayIterator @@ -51,6 +52,8 @@ except ImportError: __all__ = ['StrategyBase'] +action_write_locks = defaultdict(Lock) + # TODO: this should probably be in the plugins/__init__.py, with # a smarter mechanism to set all of the attributes based on @@ -141,6 +144,20 @@ class StrategyBase: display.debug("entering _queue_task() for %s/%s" % (host, task)) + # Add a write lock for tasks. + # Maybe this should be added somewhere further up the call stack but + # this is the earliest in the code where we have task (1) extracted + # into its own variable and (2) there's only a single code path + # leading to the module being run. This is called by three + # functions: __init__.py::_do_handler_run(), linear.py::run(), and + # free.py::run() so we'd have to add to all three to do it there. + # The next common higher level is __init__.py::run() and that has + # tasks inside of play_iterator so we'd have to extract them to do it + # there. + if not action_write_locks[task.action]: + display.warning('Python defaultdict did not create the Lock for us. Creating manually') + action_write_locks[task.action] = Lock() + # and then queue the new task display.debug("%s - putting task (%s) in queue" % (host, task)) try: diff --git a/test/units/plugins/action/test_action.py b/test/units/plugins/action/test_action.py index dc6324d924..d0ea6ac789 100644 --- a/test/units/plugins/action/test_action.py +++ b/test/units/plugins/action/test_action.py @@ -212,14 +212,15 @@ class TestActionBase(unittest.TestCase): # test python module formatting with patch.object(builtins, 'open', mock_open(read_data=to_bytes(python_module_replacers.strip(), encoding='utf-8'))) as m: - mock_task.args = dict(a=1, foo='fö〩') - mock_connection.module_implementation_preferences = ('',) - (style, shebang, data) = action_base._configure_module(mock_task.action, mock_task.args) - self.assertEqual(style, "new") - self.assertEqual(shebang, b"#!/usr/bin/python") + with patch.object(os, 'rename') as m: + mock_task.args = dict(a=1, foo='fö〩') + mock_connection.module_implementation_preferences = ('',) + (style, shebang, data) = action_base._configure_module(mock_task.action, mock_task.args) + self.assertEqual(style, "new") + self.assertEqual(shebang, b"#!/usr/bin/python") - # test module not found - self.assertRaises(AnsibleError, action_base._configure_module, 'badmodule', mock_task.args) + # test module not found + self.assertRaises(AnsibleError, action_base._configure_module, 'badmodule', mock_task.args) # test powershell module formatting with patch.object(builtins, 'open', mock_open(read_data=to_bytes(powershell_module_replacers.strip(), encoding='utf-8'))) as m: