From 95e77ac853f902f4b19c70be41e5e3cb905bd193 Mon Sep 17 00:00:00 2001 From: Matt Martz Date: Thu, 20 Sep 2018 12:31:48 -0500 Subject: [PATCH] Ensure that the src file contents is converted to unicode in diff info (#45744) * Ensure that the src file contents is converted to unicode in diff info. Fixes #45717 * Fix up and cleanup * The diff functionality in the callback plugins should have the to_text() calls removed since we're now doing it in ActionBase * catching of UnicodeError and warnings in the callback diff functionality from 61d01f549f2143fd9adfa4ffae42f09d24649c26 haven't been needed since we switched to to_text so remove them. * Add a note to ActionBase's diff function giving an example of when the diff function will be inaccurate and how to fix it * Fix callback get_diff() tests I believe the unittests of callback's get_diff() were wrong. They were sending in a list where strings were expected. Because previous code was transforming the lists into strings via their repr, the previous tests did not fail but they would have formatted the test cases output in an odd way if we had looked at it. --- changelogs/fragments/copy-diff-text.yaml | 5 + lib/ansible/plugins/action/__init__.py | 30 +++-- lib/ansible/plugins/callback/__init__.py | 118 +++++++++---------- test/units/plugins/callback/test_callback.py | 29 ++--- 4 files changed, 94 insertions(+), 88 deletions(-) create mode 100644 changelogs/fragments/copy-diff-text.yaml diff --git a/changelogs/fragments/copy-diff-text.yaml b/changelogs/fragments/copy-diff-text.yaml new file mode 100644 index 0000000000..a05fb848fd --- /dev/null +++ b/changelogs/fragments/copy-diff-text.yaml @@ -0,0 +1,5 @@ +bugfixes: +- copy - Ensure that the src file contents is converted to unicode in diff + information so that it is properly wrapped by AnsibleUnsafeText to prevent + unexpected templating of diff data in Python3 + (https://github.com/ansible/ansible/issues/45717) diff --git a/lib/ansible/plugins/action/__init__.py b/lib/ansible/plugins/action/__init__.py index a693cfaea3..b57ef7e17b 100644 --- a/lib/ansible/plugins/action/__init__.py +++ b/lib/ansible/plugins/action/__init__.py @@ -1,3 +1,4 @@ +# coding: utf-8 # Copyright: (c) 2012-2014, Michael DeHaan # Copyright: (c) 2018, Ansible Project # GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt) @@ -1013,6 +1014,15 @@ class ActionBase(with_metaclass(ABCMeta, object)): def _get_diff_data(self, destination, source, task_vars, source_file=True): + # Note: Since we do not diff the source and destination before we transform from bytes into + # text the diff between source and destination may not be accurate. To fix this, we'd need + # to move the diffing from the callback plugins into here. + # + # Example of data which would cause trouble is src_content == b'\xff' and dest_content == + # b'\xfe'. Neither of those are valid utf-8 so both get turned into the replacement + # character: diff['before'] = u'�' ; diff['after'] = u'�' When the callback plugin later + # diffs before and after it shows an empty diff. + diff = {} display.debug("Going to peek to see if file has changed permissions") peek_result = self._execute_module(module_name='file', module_args=dict(path=destination, _diff_peek=True), task_vars=task_vars, persist_files=True) @@ -1020,22 +1030,22 @@ class ActionBase(with_metaclass(ABCMeta, object)): if not peek_result.get('failed', False) or peek_result.get('rc', 0) == 0: if peek_result.get('state') == 'absent': - diff['before'] = '' + diff['before'] = u'' elif peek_result.get('appears_binary'): diff['dst_binary'] = 1 elif peek_result.get('size') and C.MAX_FILE_SIZE_FOR_DIFF > 0 and peek_result['size'] > C.MAX_FILE_SIZE_FOR_DIFF: diff['dst_larger'] = C.MAX_FILE_SIZE_FOR_DIFF else: - display.debug("Slurping the file %s" % source) + display.debug(u"Slurping the file %s" % source) dest_result = self._execute_module(module_name='slurp', module_args=dict(path=destination), task_vars=task_vars, persist_files=True) if 'content' in dest_result: dest_contents = dest_result['content'] - if dest_result['encoding'] == 'base64': + if dest_result['encoding'] == u'base64': dest_contents = base64.b64decode(dest_contents) else: - raise AnsibleError("unknown encoding in content option, failed: %s" % dest_result) + raise AnsibleError("unknown encoding in content option, failed: %s" % to_native(dest_result)) diff['before_header'] = destination - diff['before'] = dest_contents + diff['before'] = to_text(dest_contents) if source_file: st = os.stat(source) @@ -1053,17 +1063,17 @@ class ActionBase(with_metaclass(ABCMeta, object)): diff['src_binary'] = 1 else: diff['after_header'] = source - diff['after'] = src_contents + diff['after'] = to_text(src_contents) else: - display.debug("source of file passed in") - diff['after_header'] = 'dynamically generated' + display.debug(u"source of file passed in") + diff['after_header'] = u'dynamically generated' diff['after'] = source if self._play_context.no_log: if 'before' in diff: - diff["before"] = "" + diff["before"] = u"" if 'after' in diff: - diff["after"] = " [[ Diff output has been hidden because 'no_log: true' was specified for this result ]]\n" + diff["after"] = u" [[ Diff output has been hidden because 'no_log: true' was specified for this result ]]\n" return diff diff --git a/lib/ansible/plugins/callback/__init__.py b/lib/ansible/plugins/callback/__init__.py index 6cd89305e3..1c11fe6b9f 100644 --- a/lib/ansible/plugins/callback/__init__.py +++ b/lib/ansible/plugins/callback/__init__.py @@ -31,7 +31,6 @@ from collections import MutableMapping from ansible import constants as C from ansible.parsing.ajson import AnsibleJSONEncoder from ansible.plugins import AnsiblePlugin, get_plugin_class -from ansible.module_utils._text import to_text from ansible.utils.color import stringc from ansible.vars.clean import strip_internal_keys @@ -155,67 +154,62 @@ class CallbackBase(AnsiblePlugin): ret = [] for diff in difflist: - try: - with warnings.catch_warnings(): - warnings.simplefilter('ignore') - if 'dst_binary' in diff: - ret.append("diff skipped: destination file appears to be binary\n") - if 'src_binary' in diff: - ret.append("diff skipped: source file appears to be binary\n") - if 'dst_larger' in diff: - ret.append("diff skipped: destination file size is greater than %d\n" % diff['dst_larger']) - if 'src_larger' in diff: - ret.append("diff skipped: source file size is greater than %d\n" % diff['src_larger']) - if 'before' in diff and 'after' in diff: - # format complex structures into 'files' - for x in ['before', 'after']: - if isinstance(diff[x], MutableMapping): - diff[x] = json.dumps(diff[x], sort_keys=True, indent=4, separators=(',', ': ')) + '\n' - if 'before_header' in diff: - before_header = "before: %s" % diff['before_header'] - else: - before_header = 'before' - if 'after_header' in diff: - after_header = "after: %s" % diff['after_header'] - else: - after_header = 'after' - before_lines = to_text(diff['before']).splitlines(True) - after_lines = to_text(diff['after']).splitlines(True) - if before_lines and not before_lines[-1].endswith('\n'): - before_lines[-1] += '\n\\ No newline at end of file\n' - if after_lines and not after_lines[-1].endswith('\n'): - after_lines[-1] += '\n\\ No newline at end of file\n' - differ = difflib.unified_diff(before_lines, - after_lines, - fromfile=before_header, - tofile=after_header, - fromfiledate='', - tofiledate='', - n=C.DIFF_CONTEXT) - difflines = list(differ) - if len(difflines) >= 3 and sys.version_info[:2] == (2, 6): - # difflib in Python 2.6 adds trailing spaces after - # filenames in the -- before/++ after headers. - difflines[0] = difflines[0].replace(' \n', '\n') - difflines[1] = difflines[1].replace(' \n', '\n') - # it also treats empty files differently - difflines[2] = difflines[2].replace('-1,0', '-0,0').replace('+1,0', '+0,0') - has_diff = False - for line in difflines: - has_diff = True - if line.startswith('+'): - line = stringc(line, C.COLOR_DIFF_ADD) - elif line.startswith('-'): - line = stringc(line, C.COLOR_DIFF_REMOVE) - elif line.startswith('@@'): - line = stringc(line, C.COLOR_DIFF_LINES) - ret.append(line) - if has_diff: - ret.append('\n') - if 'prepared' in diff: - ret.append(to_text(diff['prepared'])) - except UnicodeDecodeError: - ret.append(">> the files are different, but the diff library cannot compare unicode strings\n\n") + if 'dst_binary' in diff: + ret.append(u"diff skipped: destination file appears to be binary\n") + if 'src_binary' in diff: + ret.append(u"diff skipped: source file appears to be binary\n") + if 'dst_larger' in diff: + ret.append(u"diff skipped: destination file size is greater than %d\n" % diff['dst_larger']) + if 'src_larger' in diff: + ret.append(u"diff skipped: source file size is greater than %d\n" % diff['src_larger']) + if 'before' in diff and 'after' in diff: + # format complex structures into 'files' + for x in ['before', 'after']: + if isinstance(diff[x], MutableMapping): + diff[x] = json.dumps(diff[x], sort_keys=True, indent=4, separators=(u',', u': ')) + u'\n' + if 'before_header' in diff: + before_header = u"before: %s" % diff['before_header'] + else: + before_header = u'before' + if 'after_header' in diff: + after_header = u"after: %s" % diff['after_header'] + else: + after_header = u'after' + before_lines = diff['before'].splitlines(True) + after_lines = diff['after'].splitlines(True) + if before_lines and not before_lines[-1].endswith(u'\n'): + before_lines[-1] += u'\n\\ No newline at end of file\n' + if after_lines and not after_lines[-1].endswith('\n'): + after_lines[-1] += u'\n\\ No newline at end of file\n' + differ = difflib.unified_diff(before_lines, + after_lines, + fromfile=before_header, + tofile=after_header, + fromfiledate=u'', + tofiledate=u'', + n=C.DIFF_CONTEXT) + difflines = list(differ) + if len(difflines) >= 3 and sys.version_info[:2] == (2, 6): + # difflib in Python 2.6 adds trailing spaces after + # filenames in the -- before/++ after headers. + difflines[0] = difflines[0].replace(u' \n', u'\n') + difflines[1] = difflines[1].replace(u' \n', u'\n') + # it also treats empty files differently + difflines[2] = difflines[2].replace(u'-1,0', u'-0,0').replace(u'+1,0', u'+0,0') + has_diff = False + for line in difflines: + has_diff = True + if line.startswith(u'+'): + line = stringc(line, C.COLOR_DIFF_ADD) + elif line.startswith(u'-'): + line = stringc(line, C.COLOR_DIFF_REMOVE) + elif line.startswith(u'@@'): + line = stringc(line, C.COLOR_DIFF_LINES) + ret.append(line) + if has_diff: + ret.append('\n') + if 'prepared' in diff: + ret.append(diff['prepared']) return u''.join(ret) def _get_item_label(self, result): diff --git a/test/units/plugins/callback/test_callback.py b/test/units/plugins/callback/test_callback.py index 1df4b55d7d..6e40ab47b5 100644 --- a/test/units/plugins/callback/test_callback.py +++ b/test/units/plugins/callback/test_callback.py @@ -175,9 +175,6 @@ class TestCallbackDumpResults(unittest.TestCase): self.assertTrue('LEFTIN' in json_out) -# TODO: triggr the 'except UnicodeError' around _get_diff -# that try except orig appeared in 61d01f549f2143fd9adfa4ffae42f09d24649c26 -# in 2013 so maybe a < py2.6 issue class TestCallbackDiff(unittest.TestCase): def setUp(self): @@ -188,28 +185,28 @@ class TestCallbackDiff(unittest.TestCase): def test_difflist(self): # TODO: split into smaller tests? - difflist = [{'before': ['preface\nThe Before String\npostscript'], - 'after': ['preface\nThe After String\npostscript'], - 'before_header': 'just before', - 'after_header': 'just after' + difflist = [{'before': u'preface\nThe Before String\npostscript', + 'after': u'preface\nThe After String\npostscript', + 'before_header': u'just before', + 'after_header': u'just after' }, - {'before': ['preface\nThe Before String\npostscript'], - 'after': ['preface\nThe After String\npostscript'], + {'before': u'preface\nThe Before String\npostscript', + 'after': u'preface\nThe After String\npostscript', }, {'src_binary': 'chicane'}, {'dst_binary': 'chicanery'}, {'dst_larger': 1}, {'src_larger': 2}, - {'prepared': 'what does prepared do?'}, - {'before_header': 'just before'}, - {'after_header': 'just after'}] + {'prepared': u'what does prepared do?'}, + {'before_header': u'just before'}, + {'after_header': u'just after'}] res = self.cb._get_diff(difflist) - self.assertIn('Before String', res) - self.assertIn('After String', res) - self.assertIn('just before', res) - self.assertIn('just after', res) + self.assertIn(u'Before String', res) + self.assertIn(u'After String', res) + self.assertIn(u'just before', res) + self.assertIn(u'just after', res) def test_simple_diff(self): self.assertMultiLineEqual(