diff --git a/lib/ansible/inventory/ini.py b/lib/ansible/inventory/ini.py index 09cc5623e5..edfabb5289 100644 --- a/lib/ansible/inventory/ini.py +++ b/lib/ansible/inventory/ini.py @@ -69,7 +69,7 @@ class InventoryParser(object): for line in b_data.splitlines(): if line and line[0] in self.b_COMMENT_MARKERS: # Replace is okay for comment lines - #data.append(to_text(line, errors='surrogate_or_replace')) + #data.append(to_text(line, errors='surrogate_then_replace')) # Currently we only need these lines for accurate lineno in errors data.append(u'') else: diff --git a/lib/ansible/module_utils/_text.py b/lib/ansible/module_utils/_text.py index 173bbf4a9d..8d6a9d4f19 100644 --- a/lib/ansible/module_utils/_text.py +++ b/lib/ansible/module_utils/_text.py @@ -44,6 +44,11 @@ except LookupError: HAS_SURROGATEESCAPE = False +_COMPOSED_ERROR_HANDLERS = frozenset((None, 'surrogate_or_replace', + 'surrogate_or_strict', + 'surrogate_then_replace')) + + def to_bytes(obj, encoding='utf-8', errors=None, nonstring='simplerepr'): """Make sure that a string is a byte string @@ -56,22 +61,35 @@ def to_bytes(obj, encoding='utf-8', errors=None, nonstring='simplerepr'): :kwarg errors: The error handler to use if the text string is not encodable using the specified encoding. Any valid `codecs error handler `_ - may be specified. There are two additional error strategies - specifically aimed at helping people to port code: + may be specified. There are three additional error strategies + specifically aimed at helping people to port code. The first two are: - :surrogate_or_strict: Will use surrogateescape if it is a valid - handler, otherwise it will use strict - :surrogate_or_replace: Will use surrogateescape if it is a valid - handler, otherwise it will use replace. + :surrogate_or_strict: Will use ``surrogateescape`` if it is a valid + handler, otherwise it will use ``strict`` + :surrogate_or_replace: Will use ``surrogateescape`` if it is a valid + handler, otherwise it will use ``replace``. 
- Because surrogateescape was added in Python3 this usually means that - Python3 will use surrogateescape and Python2 will use the fallback - error handler. Note that the code checks for surrogateescape when the - module is imported. If you have a backport of surrogateescape for - python2, be sure to register the error handler prior to importing this + Because ``surrogateescape`` was added in Python3 this usually means that + Python3 will use ``surrogateescape`` and Python2 will use the fallback + error handler. Note that the code checks for ``surrogateescape`` when the + module is imported. If you have a backport of ``surrogateescape`` for + Python2, be sure to register the error handler prior to importing this module. - The default is `surrogate_or_replace` + The last error handler is: + + :surrogate_then_replace: Will use ``surrogateescape`` if it is a valid + handler. If encoding with ``surrogateescape`` would traceback, + surrogates are first replaced with replacement characters + and then the string is encoded using ``replace`` (which replaces + the rest of the nonencodable characters). If ``surrogateescape`` is + not present it will simply use ``replace``. (Added in Ansible 2.3) + This strategy is designed to never traceback when it attempts + to encode a string. + + The default until Ansible-2.2 was ``surrogate_or_replace``. + From Ansible-2.3 onwards, the default is ``surrogate_then_replace``. + :kwarg nonstring: The strategy to use if a nonstring is specified in ``obj``. Default is 'simplerepr'. Valid values are: @@ -90,23 +108,36 @@ def to_bytes(obj, encoding='utf-8', errors=None, nonstring='simplerepr'): byte string is in the specified encoding do:: encoded_string = to_bytes(to_text(input_string, 'latin-1'), 'utf-8') + + .. versionchanged:: 2.3 + + Added the ``surrogate_then_replace`` error handler and made it the default error handler. 
""" if isinstance(obj, binary_type): return obj - if errors in (None, 'surrogate_or_replace'): + # We're given a text string + # If it has surrogates, we know because it will decode + original_errors = errors + if errors in _COMPOSED_ERROR_HANDLERS: if HAS_SURROGATEESCAPE: errors = 'surrogateescape' + elif errors == 'surrogate_or_strict': + errors = 'strict' else: errors = 'replace' - elif errors == 'surrogate_or_strict': - if HAS_SURROGATEESCAPE: - errors = 'surrogateescape' - else: - errors = 'strict' if isinstance(obj, text_type): - return obj.encode(encoding, errors) + try: + # Try this first as it's the fastest + return obj.encode(encoding, errors) + except UnicodeEncodeError: + if original_errors in (None, 'surrogate_then_replace'): + # Slow but works + return_string = obj.encode('utf-8', 'surrogateescape') + return_string = return_string.decode('utf-8', 'replace') + return return_string.encode(encoding, 'replace') + raise # Note: We do these last even though we have to call to_bytes again on the # value because we're optimizing the common case @@ -144,8 +175,27 @@ def to_text(obj, encoding='utf-8', errors=None, nonstring='simplerepr'): :kwarg errors: The error handler to use if the byte string is not decodable using the specified encoding. Any valid `codecs error handler `_ - may be specified. On Python3 this defaults to 'surrogateescape'. On - Python2, this defaults to 'replace'. + may be specified. We support three additional error strategies + specifically aimed at helping people to port code: + + :surrogate_or_strict: Will use surrogateescape if it is a valid + handler, otherwise it will use strict + :surrogate_or_replace: Will use surrogateescape if it is a valid + handler, otherwise it will use replace. 
+ :surrogate_then_replace: Does the same as surrogate_or_replace but + was added for symmetry with the error handlers in + :func:`ansible.module_utils._text.to_bytes`. (Added in Ansible 2.3) + + Because surrogateescape was added in Python3 this usually means that + Python3 will use `surrogateescape` and Python2 will use the fallback + error handler. Note that the code checks for surrogateescape when the + module is imported. If you have a backport of `surrogateescape` for + python2, be sure to register the error handler prior to importing this + module. + + The default until Ansible-2.2 was `surrogate_or_replace`. + In Ansible-2.3 this defaults to `surrogate_then_replace` for symmetry + with :func:`ansible.module_utils._text.to_bytes`. :kwarg nonstring: The strategy to use if a nonstring is specified in ``obj``. Default is 'simplerepr'. Valid values are: @@ -158,22 +208,27 @@ def to_text(obj, encoding='utf-8', errors=None, nonstring='simplerepr'): :returns: Typically this returns a text string. If a nonstring object is passed in this may be a different type depending on the strategy specified by nonstring. This will never return a byte string. + From Ansible-2.3 onwards, the default error handler is `surrogate_then_replace`. + + .. versionchanged:: 2.3 + + Added the surrogate_then_replace error handler and made it the default error handler. """ if isinstance(obj, text_type): return obj - if errors in (None, 'surrogate_or_replace'): + if errors in _COMPOSED_ERROR_HANDLERS: if HAS_SURROGATEESCAPE: errors = 'surrogateescape' + elif errors == 'surrogate_or_strict': + errors = 'strict' else: errors = 'replace' - elif errors == 'surrogate_or_strict': - if HAS_SURROGATEESCAPE: - errors = 'surrogateescape' - else: - errors = 'strict' if isinstance(obj, binary_type): + # Note: We don't need special handling for surrogate_then_replace + # because all bytes will either be made into surrogates or are valid + # to decode. 
return obj.decode(encoding, errors) # Note: We do these last even though we have to call to_text again on the diff --git a/lib/ansible/module_utils/basic.py b/lib/ansible/module_utils/basic.py index 5758509d86..7dc4a96728 100644 --- a/lib/ansible/module_utils/basic.py +++ b/lib/ansible/module_utils/basic.py @@ -403,9 +403,9 @@ def remove_values(value, no_log_strings): native_str_value = native_str_value.replace(omit_me, '*' * 8) if value_is_text and isinstance(native_str_value, binary_type): - value = to_text(native_str_value, encoding='utf-8', errors='surrogate_or_replace') + value = to_text(native_str_value, encoding='utf-8', errors='surrogate_then_replace') elif not value_is_text and isinstance(native_str_value, text_type): - value = to_bytes(native_str_value, encoding='utf-8', errors='surrogate_or_replace') + value = to_bytes(native_str_value, encoding='utf-8', errors='surrogate_then_replace') else: value = native_str_value elif isinstance(value, SEQUENCETYPE): diff --git a/lib/ansible/module_utils/facts.py b/lib/ansible/module_utils/facts.py index b9f5ab264b..077a5dd187 100644 --- a/lib/ansible/module_utils/facts.py +++ b/lib/ansible/module_utils/facts.py @@ -406,7 +406,7 @@ class Facts(object): def get_lsb_facts(self): lsb_path = self.module.get_bin_path('lsb_release') if lsb_path: - rc, out, err = self.module.run_command([lsb_path, "-a"], errors='surrogate_or_replace') + rc, out, err = self.module.run_command([lsb_path, "-a"], errors='surrogate_then_replace') if rc == 0: self.facts['lsb'] = {} for line in out.splitlines(): @@ -484,7 +484,7 @@ class Facts(object): def get_caps_facts(self): capsh_path = self.module.get_bin_path('capsh') if capsh_path: - rc, out, err = self.module.run_command([capsh_path, "--print"], errors='surrogate_or_replace') + rc, out, err = self.module.run_command([capsh_path, "--print"], errors='surrogate_then_replace') enforced_caps = [] enforced = 'NA' for line in out.splitlines(): @@ -1329,7 +1329,7 @@ class LinuxHardware(Hardware): 
def _run_findmnt(self, findmnt_path): args = ['--list', '--noheadings', '--notruncate'] cmd = [findmnt_path] + args - rc, out, err = self.module.run_command(cmd, errors='surrogate_or_replace') + rc, out, err = self.module.run_command(cmd, errors='surrogate_then_replace') return rc, out, err def _find_bind_mounts(self): @@ -1423,7 +1423,7 @@ class LinuxHardware(Hardware): self.facts['devices'] = {} lspci = self.module.get_bin_path('lspci') if lspci: - rc, pcidata, err = self.module.run_command([lspci, '-D'], errors='surrogate_or_replace') + rc, pcidata, err = self.module.run_command([lspci, '-D'], errors='surrogate_then_replace') else: pcidata = None @@ -2482,7 +2482,7 @@ class LinuxNetwork(Network): continue if v == 'v6' and not socket.has_ipv6: continue - rc, out, err = self.module.run_command(command[v], errors='surrogate_or_replace') + rc, out, err = self.module.run_command(command[v], errors='surrogate_then_replace') if not out: # v6 routing may result in # RTNETLINK answers: Invalid argument @@ -2647,10 +2647,10 @@ class LinuxNetwork(Network): ip_path = self.module.get_bin_path("ip") args = [ip_path, 'addr', 'show', 'primary', device] - rc, primary_data, stderr = self.module.run_command(args, errors='surrogate_or_replace') + rc, primary_data, stderr = self.module.run_command(args, errors='surrogate_then_replace') args = [ip_path, 'addr', 'show', 'secondary', device] - rc, secondary_data, stderr = self.module.run_command(args, errors='surrogate_or_replace') + rc, secondary_data, stderr = self.module.run_command(args, errors='surrogate_then_replace') parse_ip_output(primary_data) parse_ip_output(secondary_data, secondary=True) @@ -2672,7 +2672,7 @@ class LinuxNetwork(Network): ethtool_path = self.module.get_bin_path("ethtool") if ethtool_path: args = [ethtool_path, '-k', device] - rc, stdout, stderr = self.module.run_command(args, errors='surrogate_or_replace') + rc, stdout, stderr = self.module.run_command(args, errors='surrogate_then_replace') if rc == 0: for 
line in stdout.strip().splitlines(): if not line or line.endswith(":"): diff --git a/lib/ansible/plugins/action/__init__.py b/lib/ansible/plugins/action/__init__.py index 0d605b46fc..34ca2d71fe 100644 --- a/lib/ansible/plugins/action/__init__.py +++ b/lib/ansible/plugins/action/__init__.py @@ -818,7 +818,7 @@ class ActionBase(with_metaclass(ABCMeta, object)): data['rc'] = res['rc'] return data - def _low_level_execute_command(self, cmd, sudoable=True, in_data=None, executable=None, encoding_errors='surrogate_or_replace'): + def _low_level_execute_command(self, cmd, sudoable=True, in_data=None, executable=None, encoding_errors='surrogate_then_replace'): ''' This is the function which executes the low level shell command, which may be commands to create/remove directories for temporary files, or to