mirror of
https://github.com/ansible-collections/community.general.git
synced 2024-09-14 20:13:21 +02:00
Fix to bytes surrogate and nonencodable chars (#21180)
* Add a surrogate_then_replace error strategy to keep to_bytes from tracebacking by default * Port all code that explicitly used surrogate_or_replace to surrogate_then_replace
This commit is contained in:
parent
149dd9ca86
commit
98541b7c8b
5 changed files with 94 additions and 39 deletions
|
@ -69,7 +69,7 @@ class InventoryParser(object):
|
||||||
for line in b_data.splitlines():
|
for line in b_data.splitlines():
|
||||||
if line and line[0] in self.b_COMMENT_MARKERS:
|
if line and line[0] in self.b_COMMENT_MARKERS:
|
||||||
# Replace is okay for comment lines
|
# Replace is okay for comment lines
|
||||||
#data.append(to_text(line, errors='surrogate_or_replace'))
|
#data.append(to_text(line, errors='surrogate_then_replace'))
|
||||||
# Currently we only need these lines for accurate lineno in errors
|
# Currently we only need these lines for accurate lineno in errors
|
||||||
data.append(u'')
|
data.append(u'')
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -44,6 +44,11 @@ except LookupError:
|
||||||
HAS_SURROGATEESCAPE = False
|
HAS_SURROGATEESCAPE = False
|
||||||
|
|
||||||
|
|
||||||
|
_COMPOSED_ERROR_HANDLERS = frozenset((None, 'surrogate_or_escape',
|
||||||
|
'surrogate_or_strict',
|
||||||
|
'surrogate_then_replace'))
|
||||||
|
|
||||||
|
|
||||||
def to_bytes(obj, encoding='utf-8', errors=None, nonstring='simplerepr'):
|
def to_bytes(obj, encoding='utf-8', errors=None, nonstring='simplerepr'):
|
||||||
"""Make sure that a string is a byte string
|
"""Make sure that a string is a byte string
|
||||||
|
|
||||||
|
@ -56,22 +61,35 @@ def to_bytes(obj, encoding='utf-8', errors=None, nonstring='simplerepr'):
|
||||||
:kwarg errors: The error handler to use if the text string is not
|
:kwarg errors: The error handler to use if the text string is not
|
||||||
encodable using the specified encoding. Any valid `codecs error
|
encodable using the specified encoding. Any valid `codecs error
|
||||||
handler <https://docs.python.org/2/library/codecs.html#codec-base-classes>`_
|
handler <https://docs.python.org/2/library/codecs.html#codec-base-classes>`_
|
||||||
may be specified. There are two additional error strategies
|
may be specified. There are three additional error strategies
|
||||||
specifically aimed at helping people to port code:
|
specifically aimed at helping people to port code. The first two are:
|
||||||
|
|
||||||
:surrogate_or_strict: Will use surrogateescape if it is a valid
|
:surrogate_or_strict: Will use ``surrogateescape`` if it is a valid
|
||||||
handler, otherwise it will use strict
|
handler, otherwise it will use ``strict``
|
||||||
:surrogate_or_replace: Will use surrogateescape if it is a valid
|
:surrogate_or_replace: Will use ``surrogateescape`` if it is a valid
|
||||||
handler, otherwise it will use replace.
|
handler, otherwise it will use ``replace``.
|
||||||
|
|
||||||
Because surrogateescape was added in Python3 this usually means that
|
Because ``surrogateescape`` was added in Python3 this usually means that
|
||||||
Python3 will use surrogateescape and Python2 will use the fallback
|
Python3 will use ``surrogateescape`` and Python2 will use the fallback
|
||||||
error handler. Note that the code checks for surrogateescape when the
|
error handler. Note that the code checks for ``surrogateescape`` when the
|
||||||
module is imported. If you have a backport of surrogateescape for
|
module is imported. If you have a backport of ``surrogateescape`` for
|
||||||
python2, be sure to register the error handler prior to importing this
|
Python2, be sure to register the error handler prior to importing this
|
||||||
module.
|
module.
|
||||||
|
|
||||||
The default is `surrogate_or_replace`
|
The last error handler is:
|
||||||
|
|
||||||
|
:surrogate_then_replace: Will use ``surrogateescape`` if it is a valid
|
||||||
|
handler. If encoding with ``surrogateescape`` would traceback,
|
||||||
|
surrogates are first replaced with replacement characters
|
||||||
|
and then the string is encoded using ``replace`` (which replaces
|
||||||
|
the rest of the nonencodable bytes). If ``surrogateescape`` is
|
||||||
|
not present it will simply use ``replace``. (Added in Ansible 2.3)
|
||||||
|
This strategy is designed to never traceback when it attempts
|
||||||
|
to encode a string.
|
||||||
|
|
||||||
|
The default until Ansible-2.2 was ``surrogate_or_replace``
|
||||||
|
From Ansible-2.3 onwards, the default is ``surrogate_then_replace``.
|
||||||
|
|
||||||
:kwarg nonstring: The strategy to use if a nonstring is specified in
|
:kwarg nonstring: The strategy to use if a nonstring is specified in
|
||||||
``obj``. Default is 'simplerepr'. Valid values are:
|
``obj``. Default is 'simplerepr'. Valid values are:
|
||||||
|
|
||||||
|
@ -90,23 +108,36 @@ def to_bytes(obj, encoding='utf-8', errors=None, nonstring='simplerepr'):
|
||||||
byte string is in the specified encoding do::
|
byte string is in the specified encoding do::
|
||||||
|
|
||||||
encoded_string = to_bytes(to_text(input_string, 'latin-1'), 'utf-8')
|
encoded_string = to_bytes(to_text(input_string, 'latin-1'), 'utf-8')
|
||||||
|
|
||||||
|
.. versionchanged:: 2.3
|
||||||
|
|
||||||
|
Added the ``surrogate_then_replace`` error handler and made it the default error handler.
|
||||||
"""
|
"""
|
||||||
if isinstance(obj, binary_type):
|
if isinstance(obj, binary_type):
|
||||||
return obj
|
return obj
|
||||||
|
|
||||||
if errors in (None, 'surrogate_or_replace'):
|
# We're given a text string
|
||||||
|
# If it has surrogates, we know because it will decode
|
||||||
|
original_errors = errors
|
||||||
|
if errors in _COMPOSED_ERROR_HANDLERS:
|
||||||
if HAS_SURROGATEESCAPE:
|
if HAS_SURROGATEESCAPE:
|
||||||
errors = 'surrogateescape'
|
errors = 'surrogateescape'
|
||||||
|
elif errors == 'surrogate_or_strict':
|
||||||
|
errors = 'strict'
|
||||||
else:
|
else:
|
||||||
errors = 'replace'
|
errors = 'replace'
|
||||||
elif errors == 'surrogate_or_strict':
|
|
||||||
if HAS_SURROGATEESCAPE:
|
|
||||||
errors = 'surrogateescape'
|
|
||||||
else:
|
|
||||||
errors = 'strict'
|
|
||||||
|
|
||||||
if isinstance(obj, text_type):
|
if isinstance(obj, text_type):
|
||||||
|
try:
|
||||||
|
# Try this first as it's the fastest
|
||||||
return obj.encode(encoding, errors)
|
return obj.encode(encoding, errors)
|
||||||
|
except UnicodeEncodeError:
|
||||||
|
if original_errors in (None, 'surrogate_then_replace'):
|
||||||
|
# Slow but works
|
||||||
|
return_string = obj.encode('utf-8', 'surrogateescape')
|
||||||
|
return_string = return_string.decode('utf-8', 'replace')
|
||||||
|
return return_string.encode(encoding, 'replace')
|
||||||
|
raise
|
||||||
|
|
||||||
# Note: We do these last even though we have to call to_bytes again on the
|
# Note: We do these last even though we have to call to_bytes again on the
|
||||||
# value because we're optimizing the common case
|
# value because we're optimizing the common case
|
||||||
|
@ -144,8 +175,27 @@ def to_text(obj, encoding='utf-8', errors=None, nonstring='simplerepr'):
|
||||||
:kwarg errors: The error handler to use if the byte string is not
|
:kwarg errors: The error handler to use if the byte string is not
|
||||||
decodable using the specified encoding. Any valid `codecs error
|
decodable using the specified encoding. Any valid `codecs error
|
||||||
handler <https://docs.python.org/2/library/codecs.html#codec-base-classes>`_
|
handler <https://docs.python.org/2/library/codecs.html#codec-base-classes>`_
|
||||||
may be specified. On Python3 this defaults to 'surrogateescape'. On
|
may be specified. We support three additional error strategies
|
||||||
Python2, this defaults to 'replace'.
|
specifically aimed at helping people to port code:
|
||||||
|
|
||||||
|
:surrogate_or_strict: Will use surrogateescape if it is a valid
|
||||||
|
handler, otherwise it will use strict
|
||||||
|
:surrogate_or_replace: Will use surrogateescape if it is a valid
|
||||||
|
handler, otherwise it will use replace.
|
||||||
|
:surrogate_then_replace: Does the same as surrogate_or_replace but
|
||||||
|
was added for symmetry with the error handlers in
|
||||||
|
:func:`ansible.module_utils._text.to_bytes` (Added in Ansible 2.3)
|
||||||
|
|
||||||
|
Because surrogateescape was added in Python3 this usually means that
|
||||||
|
Python3 will use `surrogateescape` and Python2 will use the fallback
|
||||||
|
error handler. Note that the code checks for surrogateescape when the
|
||||||
|
module is imported. If you have a backport of `surrogateescape` for
|
||||||
|
python2, be sure to register the error handler prior to importing this
|
||||||
|
module.
|
||||||
|
|
||||||
|
The default until Ansible-2.2 was `surrogate_or_replace`
|
||||||
|
In Ansible-2.3 this defaults to `surrogate_then_replace` for symmetry
|
||||||
|
with :func:`ansible.module_utils._text.to_bytes` .
|
||||||
:kwarg nonstring: The strategy to use if a nonstring is specified in
|
:kwarg nonstring: The strategy to use if a nonstring is specified in
|
||||||
``obj``. Default is 'simplerepr'. Valid values are:
|
``obj``. Default is 'simplerepr'. Valid values are:
|
||||||
|
|
||||||
|
@ -158,22 +208,27 @@ def to_text(obj, encoding='utf-8', errors=None, nonstring='simplerepr'):
|
||||||
:returns: Typically this returns a text string. If a nonstring object is
|
:returns: Typically this returns a text string. If a nonstring object is
|
||||||
passed in this may be a different type depending on the strategy
|
passed in this may be a different type depending on the strategy
|
||||||
specified by nonstring. This will never return a byte string.
|
specified by nonstring. This will never return a byte string.
|
||||||
|
From Ansible-2.3 onwards, the default is `surrogate_then_replace`.
|
||||||
|
|
||||||
|
.. versionchanged:: 2.3
|
||||||
|
|
||||||
|
Added the surrogate_then_replace error handler and made it the default error handler.
|
||||||
"""
|
"""
|
||||||
if isinstance(obj, text_type):
|
if isinstance(obj, text_type):
|
||||||
return obj
|
return obj
|
||||||
|
|
||||||
if errors in (None, 'surrogate_or_replace'):
|
if errors in _COMPOSED_ERROR_HANDLERS:
|
||||||
if HAS_SURROGATEESCAPE:
|
if HAS_SURROGATEESCAPE:
|
||||||
errors = 'surrogateescape'
|
errors = 'surrogateescape'
|
||||||
|
elif errors == 'surrogate_or_strict':
|
||||||
|
errors = 'strict'
|
||||||
else:
|
else:
|
||||||
errors = 'replace'
|
errors = 'replace'
|
||||||
elif errors == 'surrogate_or_strict':
|
|
||||||
if HAS_SURROGATEESCAPE:
|
|
||||||
errors = 'surrogateescape'
|
|
||||||
else:
|
|
||||||
errors = 'strict'
|
|
||||||
|
|
||||||
if isinstance(obj, binary_type):
|
if isinstance(obj, binary_type):
|
||||||
|
# Note: We don't need special handling for surrogate_then_replace
|
||||||
|
# because all bytes will either be made into surrogates or are valid
|
||||||
|
# to decode.
|
||||||
return obj.decode(encoding, errors)
|
return obj.decode(encoding, errors)
|
||||||
|
|
||||||
# Note: We do these last even though we have to call to_text again on the
|
# Note: We do these last even though we have to call to_text again on the
|
||||||
|
|
|
@ -403,9 +403,9 @@ def remove_values(value, no_log_strings):
|
||||||
native_str_value = native_str_value.replace(omit_me, '*' * 8)
|
native_str_value = native_str_value.replace(omit_me, '*' * 8)
|
||||||
|
|
||||||
if value_is_text and isinstance(native_str_value, binary_type):
|
if value_is_text and isinstance(native_str_value, binary_type):
|
||||||
value = to_text(native_str_value, encoding='utf-8', errors='surrogate_or_replace')
|
value = to_text(native_str_value, encoding='utf-8', errors='surrogate_then_replace')
|
||||||
elif not value_is_text and isinstance(native_str_value, text_type):
|
elif not value_is_text and isinstance(native_str_value, text_type):
|
||||||
value = to_bytes(native_str_value, encoding='utf-8', errors='surrogate_or_replace')
|
value = to_bytes(native_str_value, encoding='utf-8', errors='surrogate_then_replace')
|
||||||
else:
|
else:
|
||||||
value = native_str_value
|
value = native_str_value
|
||||||
elif isinstance(value, SEQUENCETYPE):
|
elif isinstance(value, SEQUENCETYPE):
|
||||||
|
|
|
@ -406,7 +406,7 @@ class Facts(object):
|
||||||
def get_lsb_facts(self):
|
def get_lsb_facts(self):
|
||||||
lsb_path = self.module.get_bin_path('lsb_release')
|
lsb_path = self.module.get_bin_path('lsb_release')
|
||||||
if lsb_path:
|
if lsb_path:
|
||||||
rc, out, err = self.module.run_command([lsb_path, "-a"], errors='surrogate_or_replace')
|
rc, out, err = self.module.run_command([lsb_path, "-a"], errors='surrogate_then_replace')
|
||||||
if rc == 0:
|
if rc == 0:
|
||||||
self.facts['lsb'] = {}
|
self.facts['lsb'] = {}
|
||||||
for line in out.splitlines():
|
for line in out.splitlines():
|
||||||
|
@ -484,7 +484,7 @@ class Facts(object):
|
||||||
def get_caps_facts(self):
|
def get_caps_facts(self):
|
||||||
capsh_path = self.module.get_bin_path('capsh')
|
capsh_path = self.module.get_bin_path('capsh')
|
||||||
if capsh_path:
|
if capsh_path:
|
||||||
rc, out, err = self.module.run_command([capsh_path, "--print"], errors='surrogate_or_replace')
|
rc, out, err = self.module.run_command([capsh_path, "--print"], errors='surrogate_then_replace')
|
||||||
enforced_caps = []
|
enforced_caps = []
|
||||||
enforced = 'NA'
|
enforced = 'NA'
|
||||||
for line in out.splitlines():
|
for line in out.splitlines():
|
||||||
|
@ -1329,7 +1329,7 @@ class LinuxHardware(Hardware):
|
||||||
def _run_findmnt(self, findmnt_path):
|
def _run_findmnt(self, findmnt_path):
|
||||||
args = ['--list', '--noheadings', '--notruncate']
|
args = ['--list', '--noheadings', '--notruncate']
|
||||||
cmd = [findmnt_path] + args
|
cmd = [findmnt_path] + args
|
||||||
rc, out, err = self.module.run_command(cmd, errors='surrogate_or_replace')
|
rc, out, err = self.module.run_command(cmd, errors='surrogate_then_replace')
|
||||||
return rc, out, err
|
return rc, out, err
|
||||||
|
|
||||||
def _find_bind_mounts(self):
|
def _find_bind_mounts(self):
|
||||||
|
@ -1423,7 +1423,7 @@ class LinuxHardware(Hardware):
|
||||||
self.facts['devices'] = {}
|
self.facts['devices'] = {}
|
||||||
lspci = self.module.get_bin_path('lspci')
|
lspci = self.module.get_bin_path('lspci')
|
||||||
if lspci:
|
if lspci:
|
||||||
rc, pcidata, err = self.module.run_command([lspci, '-D'], errors='surrogate_or_replace')
|
rc, pcidata, err = self.module.run_command([lspci, '-D'], errors='surrogate_then_replace')
|
||||||
else:
|
else:
|
||||||
pcidata = None
|
pcidata = None
|
||||||
|
|
||||||
|
@ -2482,7 +2482,7 @@ class LinuxNetwork(Network):
|
||||||
continue
|
continue
|
||||||
if v == 'v6' and not socket.has_ipv6:
|
if v == 'v6' and not socket.has_ipv6:
|
||||||
continue
|
continue
|
||||||
rc, out, err = self.module.run_command(command[v], errors='surrogate_or_replace')
|
rc, out, err = self.module.run_command(command[v], errors='surrogate_then_replace')
|
||||||
if not out:
|
if not out:
|
||||||
# v6 routing may result in
|
# v6 routing may result in
|
||||||
# RTNETLINK answers: Invalid argument
|
# RTNETLINK answers: Invalid argument
|
||||||
|
@ -2647,10 +2647,10 @@ class LinuxNetwork(Network):
|
||||||
ip_path = self.module.get_bin_path("ip")
|
ip_path = self.module.get_bin_path("ip")
|
||||||
|
|
||||||
args = [ip_path, 'addr', 'show', 'primary', device]
|
args = [ip_path, 'addr', 'show', 'primary', device]
|
||||||
rc, primary_data, stderr = self.module.run_command(args, errors='surrogate_or_replace')
|
rc, primary_data, stderr = self.module.run_command(args, errors='surrogate_then_replace')
|
||||||
|
|
||||||
args = [ip_path, 'addr', 'show', 'secondary', device]
|
args = [ip_path, 'addr', 'show', 'secondary', device]
|
||||||
rc, secondary_data, stderr = self.module.run_command(args, errors='surrogate_or_replace')
|
rc, secondary_data, stderr = self.module.run_command(args, errors='surrogate_then_replace')
|
||||||
|
|
||||||
parse_ip_output(primary_data)
|
parse_ip_output(primary_data)
|
||||||
parse_ip_output(secondary_data, secondary=True)
|
parse_ip_output(secondary_data, secondary=True)
|
||||||
|
@ -2672,7 +2672,7 @@ class LinuxNetwork(Network):
|
||||||
ethtool_path = self.module.get_bin_path("ethtool")
|
ethtool_path = self.module.get_bin_path("ethtool")
|
||||||
if ethtool_path:
|
if ethtool_path:
|
||||||
args = [ethtool_path, '-k', device]
|
args = [ethtool_path, '-k', device]
|
||||||
rc, stdout, stderr = self.module.run_command(args, errors='surrogate_or_replace')
|
rc, stdout, stderr = self.module.run_command(args, errors='surrogate_then_replace')
|
||||||
if rc == 0:
|
if rc == 0:
|
||||||
for line in stdout.strip().splitlines():
|
for line in stdout.strip().splitlines():
|
||||||
if not line or line.endswith(":"):
|
if not line or line.endswith(":"):
|
||||||
|
|
|
@ -818,7 +818,7 @@ class ActionBase(with_metaclass(ABCMeta, object)):
|
||||||
data['rc'] = res['rc']
|
data['rc'] = res['rc']
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def _low_level_execute_command(self, cmd, sudoable=True, in_data=None, executable=None, encoding_errors='surrogate_or_replace'):
|
def _low_level_execute_command(self, cmd, sudoable=True, in_data=None, executable=None, encoding_errors='surrogate_then_replace'):
|
||||||
'''
|
'''
|
||||||
This is the function which executes the low level shell command, which
|
This is the function which executes the low level shell command, which
|
||||||
may be commands to create/remove directories for temporary files, or to
|
may be commands to create/remove directories for temporary files, or to
|
||||||
|
|
Loading…
Reference in a new issue