From 855ca9b204ec094e277bb4ed89eeb68568817725 Mon Sep 17 00:00:00 2001 From: Dag Wieers Date: Wed, 6 Jul 2016 23:40:48 +0200 Subject: [PATCH] Revert PR #3575 since it causes problems related to exclude patterns (#3767) * Revert PR #3575 since it causes problems related to exclude patterns By using a different method for getting archive filelists and for extracting, we introduced new problems related to excluding based on gtar patterns. As a result, files that would be excluded by gtar would still be in the filelist. Implementing our own gtar-compatible pattern exclusion mechanism is nearly impossible (believe me, we looked at it...). The best way is to look at the original problem and deal with that, and ensure that extraction and filelists are done with the exact same tool and exact same options. The solution is to decode the octal unicode representation in gtar's output back to unicode. Since gtar has no problem extracting these files in LANG=C, we simply have to compensate for it. This reverts #3575 and fixes #11348. 
* Implement codecs.escape_decode() instead of decode("string_escape") for python3 --- lib/ansible/modules/files/unarchive.py | 39 +++++++++----------------- 1 file changed, 13 insertions(+), 26 deletions(-) diff --git a/lib/ansible/modules/files/unarchive.py b/lib/ansible/modules/files/unarchive.py index 2d07b8caf4..40a63dd771 100644 --- a/lib/ansible/modules/files/unarchive.py +++ b/lib/ansible/modules/files/unarchive.py @@ -5,7 +5,6 @@ # (c) 2013, Dylan Martin # (c) 2015, Toshio Kuratomi # (c) 2016, Dag Wieers -# (c) 2016, Virgil Dupras # # This file is part of Ansible # @@ -123,9 +122,8 @@ import grp import datetime import time import binascii +import codecs from zipfile import ZipFile, BadZipfile -import tarfile -import subprocess # String from tar that shows the tar contents are different from the # filesystem @@ -552,23 +550,25 @@ class TgzArchive(object): self.compress_mode = 'gz' self._files_in_archive = [] - def _get_tar_fileobj(self): - """Returns a file object that can be read by ``tarfile.open()``.""" - return open(self.src, 'rb') - @property def files_in_archive(self, force_refresh=False): if self._files_in_archive and not force_refresh: return self._files_in_archive - # The use of Python's tarfile module here allows us to easily avoid tricky file encoding - # problems. 
Ref #11348 - try: - tf = tarfile.open(fileobj=self._get_tar_fileobj(), mode='r:%s' % self.compress_mode) - except Exception: + cmd = '%s -t%s' % (self.cmd_path, self.zipflag) + if self.opts: + cmd += ' ' + ' '.join(self.opts) + if self.excludes: + cmd += ' --exclude="' + '" --exclude="'.join(self.excludes) + '"' + cmd += ' -f "%s"' % self.src + rc, out, err = self.module.run_command(cmd) + if rc != 0: raise UnarchiveError('Unable to list files in the archive') - for filename in tf.getnames(): + for filename in out.splitlines(): + # Compensate for locale-related problems in gtar output (octal unicode representation) #11348 +# filename = filename.decode('string_escape') + filename = codecs.escape_decode(filename)[0] if filename and filename not in self.excludes: self._files_in_archive.append(filename) return self._files_in_archive @@ -673,19 +673,6 @@ class TarXzArchive(TgzArchive): self.zipflag = 'J' self.compress_mode = '' - def _get_tar_fileobj(self): - # Python's tarfile module doesn't support xz compression so we have to manually uncompress - # it first. - xz_bin_path = self.module.get_bin_path('xz') - xz_stdout = tempfile.TemporaryFile() - # we don't use self.module.run_command() to avoid loading the whole archive in memory. - cmd = subprocess.Popen([xz_bin_path, '-dc', self.src], stdout=xz_stdout) - rc = cmd.wait() - if rc != 0: - raise UnarchiveError("Could not uncompress with xz") - xz_stdout.seek(0) - return xz_stdout - # try handlers in order and return the one that works or bail if none work def pick_handler(src, dest, file_args, module):