diff --git a/lib/ansible/modules/files/unarchive.py b/lib/ansible/modules/files/unarchive.py index cf9005a47b..6ded1e779b 100644 --- a/lib/ansible/modules/files/unarchive.py +++ b/lib/ansible/modules/files/unarchive.py @@ -5,6 +5,7 @@ # (c) 2013, Dylan Martin # (c) 2015, Toshio Kuratomi # (c) 2016, Dag Wieers +# (c) 2016, Virgil Dupras # # This file is part of Ansible # @@ -114,6 +115,8 @@ import datetime import time import binascii from zipfile import ZipFile +import tarfile +import subprocess # String from tar that shows the tar contents are different from the # filesystem @@ -492,22 +495,23 @@ class TgzArchive(object): self.zipflag = 'z' self._files_in_archive = [] + def _get_tar_fileobj(self): + """Returns a file object that can be read by ``tarfile.open()``.""" + return open(self.src, 'rb') + @property def files_in_archive(self, force_refresh=False): if self._files_in_archive and not force_refresh: return self._files_in_archive - cmd = '%s -t%s' % (self.cmd_path, self.zipflag) - if self.opts: - cmd += ' ' + ' '.join(self.opts) - if self.excludes: - cmd += ' --exclude="' + '" --exclude="'.join(self.excludes) + '"' - cmd += ' -f "%s"' % self.src - rc, out, err = self.module.run_command(cmd) - if rc != 0: + # The use of Python's tarfile module here allows us to easily avoid tricky file encoding + # problems. Ref #11348 + try: + tf = tarfile.open(fileobj=self._get_tar_fileobj()) + except Exception: raise UnarchiveError('Unable to list files in the archive') - for filename in out.splitlines(): + for filename in tf.getnames(): if filename and filename not in self.excludes: self._files_in_archive.append(filename) return self._files_in_archive @@ -605,6 +609,19 @@ class TarXzArchive(TgzArchive): super(TarXzArchive, self).__init__(src, dest, file_args, module) self.zipflag = 'J' + def _get_tar_fileobj(self): + # Python's tarfile module doesn't support xz compression so we have to manually uncompress + # it first. + xz_bin_path = self.module.get_bin_path('xz') + xz_stdout = tempfile.TemporaryFile() + # we don't use self.module.run_command() to avoid loading the whole archive in memory. + cmd = subprocess.Popen([xz_bin_path, '-dc', self.src], stdout=xz_stdout) + rc = cmd.wait() + if rc != 0: + raise UnarchiveError("Could not uncompress with xz") + xz_stdout.seek(0) + return xz_stdout + # try handlers in order and return the one that works or bail if none work def pick_handler(src, dest, file_args, module):