From 1e466df863ffebd19ddab66b99ed19eec21e6c0e Mon Sep 17 00:00:00 2001 From: Ajpantuso Date: Thu, 12 Aug 2021 02:18:38 -0400 Subject: [PATCH] archive - idempotency enhancement for 4.0.0 (#3075) * Initial Commit * Comparing with tar file checksums rather than tar header checksums * Added changelog fragment * Revert "Comparing with tar file checksums rather than tar header checksums" This reverts commit bed4b171077058f1ed29785c6def52de2b1f441c. * Restricting idempotency tests by format * Applying review suggestions --- .../3075-archive-idempotency-enhancements.yml | 4 ++ plugins/modules/files/archive.py | 61 ++++++++++++++++--- .../targets/archive/tests/idempotency.yml | 21 +++---- 3 files changed, 65 insertions(+), 21 deletions(-) create mode 100644 changelogs/fragments/3075-archive-idempotency-enhancements.yml diff --git a/changelogs/fragments/3075-archive-idempotency-enhancements.yml b/changelogs/fragments/3075-archive-idempotency-enhancements.yml new file mode 100644 index 0000000000..3d0bf65fb7 --- /dev/null +++ b/changelogs/fragments/3075-archive-idempotency-enhancements.yml @@ -0,0 +1,4 @@ +--- +breaking_changes: + - archive - adding idempotency checks for changes to file names and content within the ``destination`` file + (https://github.com/ansible-collections/community.general/pull/3075). diff --git a/plugins/modules/files/archive.py b/plugins/modules/files/archive.py index 30c4de5aa8..91dc6e5112 100644 --- a/plugins/modules/files/archive.py +++ b/plugins/modules/files/archive.py @@ -182,6 +182,7 @@ import zipfile from fnmatch import fnmatch from sys import version_info from traceback import format_exc +from zlib import crc32 from ansible.module_utils.basic import AnsibleModule, missing_required_lib from ansible.module_utils.common.text.converters import to_bytes, to_native @@ -234,10 +235,6 @@ def expand_paths(paths): return expanded_path, is_globby -def legacy_filter(path, exclusion_patterns): - return matches_exclusion_patterns(path, exclusion_patterns) - - def matches_exclusion_patterns(path, exclusion_patterns): return any(fnmatch(path, p) for p in exclusion_patterns) @@ -313,6 +310,7 @@ class Archive(object): if self.remove: self._check_removal_safety() + self.original_checksums = self.destination_checksums() self.original_size = self.destination_size() def add(self, path, archive_name): @@ -377,8 +375,16 @@ class Archive(object): msg='Errors when writing archive at %s: %s' % (_to_native(self.destination), '; '.join(self.errors)) ) - def compare_with_original(self): - self.changed |= self.original_size != self.destination_size() + def is_different_from_original(self): + if self.original_checksums is None: + return self.original_size != self.destination_size() + else: + return self.original_checksums != self.destination_checksums() + + def destination_checksums(self): + if self.destination_exists() and self.destination_readable(): + return self._get_checksums(self.destination) + return None def destination_exists(self): return self.destination and os.path.exists(self.destination) @@ -494,6 +500,10 @@ class Archive(object): def _add(self, path, archive_name): pass + @abc.abstractmethod + def _get_checksums(self, path): + pass + class ZipArchive(Archive): def __init__(self, module): @@ -513,9 +523,18 @@ class ZipArchive(Archive): self.file = zipfile.ZipFile(_to_native_ascii(self.destination), 'w', zipfile.ZIP_DEFLATED, True) def _add(self, path, archive_name): - if not legacy_filter(path, self.exclusion_patterns): + if not matches_exclusion_patterns(path, self.exclusion_patterns): self.file.write(path, archive_name) + def _get_checksums(self, path): + try: + archive = zipfile.ZipFile(_to_native_ascii(path), 'r') + checksums = set((info.filename, info.CRC) for info in archive.infolist()) + archive.close() + except zipfile.BadZipfile: + checksums = set() + return checksums + class TarArchive(Archive): def __init__(self, module): @@ -554,13 +573,35 @@ class TarArchive(Archive): return None if matches_exclusion_patterns(tarinfo.name, self.exclusion_patterns) else tarinfo def py26_filter(path): - return legacy_filter(path, self.exclusion_patterns) + return matches_exclusion_patterns(path, self.exclusion_patterns) if PY27: self.file.add(path, archive_name, recursive=False, filter=py27_filter) else: self.file.add(path, archive_name, recursive=False, exclude=py26_filter) + def _get_checksums(self, path): + try: + if self.format == 'xz': + with lzma.open(_to_native_ascii(path), 'r') as f: + archive = tarfile.open(fileobj=f) + checksums = set((info.name, info.chksum) for info in archive.getmembers()) + archive.close() + else: + archive = tarfile.open(_to_native_ascii(path), 'r|' + self.format) + checksums = set((info.name, info.chksum) for info in archive.getmembers()) + archive.close() + except (lzma.LZMAError, tarfile.ReadError, tarfile.CompressionError): + try: + # The python implementations of gzip, bz2, and lzma do not support restoring compressed files + # to their original names so only file checksum is returned + f = self._open_compressed_file(_to_native_ascii(path), 'r') + checksums = set([(b'', crc32(f.read()))]) + f.close() + except Exception: + checksums = set() + return checksums + def get_archive(module): if module.params['format'] == 'zip': @@ -603,7 +644,7 @@ def main(): else: archive.add_targets() archive.destination_state = STATE_INCOMPLETE if archive.has_unfound_targets() else STATE_ARCHIVED - archive.compare_with_original() + archive.changed |= archive.is_different_from_original() if archive.remove: archive.remove_targets() else: @@ -613,7 +654,7 @@ def main(): else: path = archive.paths[0] archive.add_single_target(path) - archive.compare_with_original() + archive.changed |= archive.is_different_from_original() if archive.remove: archive.remove_single_target(path) diff --git a/tests/integration/targets/archive/tests/idempotency.yml b/tests/integration/targets/archive/tests/idempotency.yml index f53f768164..9262601572 100644 --- a/tests/integration/targets/archive/tests/idempotency.yml +++ b/tests/integration/targets/archive/tests/idempotency.yml @@ -19,12 +19,12 @@ format: "{{ format }}" register: file_content_idempotency_after -# After idempotency fix result will be reliably changed for all formats - name: Assert task status is changed - file content idempotency ({{ format }}) assert: that: - - file_content_idempotency_after is not changed - when: "format in ('tar', 'zip')" + - file_content_idempotency_after is changed + # Only ``zip`` archives are guaranteed to compare file content checksums rather than header checksums + when: "format == 'zip'" - name: Remove archive - file content idempotency ({{ format }}) file: @@ -54,12 +54,10 @@ format: "{{ format }}" register: file_name_idempotency_after -# After idempotency fix result will be reliably changed for all formats - name: Check task status - file name idempotency ({{ format }}) assert: that: - - file_name_idempotency_after is not changed - when: "format in ('tar', 'zip')" + - file_name_idempotency_after is changed - name: Remove archive - file name idempotency ({{ format }}) file: @@ -89,12 +87,12 @@ format: "{{ format }}" register: single_file_content_idempotency_after -# After idempotency fix result will be reliably changed for all formats - name: Assert task status is changed - single file content idempotency ({{ format }}) assert: that: - - single_file_content_idempotency_after is not changed - when: "format in ('tar', 'zip')" + - single_file_content_idempotency_after is changed + # ``tar`` archives are not guaranteed to identify changes to file content if the file meta properties are unchanged. + when: "format != 'tar'" - name: Remove archive - single file content idempotency ({{ format }}) file: @@ -125,11 +123,12 @@ register: single_file_name_idempotency_after -# After idempotency fix result will be reliably changed for all formats +# The gz, bz2, and xz formats do not store the original file name +# so it is not possible to identify a change in this scenario. - name: Check task status - single file name idempotency ({{ format }}) assert: that: - - single_file_name_idempotency_after is not changed + - single_file_name_idempotency_after is changed when: "format in ('tar', 'zip')" - name: Remove archive - single file name idempotency ({{ format }})