1
0
Fork 0
mirror of https://github.com/ansible-collections/community.general.git synced 2024-09-14 20:13:21 +02:00

archive - idempotency enhancement for 4.0.0 (#3075)

* Initial Commit

* Comparing with tar file checksums rather than tar header checksums

* Added changelog fragment

* Revert "Comparing with tar file checksums rather than tar header checksums"

This reverts commit bed4b17107.

* Restricting idempotency tests by format

* Applying review suggestions
This commit is contained in:
Ajpantuso 2021-08-12 02:18:38 -04:00 committed by GitHub
parent 6033ce695b
commit 1e466df863
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 65 additions and 21 deletions

View file

@ -0,0 +1,4 @@
---
breaking_changes:
- archive - adding idempotency checks for changes to file names and content within the ``destination`` file
(https://github.com/ansible-collections/community.general/pull/3075).

View file

@ -182,6 +182,7 @@ import zipfile
from fnmatch import fnmatch from fnmatch import fnmatch
from sys import version_info from sys import version_info
from traceback import format_exc from traceback import format_exc
from zlib import crc32
from ansible.module_utils.basic import AnsibleModule, missing_required_lib from ansible.module_utils.basic import AnsibleModule, missing_required_lib
from ansible.module_utils.common.text.converters import to_bytes, to_native from ansible.module_utils.common.text.converters import to_bytes, to_native
@ -234,10 +235,6 @@ def expand_paths(paths):
return expanded_path, is_globby return expanded_path, is_globby
def legacy_filter(path, exclusion_patterns):
return matches_exclusion_patterns(path, exclusion_patterns)
def matches_exclusion_patterns(path, exclusion_patterns): def matches_exclusion_patterns(path, exclusion_patterns):
return any(fnmatch(path, p) for p in exclusion_patterns) return any(fnmatch(path, p) for p in exclusion_patterns)
@ -313,6 +310,7 @@ class Archive(object):
if self.remove: if self.remove:
self._check_removal_safety() self._check_removal_safety()
self.original_checksums = self.destination_checksums()
self.original_size = self.destination_size() self.original_size = self.destination_size()
def add(self, path, archive_name): def add(self, path, archive_name):
@ -377,8 +375,16 @@ class Archive(object):
msg='Errors when writing archive at %s: %s' % (_to_native(self.destination), '; '.join(self.errors)) msg='Errors when writing archive at %s: %s' % (_to_native(self.destination), '; '.join(self.errors))
) )
def compare_with_original(self): def is_different_from_original(self):
self.changed |= self.original_size != self.destination_size() if self.original_checksums is None:
return self.original_size != self.destination_size()
else:
return self.original_checksums != self.destination_checksums()
def destination_checksums(self):
if self.destination_exists() and self.destination_readable():
return self._get_checksums(self.destination)
return None
def destination_exists(self): def destination_exists(self):
return self.destination and os.path.exists(self.destination) return self.destination and os.path.exists(self.destination)
@ -494,6 +500,10 @@ class Archive(object):
def _add(self, path, archive_name): def _add(self, path, archive_name):
pass pass
@abc.abstractmethod
def _get_checksums(self, path):
pass
class ZipArchive(Archive): class ZipArchive(Archive):
def __init__(self, module): def __init__(self, module):
@ -513,9 +523,18 @@ class ZipArchive(Archive):
self.file = zipfile.ZipFile(_to_native_ascii(self.destination), 'w', zipfile.ZIP_DEFLATED, True) self.file = zipfile.ZipFile(_to_native_ascii(self.destination), 'w', zipfile.ZIP_DEFLATED, True)
def _add(self, path, archive_name): def _add(self, path, archive_name):
if not legacy_filter(path, self.exclusion_patterns): if not matches_exclusion_patterns(path, self.exclusion_patterns):
self.file.write(path, archive_name) self.file.write(path, archive_name)
def _get_checksums(self, path):
try:
archive = zipfile.ZipFile(_to_native_ascii(path), 'r')
checksums = set((info.filename, info.CRC) for info in archive.infolist())
archive.close()
except zipfile.BadZipfile:
checksums = set()
return checksums
class TarArchive(Archive): class TarArchive(Archive):
def __init__(self, module): def __init__(self, module):
@ -554,13 +573,35 @@ class TarArchive(Archive):
return None if matches_exclusion_patterns(tarinfo.name, self.exclusion_patterns) else tarinfo return None if matches_exclusion_patterns(tarinfo.name, self.exclusion_patterns) else tarinfo
def py26_filter(path): def py26_filter(path):
return legacy_filter(path, self.exclusion_patterns) return matches_exclusion_patterns(path, self.exclusion_patterns)
if PY27: if PY27:
self.file.add(path, archive_name, recursive=False, filter=py27_filter) self.file.add(path, archive_name, recursive=False, filter=py27_filter)
else: else:
self.file.add(path, archive_name, recursive=False, exclude=py26_filter) self.file.add(path, archive_name, recursive=False, exclude=py26_filter)
def _get_checksums(self, path):
try:
if self.format == 'xz':
with lzma.open(_to_native_ascii(path), 'r') as f:
archive = tarfile.open(fileobj=f)
checksums = set((info.name, info.chksum) for info in archive.getmembers())
archive.close()
else:
archive = tarfile.open(_to_native_ascii(path), 'r|' + self.format)
checksums = set((info.name, info.chksum) for info in archive.getmembers())
archive.close()
except (lzma.LZMAError, tarfile.ReadError, tarfile.CompressionError):
try:
# The python implementations of gzip, bz2, and lzma do not support restoring compressed files
# to their original names so only file checksum is returned
f = self._open_compressed_file(_to_native_ascii(path), 'r')
checksums = set([(b'', crc32(f.read()))])
f.close()
except Exception:
checksums = set()
return checksums
def get_archive(module): def get_archive(module):
if module.params['format'] == 'zip': if module.params['format'] == 'zip':
@ -603,7 +644,7 @@ def main():
else: else:
archive.add_targets() archive.add_targets()
archive.destination_state = STATE_INCOMPLETE if archive.has_unfound_targets() else STATE_ARCHIVED archive.destination_state = STATE_INCOMPLETE if archive.has_unfound_targets() else STATE_ARCHIVED
archive.compare_with_original() archive.changed |= archive.is_different_from_original()
if archive.remove: if archive.remove:
archive.remove_targets() archive.remove_targets()
else: else:
@ -613,7 +654,7 @@ def main():
else: else:
path = archive.paths[0] path = archive.paths[0]
archive.add_single_target(path) archive.add_single_target(path)
archive.compare_with_original() archive.changed |= archive.is_different_from_original()
if archive.remove: if archive.remove:
archive.remove_single_target(path) archive.remove_single_target(path)

View file

@ -19,12 +19,12 @@
format: "{{ format }}" format: "{{ format }}"
register: file_content_idempotency_after register: file_content_idempotency_after
# After idempotency fix result will be reliably changed for all formats
- name: Assert task status is changed - file content idempotency ({{ format }}) - name: Assert task status is changed - file content idempotency ({{ format }})
assert: assert:
that: that:
- file_content_idempotency_after is not changed - file_content_idempotency_after is changed
when: "format in ('tar', 'zip')" # Only ``zip`` archives are guaranteed to compare file content checksums rather than header checksums
when: "format == 'zip'"
- name: Remove archive - file content idempotency ({{ format }}) - name: Remove archive - file content idempotency ({{ format }})
file: file:
@ -54,12 +54,10 @@
format: "{{ format }}" format: "{{ format }}"
register: file_name_idempotency_after register: file_name_idempotency_after
# After idempotency fix result will be reliably changed for all formats
- name: Check task status - file name idempotency ({{ format }}) - name: Check task status - file name idempotency ({{ format }})
assert: assert:
that: that:
- file_name_idempotency_after is not changed - file_name_idempotency_after is changed
when: "format in ('tar', 'zip')"
- name: Remove archive - file name idempotency ({{ format }}) - name: Remove archive - file name idempotency ({{ format }})
file: file:
@ -89,12 +87,12 @@
format: "{{ format }}" format: "{{ format }}"
register: single_file_content_idempotency_after register: single_file_content_idempotency_after
# After idempotency fix result will be reliably changed for all formats
- name: Assert task status is changed - single file content idempotency ({{ format }}) - name: Assert task status is changed - single file content idempotency ({{ format }})
assert: assert:
that: that:
- single_file_content_idempotency_after is not changed - single_file_content_idempotency_after is changed
when: "format in ('tar', 'zip')" # ``tar`` archives are not guaranteed to identify changes to file content if the file meta properties are unchanged.
when: "format != 'tar'"
- name: Remove archive - single file content idempotency ({{ format }}) - name: Remove archive - single file content idempotency ({{ format }})
file: file:
@ -125,11 +123,12 @@
register: single_file_name_idempotency_after register: single_file_name_idempotency_after
# After idempotency fix result will be reliably changed for all formats # The gz, bz2, and xz formats do not store the original file name
# so it is not possible to identify a change in this scenario.
- name: Check task status - single file name idempotency ({{ format }}) - name: Check task status - single file name idempotency ({{ format }})
assert: assert:
that: that:
- single_file_name_idempotency_after is not changed - single_file_name_idempotency_after is changed
when: "format in ('tar', 'zip')" when: "format in ('tar', 'zip')"
- name: Remove archive - single file name idempotency ({{ format }}) - name: Remove archive - single file name idempotency ({{ format }})