From b6b24192060ebc1959872f86fd5705aca33903b8 Mon Sep 17 00:00:00 2001 From: "patchback[bot]" <45432694+patchback[bot]@users.noreply.github.com> Date: Thu, 13 Apr 2023 05:09:34 +0000 Subject: [PATCH] [PR #6274/14b19afc backport][stable-6] archive: Generate crc32 over 16MiB chunks (#6326) archive: Generate crc32 over 16MiB chunks (#6274) * archive: Generate crc32 over 16MiB chunks Running crc32 over the whole content of the compressed file potentially requires a lot of RAM. The crc32 function in zlib allows for calculating the checksum in chunks. This changes the code to calculate the checksum over 16 MiB chunks instead. Reading in fixed-size chunks is the same approach shutil.copyfileobj() takes, although its default buffer is smaller (16 KiB before Python 3.8; 64 KiB, or 1 MiB on Windows, since). * Update changelogs/fragments/6199-archive-generate-checksum-in-chunks.yml Change the type of change to bugfix Co-authored-by: Felix Fontein * Update changelogs/fragments/6199-archive-generate-checksum-in-chunks.yml Co-authored-by: Felix Fontein --------- Co-authored-by: Felix Fontein (cherry picked from commit 14b19afc9ae8dfd2e320d484229dc4c58a4b4e46) Co-authored-by: Nils Meyer --- .../6199-archive-generate-checksum-in-chunks.yml | 2 ++ plugins/modules/archive.py | 8 +++++++- 2 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 changelogs/fragments/6199-archive-generate-checksum-in-chunks.yml diff --git a/changelogs/fragments/6199-archive-generate-checksum-in-chunks.yml b/changelogs/fragments/6199-archive-generate-checksum-in-chunks.yml new file mode 100644 index 0000000000..ed0a1e2dca --- /dev/null +++ b/changelogs/fragments/6199-archive-generate-checksum-in-chunks.yml @@ -0,0 +1,2 @@ +bugfixes: + - archive - reduce RAM usage by generating CRC32 checksum over chunks (https://github.com/ansible-collections/community.general/pull/6274). 
diff --git a/plugins/modules/archive.py b/plugins/modules/archive.py index bcead7b73c..8748fb8a3e 100644 --- a/plugins/modules/archive.py +++ b/plugins/modules/archive.py @@ -608,7 +608,13 @@ class TarArchive(Archive): # The python implementations of gzip, bz2, and lzma do not support restoring compressed files # to their original names so only file checksum is returned f = self._open_compressed_file(_to_native_ascii(path), 'r') - checksums = set([(b'', crc32(f.read()))]) + checksum = 0 + while True: + chunk = f.read(16 * 1024 * 1024) + if not chunk: + break + checksum = crc32(chunk, checksum) + checksums = set([(b'', checksum)]) f.close() except Exception: checksums = set()