1
0
Fork 0
mirror of https://github.com/ansible-collections/community.general.git synced 2024-09-14 20:13:21 +02:00

Create reproducible tar archives

This commit is contained in:
Glenn Pratt 2024-07-29 17:08:24 -07:00
parent 37c8560542
commit 51a4112e41
No known key found for this signature in database

View file

@ -218,8 +218,6 @@ else:
LZMA_IMP_ERR = format_exc() LZMA_IMP_ERR = format_exc()
HAS_LZMA = False HAS_LZMA = False
PY27 = version_info[0:2] >= (2, 7)
STATE_ABSENT = 'absent' STATE_ABSENT = 'absent'
STATE_ARCHIVED = 'archive' STATE_ARCHIVED = 'archive'
STATE_COMPRESSED = 'compress' STATE_COMPRESSED = 'compress'
@ -282,6 +280,7 @@ class Archive(object):
self.format = module.params['format'] self.format = module.params['format']
self.must_archive = module.params['force_archive'] self.must_archive = module.params['force_archive']
self.remove = module.params['remove'] self.remove = module.params['remove']
self.reproducible_tar = module.params["reproducible_tar"] or False
self.changed = False self.changed = False
self.destination_state = STATE_ABSENT self.destination_state = STATE_ABSENT
@ -360,14 +359,14 @@ class Archive(object):
try: try:
for target in self.targets: for target in self.targets:
if os.path.isdir(target): if os.path.isdir(target):
for directory_path, directory_names, file_names in os.walk(target, topdown=True): paths = []
for directory_name in directory_names: for directory_path, _, file_names in os.walk(target, topdown=True):
full_path = os.path.join(directory_path, directory_name) paths.append(directory_path)
self.add(full_path, strip_prefix(self.root, full_path))
for file_name in file_names: for file_name in file_names:
full_path = os.path.join(directory_path, file_name) paths.append(os.path.join(directory_path, file_name))
self.add(full_path, strip_prefix(self.root, full_path))
for path in sorted(paths):
self.add(path, strip_prefix(self.root, path))
else: else:
self.add(target, strip_prefix(self.root, target)) self.add(target, strip_prefix(self.root, target))
except Exception as e: except Exception as e:
@ -490,6 +489,9 @@ class Archive(object):
return f return f
def _reproducible_mtime(self):
    """Return the fixed modification time used for reproducible archives.

    The value is used both as the gzip header timestamp when the
    reproducible gzip tar file is opened and as the ``mtime`` of every
    archive member rewritten by the reproducible tar filter, so that the
    output does not depend on when the archive was built.
    """
    # 0 == Unix epoch; a constant keeps repeated runs byte-identical.
    return 0
@abc.abstractmethod @abc.abstractmethod
def close(self): def close(self):
pass pass
@ -542,6 +544,33 @@ class ZipArchive(Archive):
return checksums return checksums
class TGZFileWithMtime(tarfile.TarFile):
    """Write-only gzip-compressed ``TarFile`` with a caller-chosen gzip mtime.

    ``tarfile.open(..., 'w:gz')`` always stamps the gzip header with the
    current time and the destination file name, which makes two otherwise
    identical archives differ. This class builds the gzip layer itself so
    that the header timestamp can be pinned and the header file name is
    left empty.

    :param name: Path of the archive to create. Only used when ``fileobj``
        is not supplied.
    :param mode: Write mode for the underlying file / gzip stream
        (``"w"``, ``"x"`` or ``"a"``). Defaults to ``"w"``.
    :param compresslevel: Passed through to ``gzip.GzipFile``.
    :param fileobj: Already opened binary file object to write to. It is
        owned by the archive afterwards and closed together with it.
    :param mtime: Timestamp written into the gzip header (``None`` lets
        ``gzip`` use the current time).
    :param kwargs: Forwarded to ``tarfile.TarFile.__init__``.
    :raises ValueError: If ``mode`` is not a write mode.
    """

    def __init__(
        self, name=None, mode=None, compresslevel=-1, fileobj=None, mtime=None, **kwargs
    ):
        # The declared default used to be unusable: ``None + "b"`` raised
        # TypeError as soon as the class was asked to open ``name`` itself.
        if mode is None:
            mode = "w"
        # The tar layer below is always opened for writing, so a read mode
        # can only produce a half-working object that fails on close().
        # Reject it before any file is opened or truncated.
        if not mode.startswith(("w", "x", "a")):
            raise ValueError(
                "TGZFileWithMtime only supports write modes, got %r" % (mode,)
            )

        if fileobj is None:
            fileobj = open(name, mode + "b")

        # filename intentionally empty to match GNU tar
        try:
            gzipfileobj = gzip.GzipFile("", mode, compresslevel, fileobj, mtime)
        except BaseException:
            # Do not leak the raw file handle if the gzip layer fails.
            fileobj.close()
            raise

        # Allow GzipFile to close fileobj as needed
        gzipfileobj.myfileobj = fileobj

        try:
            super(TGZFileWithMtime, self).__init__(mode="w", fileobj=gzipfileobj, **kwargs)
        except BaseException:
            # Closing the gzip stream also closes the raw file (see above).
            gzipfileobj.close()
            raise

        # Allow TarFile to close GzipFile as needed
        self._extfileobj = False
class TarArchive(Archive): class TarArchive(Archive):
def __init__(self, module): def __init__(self, module):
super(TarArchive, self).__init__(module) super(TarArchive, self).__init__(module)
@ -562,7 +591,11 @@ class TarArchive(Archive):
return True return True
def open(self): def open(self):
if self.format in ('gz', 'bz2'): if self.reproducible_tar and self.format == "gz":
self.file = TGZFileWithMtime(
_to_native_ascii(self.destination), "w", mtime=self._reproducible_mtime()
)
elif self.format in ('gz', 'bz2'):
self.file = tarfile.open(_to_native_ascii(self.destination), 'w|' + self.format) self.file = tarfile.open(_to_native_ascii(self.destination), 'w|' + self.format)
# python3 tarfile module allows xz format but for python2 we have to create the tarfile # python3 tarfile module allows xz format but for python2 we have to create the tarfile
# in memory and then compress it with lzma. # in memory and then compress it with lzma.
@ -575,16 +608,33 @@ class TarArchive(Archive):
self.module.fail_json(msg="%s is not a valid archive format" % self.format) self.module.fail_json(msg="%s is not a valid archive format" % self.format)
def _add(self, path, archive_name): def _add(self, path, archive_name):
def py27_filter(tarinfo): def filter(tarinfo: tarfile.TarInfo):
return None if matches_exclusion_patterns(tarinfo.name, self.exclusion_patterns) else tarinfo if matches_exclusion_patterns(tarinfo.name, self.exclusion_patterns):
return None
def py26_filter(path): if self.reproducible_tar:
return matches_exclusion_patterns(path, self.exclusion_patterns) # Remove unused backref that prevents copy
if hasattr(tarinfo, "tarfile"):
delattr(tarinfo, "tarfile")
if PY27: if tarinfo.isdir():
self.file.add(path, archive_name, recursive=False, filter=py27_filter) mode = 0o40000 | 0o755
else: else:
self.file.add(path, archive_name, recursive=False, exclude=py26_filter) mode = 0o100000 | 0o644
# Copy tarfile while reducing metadata
return tarinfo.replace(
mtime=self._reproducible_mtime(),
mode=mode,
uid=0,
gid=0,
uname="",
gname="",
)
return tarinfo
self.file.add(path, archive_name, recursive=False, filter=filter)
def _get_checksums(self, path): def _get_checksums(self, path):
if HAS_LZMA: if HAS_LZMA:
@ -637,6 +687,7 @@ def main():
exclusion_patterns=dict(type='list', elements='path'), exclusion_patterns=dict(type='list', elements='path'),
force_archive=dict(type='bool', default=False), force_archive=dict(type='bool', default=False),
remove=dict(type='bool', default=False), remove=dict(type='bool', default=False),
reproducible_tar=dict(type="bool", default=False),
), ),
add_file_common_args=True, add_file_common_args=True,
supports_check_mode=True, supports_check_mode=True,