mirror of
https://github.com/ansible-collections/community.general.git
synced 2024-09-14 20:13:21 +02:00
[cloud] Change S3 MD5 checksum for multipart support in aws_s3 (#34216)
- If file has only one part and is uploaded as multipart, it is still computed the same way as having multiple parts
This commit is contained in:
parent
0587aedc01
commit
f20af4b909
1 changed files with 34 additions and 8 deletions
|
@ -271,8 +271,9 @@ s3_keys:
|
|||
- prefix1/key2
|
||||
'''
|
||||
|
||||
import os
|
||||
import hashlib
|
||||
import mimetypes
|
||||
import os
|
||||
import traceback
|
||||
from ansible.module_utils.six.moves.urllib.parse import urlparse
|
||||
from ssl import SSLError
|
||||
|
@ -306,7 +307,34 @@ def key_check(module, s3, bucket, obj, version=None, validate=True):
|
|||
return exists
|
||||
|
||||
|
||||
def keysum(module, s3, bucket, obj, version=None):
|
||||
def keysum_compare(module, local_file, s3, bucket, obj, version=None):
|
||||
s3_keysum = keysum(s3, bucket, obj, version=version)
|
||||
if '-' in s3_keysum: # Check for multipart, ETag is not a proper MD5 sum
|
||||
parts = int(s3_keysum.split('-')[1])
|
||||
md5s = []
|
||||
|
||||
with open(local_file, 'rb') as f:
|
||||
for part_num in range(1, parts + 1):
|
||||
# Get the part size for every part of the multipart uploaded object
|
||||
if version:
|
||||
key_head = s3.head_object(Bucket=bucket, Key=obj, VersionId=version, PartNumber=part_num)
|
||||
else:
|
||||
key_head = s3.head_object(Bucket=bucket, Key=obj, PartNumber=part_num)
|
||||
part_size = int(key_head['ContentLength'])
|
||||
data = f.read(part_size)
|
||||
hash = hashlib.md5(data)
|
||||
md5s.append(hash)
|
||||
|
||||
digests = b''.join(m.digest() for m in md5s)
|
||||
digests_md5 = hashlib.md5(digests)
|
||||
local_keysum = '{0}-{1}'.format(digests_md5.hexdigest(), len(md5s))
|
||||
else: # Compute the MD5 sum normally
|
||||
local_keysum = module.md5(local_file)
|
||||
|
||||
return s3_keysum == local_keysum
|
||||
|
||||
|
||||
def keysum(s3, bucket, obj, version=None):
|
||||
if version:
|
||||
key_check = s3.head_object(Bucket=bucket, Key=obj, VersionId=version)
|
||||
else:
|
||||
|
@ -314,8 +342,6 @@ def keysum(module, s3, bucket, obj, version=None):
|
|||
if not key_check:
|
||||
return None
|
||||
md5_remote = key_check['ETag'][1:-1]
|
||||
if '-' in md5_remote: # Check for multipart, etag is not md5
|
||||
return None
|
||||
return md5_remote
|
||||
|
||||
|
||||
|
@ -706,11 +732,11 @@ def main():
|
|||
else:
|
||||
module.fail_json(msg="Key %s does not exist." % obj)
|
||||
|
||||
# If the destination path doesn't exist or overwrite is True, no need to do the md5um etag check, so just download.
|
||||
# If the destination path doesn't exist or overwrite is True, no need to do the md5sum ETag check, so just download.
|
||||
# Compare the remote MD5 sum of the object with the local dest md5sum, if it already exists.
|
||||
if path_check(dest):
|
||||
# Determine if the remote and local object are identical
|
||||
if keysum(module, s3, bucket, obj, version=version) == module.md5(dest):
|
||||
if keysum_compare(module, dest, s3, bucket, obj, version=version):
|
||||
sum_matches = True
|
||||
if overwrite == 'always':
|
||||
download_s3file(module, s3, bucket, obj, dest, retries, version=version)
|
||||
|
@ -740,10 +766,10 @@ def main():
|
|||
if bucketrtn:
|
||||
keyrtn = key_check(module, s3, bucket, obj, version=version, validate=validate)
|
||||
|
||||
# Lets check key state. Does it exist and if it does, compute the etag md5sum.
|
||||
# Lets check key state. Does it exist and if it does, compute the ETag md5sum.
|
||||
if bucketrtn and keyrtn:
|
||||
# Compare the local and remote object
|
||||
if module.md5(src) == keysum(module, s3, bucket, obj):
|
||||
if keysum_compare(module, src, s3, bucket, obj):
|
||||
sum_matches = True
|
||||
if overwrite == 'always':
|
||||
# only use valid object acls for the upload_s3file function
|
||||
|
|
Loading…
Reference in a new issue