mirror of
https://github.com/ansible-collections/community.general.git
synced 2024-09-14 20:13:21 +02:00
[cloud] Change S3 MD5 checksum for multipart support in aws_s3 (#34216)
- If a file has only one part but was uploaded as multipart, its checksum is still computed the same way as for a file with multiple parts
This commit is contained in:
parent
0587aedc01
commit
f20af4b909
1 changed file with 34 additions and 8 deletions
|
@ -271,8 +271,9 @@ s3_keys:
|
||||||
- prefix1/key2
|
- prefix1/key2
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import os
|
import hashlib
|
||||||
import mimetypes
|
import mimetypes
|
||||||
|
import os
|
||||||
import traceback
|
import traceback
|
||||||
from ansible.module_utils.six.moves.urllib.parse import urlparse
|
from ansible.module_utils.six.moves.urllib.parse import urlparse
|
||||||
from ssl import SSLError
|
from ssl import SSLError
|
||||||
|
@ -306,7 +307,34 @@ def key_check(module, s3, bucket, obj, version=None, validate=True):
|
||||||
return exists
|
return exists
|
||||||
|
|
||||||
|
|
||||||
def keysum(module, s3, bucket, obj, version=None):
|
def keysum_compare(module, local_file, s3, bucket, obj, version=None):
    """Return True when ``local_file`` matches the ETag of an S3 object.

    For a plain upload the ETag is compared with ``module.md5(local_file)``.
    For a multipart upload (ETag of the form ``<hex>-<parts>``) the local
    file is split using the size of each remote part, every slice is hashed
    with MD5, and the MD5 of the concatenated binary digests, suffixed with
    the part count, is compared with the ETag.

    :param module: object providing ``md5(path)`` for the non-multipart case.
    :param local_file: path of the local file to compare.
    :param s3: client providing ``head_object``.
    :param bucket: bucket name passed to ``head_object``.
    :param obj: object key passed to ``head_object``.
    :param version: optional object version id.
    :returns: True if the checksums match, False otherwise (including when
        no remote checksum is available).
    """
    s3_keysum = keysum(s3, bucket, obj, version=version)
    if not s3_keysum:
        # keysum() returns None when there is nothing to compare against;
        # "'-' in None" would raise TypeError, so treat it as a mismatch.
        return False

    if '-' not in s3_keysum:
        # Not multipart: the ETag is compared with the plain MD5 of the file.
        return s3_keysum == module.md5(local_file)

    # Multipart: the ETag is not a proper MD5 sum; the suffix is the part count.
    parts = int(s3_keysum.split('-')[1])

    head_args = dict(Bucket=bucket, Key=obj)
    if version:
        head_args['VersionId'] = version

    chunk_size = 1024 * 1024  # hash in bounded chunks; a part can be very large
    part_digests = []
    with open(local_file, 'rb') as f:
        for part_num in range(1, parts + 1):
            # Get the part size for every part of the multipart uploaded object
            key_head = s3.head_object(PartNumber=part_num, **head_args)
            remaining = int(key_head['ContentLength'])

            part_md5 = hashlib.md5()
            while remaining > 0:
                chunk = f.read(min(remaining, chunk_size))
                if not chunk:
                    # Local file is shorter than the remote object.
                    break
                part_md5.update(chunk)
                remaining -= len(chunk)
            part_digests.append(part_md5.digest())

        if f.read(1):
            # Local file has data beyond the last remote part, so it cannot
            # be identical even if the covered prefix hashes the same.
            return False

    digests_md5 = hashlib.md5(b''.join(part_digests))
    local_keysum = '{0}-{1}'.format(digests_md5.hexdigest(), len(part_digests))
    return s3_keysum == local_keysum
|
||||||
|
|
||||||
|
|
||||||
|
def keysum(s3, bucket, obj, version=None):
|
||||||
if version:
|
if version:
|
||||||
key_check = s3.head_object(Bucket=bucket, Key=obj, VersionId=version)
|
key_check = s3.head_object(Bucket=bucket, Key=obj, VersionId=version)
|
||||||
else:
|
else:
|
||||||
|
@ -314,8 +342,6 @@ def keysum(module, s3, bucket, obj, version=None):
|
||||||
if not key_check:
|
if not key_check:
|
||||||
return None
|
return None
|
||||||
md5_remote = key_check['ETag'][1:-1]
|
md5_remote = key_check['ETag'][1:-1]
|
||||||
if '-' in md5_remote: # Check for multipart, etag is not md5
|
|
||||||
return None
|
|
||||||
return md5_remote
|
return md5_remote
|
||||||
|
|
||||||
|
|
||||||
|
@ -706,11 +732,11 @@ def main():
|
||||||
else:
|
else:
|
||||||
module.fail_json(msg="Key %s does not exist." % obj)
|
module.fail_json(msg="Key %s does not exist." % obj)
|
||||||
|
|
||||||
# If the destination path doesn't exist or overwrite is True, no need to do the md5um etag check, so just download.
|
# If the destination path doesn't exist or overwrite is True, no need to do the md5sum ETag check, so just download.
|
||||||
# Compare the remote MD5 sum of the object with the local dest md5sum, if it already exists.
|
# Compare the remote MD5 sum of the object with the local dest md5sum, if it already exists.
|
||||||
if path_check(dest):
|
if path_check(dest):
|
||||||
# Determine if the remote and local object are identical
|
# Determine if the remote and local object are identical
|
||||||
if keysum(module, s3, bucket, obj, version=version) == module.md5(dest):
|
if keysum_compare(module, dest, s3, bucket, obj, version=version):
|
||||||
sum_matches = True
|
sum_matches = True
|
||||||
if overwrite == 'always':
|
if overwrite == 'always':
|
||||||
download_s3file(module, s3, bucket, obj, dest, retries, version=version)
|
download_s3file(module, s3, bucket, obj, dest, retries, version=version)
|
||||||
|
@ -740,10 +766,10 @@ def main():
|
||||||
if bucketrtn:
|
if bucketrtn:
|
||||||
keyrtn = key_check(module, s3, bucket, obj, version=version, validate=validate)
|
keyrtn = key_check(module, s3, bucket, obj, version=version, validate=validate)
|
||||||
|
|
||||||
# Lets check key state. Does it exist and if it does, compute the etag md5sum.
|
# Lets check key state. Does it exist and if it does, compute the ETag md5sum.
|
||||||
if bucketrtn and keyrtn:
|
if bucketrtn and keyrtn:
|
||||||
# Compare the local and remote object
|
# Compare the local and remote object
|
||||||
if module.md5(src) == keysum(module, s3, bucket, obj):
|
if keysum_compare(module, src, s3, bucket, obj):
|
||||||
sum_matches = True
|
sum_matches = True
|
||||||
if overwrite == 'always':
|
if overwrite == 'always':
|
||||||
# only use valid object acls for the upload_s3file function
|
# only use valid object acls for the upload_s3file function
|
||||||
|
|
Loading…
Reference in a new issue