From f20af4b9097edc4ff3136f93c74049e7a43cf6be Mon Sep 17 00:00:00 2001 From: Vinay Dandekar Date: Tue, 9 Jan 2018 01:51:45 +0530 Subject: [PATCH] [cloud] Change S3 MD5 checksum for multipart support in aws_s3 (#34216) - If file has only one part and is uploaded as multipart, it is still computed the same way as having multiple parts --- lib/ansible/modules/cloud/amazon/aws_s3.py | 42 +++++++++++++++++----- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/lib/ansible/modules/cloud/amazon/aws_s3.py b/lib/ansible/modules/cloud/amazon/aws_s3.py index bf72a6aade..83a62a2fd5 100644 --- a/lib/ansible/modules/cloud/amazon/aws_s3.py +++ b/lib/ansible/modules/cloud/amazon/aws_s3.py @@ -271,8 +271,9 @@ s3_keys: - prefix1/key2 ''' -import os +import hashlib import mimetypes +import os import traceback from ansible.module_utils.six.moves.urllib.parse import urlparse from ssl import SSLError @@ -306,7 +307,34 @@ def key_check(module, s3, bucket, obj, version=None, validate=True): return exists -def keysum(module, s3, bucket, obj, version=None): +def keysum_compare(module, local_file, s3, bucket, obj, version=None): + s3_keysum = keysum(s3, bucket, obj, version=version) + if '-' in s3_keysum: # Check for multipart, ETag is not a proper MD5 sum + parts = int(s3_keysum.split('-')[1]) + md5s = [] + + with open(local_file, 'rb') as f: + for part_num in range(1, parts + 1): + # Get the part size for every part of the multipart uploaded object + if version: + key_head = s3.head_object(Bucket=bucket, Key=obj, VersionId=version, PartNumber=part_num) + else: + key_head = s3.head_object(Bucket=bucket, Key=obj, PartNumber=part_num) + part_size = int(key_head['ContentLength']) + data = f.read(part_size) + hash = hashlib.md5(data) + md5s.append(hash) + + digests = b''.join(m.digest() for m in md5s) + digests_md5 = hashlib.md5(digests) + local_keysum = '{0}-{1}'.format(digests_md5.hexdigest(), len(md5s)) + else: # Compute the MD5 sum normally + local_keysum = module.md5(local_file) + + return s3_keysum == local_keysum + + +def keysum(s3, bucket, obj, version=None): if version: key_check = s3.head_object(Bucket=bucket, Key=obj, VersionId=version) else: @@ -314,8 +342,6 @@ def keysum(module, s3, bucket, obj, version=None): if not key_check: return None md5_remote = key_check['ETag'][1:-1] - if '-' in md5_remote: # Check for multipart, etag is not md5 - return None return md5_remote @@ -706,11 +732,11 @@ def main(): else: module.fail_json(msg="Key %s does not exist." % obj) - # If the destination path doesn't exist or overwrite is True, no need to do the md5um etag check, so just download. + # If the destination path doesn't exist or overwrite is True, no need to do the md5sum ETag check, so just download. # Compare the remote MD5 sum of the object with the local dest md5sum, if it already exists. if path_check(dest): # Determine if the remote and local object are identical - if keysum(module, s3, bucket, obj, version=version) == module.md5(dest): + if keysum_compare(module, dest, s3, bucket, obj, version=version): sum_matches = True if overwrite == 'always': download_s3file(module, s3, bucket, obj, dest, retries, version=version) @@ -740,10 +766,10 @@ def main(): if bucketrtn: keyrtn = key_check(module, s3, bucket, obj, version=version, validate=validate) - # Lets check key state. Does it exist and if it does, compute the etag md5sum. + # Lets check key state. Does it exist and if it does, compute the ETag md5sum. if bucketrtn and keyrtn: # Compare the local and remote object - if module.md5(src) == keysum(module, s3, bucket, obj): + if keysum_compare(module, src, s3, bucket, obj): sum_matches = True if overwrite == 'always': # only use valid object acls for the upload_s3file function