From 9c20182bdfbda373d18825a0c1e061e6be502d31 Mon Sep 17 00:00:00 2001 From: Robin Miller Date: Mon, 6 Mar 2017 14:19:59 -0600 Subject: [PATCH] [cloud] Avoid failures in RDS modyue due to throttling while waiting (#22271) 1. Check less aggressively for resources that take a long time to return (some resources can take an hour or more). 2. Handle throttling when checking for resources, and back off quickly when it occurs. --- lib/ansible/modules/cloud/amazon/rds.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/lib/ansible/modules/cloud/amazon/rds.py b/lib/ansible/modules/cloud/amazon/rds.py index f29942168f..e3edeccf1d 100644 --- a/lib/ansible/modules/cloud/amazon/rds.py +++ b/lib/ansible/modules/cloud/amazon/rds.py @@ -313,6 +313,8 @@ EXAMPLES = ''' import sys import time +from ansible.module_utils.ec2 import AWSRetry + try: import boto.rds HAS_BOTO = True @@ -650,23 +652,31 @@ class RDS2Snapshot: def await_resource(conn, resource, status, module): - wait_timeout = module.params.get('wait_timeout') + time.time() + start_time = time.time() + wait_timeout = module.params.get('wait_timeout') + start_time + check_interval = 5 while wait_timeout > time.time() and resource.status != status: - time.sleep(5) + time.sleep(check_interval) if wait_timeout <= time.time(): module.fail_json(msg="Timeout waiting for RDS resource %s" % resource.name) if module.params.get('command') == 'snapshot': # Temporary until all the rds2 commands have their responses parsed if resource.name is None: module.fail_json(msg="There was a problem waiting for RDS snapshot %s" % resource.snapshot) - resource = conn.get_db_snapshot(resource.name) + # Back off if we're getting throttled, since we're just waiting anyway + resource = AWSRetry.backoff(tries=5, delay=20, backoff=1.5)(conn.get_db_snapshot)(resource.name) else: # Temporary until all the rds2 commands have their responses parsed if resource.name is None: module.fail_json(msg="There was a problem waiting for RDS instance %s" % resource.instance) - resource = conn.get_db_instance(resource.name) + # Back off if we're getting throttled, since we're just waiting anyway + resource = AWSRetry.backoff(tries=5, delay=20, backoff=1.5)(conn.get_db_instance)(resource.name) if resource is None: break + # Some RDS resources take much longer than others to be ready. Check + # less aggressively for slow ones to avoid throttling. + if time.time() > start_time + 90: + check_interval = 20 return resource