mirror of
				https://github.com/ansible-collections/community.general.git
				synced 2024-09-14 20:13:21 +02:00 
			
		
		
		
	
		
			
				
	
	
		
			368 lines
		
	
	
	
		
			12 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			368 lines
		
	
	
	
		
			12 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| #!/usr/bin/python
 | |
| # -*- coding: utf-8 -*-
 | |
| 
 | |
| # (c) 2012, Jan-Piet Mens <jpmens () gmail.com>
 | |
| #
 | |
| # This file is part of Ansible
 | |
| #
 | |
| # Ansible is free software: you can redistribute it and/or modify
 | |
| # it under the terms of the GNU General Public License as published by
 | |
| # the Free Software Foundation, either version 3 of the License, or
 | |
| # (at your option) any later version.
 | |
| #
 | |
| # Ansible is distributed in the hope that it will be useful,
 | |
| # but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
| # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | |
| # GNU General Public License for more details.
 | |
| #
 | |
| # You should have received a copy of the GNU General Public License
 | |
| # along with Ansible.  If not, see <http://www.gnu.org/licenses/>.
 | |
| #
 | |
| # see examples/playbooks/get_url.yml
 | |
| 
 | |
| import shutil
 | |
| import datetime
 | |
| import re
 | |
| import tempfile
 | |
| 
 | |
| DOCUMENTATION = '''
 | |
| ---
 | |
| module: get_url
 | |
| short_description: Downloads files from HTTP, HTTPS, or FTP to node
 | |
| description:
 | |
|      - Downloads files from HTTP, HTTPS, or FTP to the remote server. The remote
 | |
|        server I(must) have direct access to the remote resource.
 | |
|      - By default, if an environment variable C(<protocol>_proxy) is set on
 | |
|        the target host, requests will be sent through that proxy. This
 | |
|        behaviour can be overridden by setting a variable for this task
 | |
|        (see `setting the environment
 | |
|        <http://www.ansibleworks.com/docs/playbooks_environment.html#setting-the-environment-and-working-with-proxies>`_),
 | |
|        or by using the use_proxy option.
 | |
| version_added: "0.6"
 | |
| options:
 | |
|   url:
 | |
|     description:
 | |
|       - HTTP, HTTPS, or FTP URL in the form (http|https|ftp)://[user[:pass]]@host.domain[:port]/path
 | |
|     required: true
 | |
|     default: null
 | |
|     aliases: []
 | |
|   dest:
 | |
|     description:
 | |
|       - absolute path of where to download the file to.
 | |
|       - If C(dest) is a directory, either the server provided filename or, if
 | |
|         none provided, the base name of the URL on the remote server will be
 | |
|         used. If a directory, C(force) has no effect.
 | |
|     required: true
 | |
|     default: null
 | |
|   force:
 | |
|     description:
 | |
|       - If C(yes) and C(dest) is not a directory, will download the file every
 | |
|         time and replace the file if the contents change. If C(no), the file
 | |
|         will only be downloaded if the destination does not exist. Generally
 | |
|         should be C(yes) only for small local files. Prior to 0.6, this module
 | |
|         behaved as if C(yes) was the default.
 | |
|         Has no effect if C(dest) is a directory - the file will always be
 | |
|         downloaded, but replaced only if the contents changed.
 | |
|     version_added: "0.7"
 | |
|     required: false
 | |
|     choices: [ "yes", "no" ]
 | |
|     default: "no"
 | |
|     aliases: [ "thirsty" ]
 | |
|   sha256sum:
 | |
|     description:
 | |
|       - If a SHA-256 checksum is passed to this parameter, the digest of the
 | |
|         destination file will be calculated after it is downloaded to ensure
 | |
|         its integrity and verify that the transfer completed successfully.
 | |
|     version_added: "1.3"
 | |
|     required: false
 | |
|     default: null
 | |
|   use_proxy:
 | |
|     description:
 | |
|       - if C(no), it will not use a proxy, even if one is defined in
 | |
|         an environment variable on the target hosts.
 | |
|     required: false
 | |
|     default: 'yes'
 | |
|     choices: ['yes', 'no']
 | |
|   others:
 | |
|     description:
 | |
|       - all arguments accepted by the M(file) module also work here
 | |
|     required: false
 | |
| notes:
 | |
|     - This module doesn't yet support configuration for proxies.
 | |
| # informational: requirements for nodes
 | |
| requirements: [ urllib2, urlparse ]
 | |
| author: Jan-Piet Mens
 | |
| '''
 | |
| 
 | |
| EXAMPLES='''
 | |
| - name: download foo.conf
 | |
|   get_url: url=http://example.com/path/file.conf dest=/etc/foo.conf mode=0440
 | |
| 
 | |
| - name: download file with sha256 check
 | |
|   get_url: url=http://example.com/path/file.conf dest=/etc/foo.conf sha256sum=b5bb9d8014a0f9b1d61e21e796d78dccdf1352f23cd32812f4850b878ae4944c
 | |
| '''
 | |
| 
 | |
| try:
 | |
|     import hashlib
 | |
|     HAS_HASHLIB=True
 | |
| except ImportError:
 | |
|     HAS_HASHLIB=False
 | |
| 
 | |
| try:
 | |
|     import urllib2
 | |
|     HAS_URLLIB2 = True
 | |
| except ImportError:
 | |
|     HAS_URLLIB2 = False
 | |
| 
 | |
| try:
 | |
|     import urlparse
 | |
|     import socket
 | |
|     HAS_URLPARSE = True
 | |
| except ImportError:
 | |
|     HAS_URLPARSE=False
 | |
| 
 | |
| # ==============================================================
 | |
| # url handling
 | |
| 
 | |
| def url_filename(url):
 | |
|     fn = os.path.basename(urlparse.urlsplit(url)[2])
 | |
|     if fn == '':
 | |
|         return 'index.html'
 | |
|     return fn
 | |
| 
 | |
| def url_do_get(module, url, dest, use_proxy, last_mod_time):
 | |
|     """
 | |
|     Get url and return request and info
 | |
|     Credits: http://stackoverflow.com/questions/7006574/how-to-download-file-from-ftp
 | |
|     """
 | |
| 
 | |
|     USERAGENT = 'ansible-httpget'
 | |
|     info = dict(url=url, dest=dest)
 | |
|     r = None
 | |
|     handlers = []
 | |
| 
 | |
|     parsed = urlparse.urlparse(url)
 | |
| 
 | |
|     if '@' in parsed[1]:
 | |
|         credentials, netloc = parsed[1].split('@', 1)
 | |
|         if ':' in credentials:
 | |
|             username, password = credentials.split(':', 1)
 | |
|         else:
 | |
|             username = credentials
 | |
|             password = ''
 | |
|         parsed = list(parsed)
 | |
|         parsed[1] = netloc
 | |
| 
 | |
|         passman = urllib2.HTTPPasswordMgrWithDefaultRealm()
 | |
|         # this creates a password manager
 | |
|         passman.add_password(None, netloc, username, password)
 | |
|         # because we have put None at the start it will always
 | |
|         # use this username/password combination for  urls
 | |
|         # for which `theurl` is a super-url
 | |
| 
 | |
|         authhandler = urllib2.HTTPBasicAuthHandler(passman)
 | |
|         # create the AuthHandler
 | |
|         handlers.append(authhandler)
 | |
| 
 | |
|         #reconstruct url without credentials
 | |
|         url = urlparse.urlunparse(parsed)
 | |
| 
 | |
|     if not use_proxy:
 | |
|         proxyhandler = urllib2.ProxyHandler({})
 | |
|         handlers.append(proxyhandler)
 | |
| 
 | |
|     opener = urllib2.build_opener(*handlers)
 | |
|     urllib2.install_opener(opener)
 | |
|     request = urllib2.Request(url)
 | |
|     request.add_header('User-agent', USERAGENT)
 | |
| 
 | |
|     if last_mod_time:
 | |
|         tstamp = last_mod_time.strftime('%a, %d %b %Y %H:%M:%S +0000')
 | |
|         request.add_header('If-Modified-Since', tstamp)
 | |
| 
 | |
|     try:
 | |
|         r = urllib2.urlopen(request)
 | |
|         info.update(r.info())
 | |
|         info['url'] = r.geturl()  # The URL goes in too, because of redirects.
 | |
|         info.update(dict(msg="OK (%s bytes)" % r.headers.get('Content-Length', 'unknown'), status=200))
 | |
|     except urllib2.HTTPError, e:
 | |
|         # Must not fail_json() here so caller can handle HTTP 304 unmodified
 | |
|         info.update(dict(msg=str(e), status=e.code))
 | |
|     except urllib2.URLError, e:
 | |
|         code = getattr(e, 'code', -1)
 | |
|         module.fail_json(msg="Request failed: %s" % str(e), status_code=code)
 | |
| 
 | |
|     return r, info
 | |
| 
 | |
| def url_get(module, url, dest, use_proxy, last_mod_time):
 | |
|     """
 | |
|     Download data from the url and store in a temporary file.
 | |
| 
 | |
|     Return (tempfile, info about the request)
 | |
|     """
 | |
| 
 | |
|     req, info = url_do_get(module, url, dest, use_proxy, last_mod_time)
 | |
| 
 | |
|     # TODO: should really handle 304, but how? src file could exist (and be newer) but empty
 | |
|     if info['status'] == 304:
 | |
|         module.exit_json(url=url, dest=dest, changed=False, msg=info.get('msg', ''))
 | |
| 
 | |
|     # create a temporary file and copy content to do md5-based replacement
 | |
|     if info['status'] != 200:
 | |
|         module.fail_json(msg="Request failed", status_code=info['status'], response=info['msg'], url=url, dest=dest)
 | |
| 
 | |
|     fd, tempname = tempfile.mkstemp()
 | |
|     f = os.fdopen(fd, 'wb')
 | |
|     try:
 | |
|         shutil.copyfileobj(req, f)
 | |
|     except Exception, err:
 | |
|         os.remove(tempname)
 | |
|         module.fail_json(msg="failed to create temporary content file: %s" % str(err))
 | |
|     f.close()
 | |
|     req.close()
 | |
|     return tempname, info
 | |
| 
 | |
| def extract_filename_from_headers(headers):
 | |
|     """
 | |
|     Extracts a filename from the given dict of HTTP headers.
 | |
| 
 | |
|     Looks for the content-disposition header and applies a regex.
 | |
|     Returns the filename if successful, else None."""
 | |
|     cont_disp_regex = 'attachment; ?filename="(.+)"'
 | |
|     res = None
 | |
| 
 | |
|     if 'content-disposition' in headers:
 | |
|         cont_disp = headers['content-disposition']
 | |
|         match = re.match(cont_disp_regex, cont_disp)
 | |
|         if match:
 | |
|             res = match.group(1)
 | |
|             # Try preventing any funny business.
 | |
|             res = os.path.basename(res)
 | |
| 
 | |
|     return res
 | |
| 
 | |
| # ==============================================================
 | |
| # main
 | |
| 
 | |
| def main():
 | |
| 
 | |
|     # does this really happen on non-ancient python?
 | |
|     if not HAS_URLLIB2:
 | |
|         module.fail_json(msg="urllib2 is not installed")
 | |
|     if not HAS_URLPARSE:
 | |
|         module.fail_json(msg="urlparse is not installed")
 | |
| 
 | |
|     module = AnsibleModule(
 | |
|         # not checking because of daisy chain to file module
 | |
|         argument_spec = dict(
 | |
|             url = dict(required=True),
 | |
|             dest = dict(required=True),
 | |
|             force = dict(default='no', aliases=['thirsty'], type='bool'),
 | |
|             sha256sum = dict(default=''),
 | |
|             use_proxy = dict(default='yes', type='bool')
 | |
|         ),
 | |
|         add_file_common_args=True
 | |
|     )
 | |
| 
 | |
|     url  = module.params['url']
 | |
|     dest = os.path.expanduser(module.params['dest'])
 | |
|     force = module.params['force']
 | |
|     sha256sum = module.params['sha256sum']
 | |
|     use_proxy = module.params['use_proxy']
 | |
| 
 | |
|     dest_is_dir = os.path.isdir(dest)
 | |
|     last_mod_time = None
 | |
| 
 | |
|     if not dest_is_dir and os.path.exists(dest):
 | |
|         if not force:
 | |
|             module.exit_json(msg="file already exists", dest=dest, url=url, changed=False)
 | |
| 
 | |
|         # If the file already exists, prepare the last modified time for the
 | |
|         # request.
 | |
|         mtime = os.path.getmtime(dest)
 | |
|         last_mod_time = datetime.datetime.utcfromtimestamp(mtime)
 | |
| 
 | |
|     # download to tmpsrc
 | |
|     tmpsrc, info = url_get(module, url, dest, use_proxy, last_mod_time)
 | |
| 
 | |
|     # Now the request has completed, we can finally generate the final
 | |
|     # destination file name from the info dict.
 | |
| 
 | |
|     if dest_is_dir:
 | |
|         filename = extract_filename_from_headers(info)
 | |
|         if not filename:
 | |
|             # Fall back to extracting the filename from the URL.
 | |
|             # Pluck the URL from the info, since a redirect could have changed
 | |
|             # it.
 | |
|             filename = url_filename(info['url'])
 | |
|         dest = os.path.join(dest, filename)
 | |
| 
 | |
|     md5sum_src   = None
 | |
|     md5sum_dest  = None
 | |
| 
 | |
|     # raise an error if there is no tmpsrc file
 | |
|     if not os.path.exists(tmpsrc):
 | |
|         os.remove(tmpsrc)
 | |
|         module.fail_json(msg="Request failed", status_code=info['status'], response=info['msg'])
 | |
|     if not os.access(tmpsrc, os.R_OK):
 | |
|         os.remove(tmpsrc)
 | |
|         module.fail_json( msg="Source %s not readable" % (tmpsrc))
 | |
|     md5sum_src = module.md5(tmpsrc)
 | |
| 
 | |
|     # check if there is no dest file
 | |
|     if os.path.exists(dest):
 | |
|         # raise an error if copy has no permission on dest
 | |
|         if not os.access(dest, os.W_OK):
 | |
|             os.remove(tmpsrc)
 | |
|             module.fail_json( msg="Destination %s not writable" % (dest))
 | |
|         if not os.access(dest, os.R_OK):
 | |
|             os.remove(tmpsrc)
 | |
|             module.fail_json( msg="Destination %s not readable" % (dest))
 | |
|         md5sum_dest = module.md5(dest)
 | |
|     else:
 | |
|         if not os.access(os.path.dirname(dest), os.W_OK):
 | |
|             os.remove(tmpsrc)
 | |
|             module.fail_json( msg="Destination %s not writable" % (os.path.dirname(dest)))
 | |
| 
 | |
|     if md5sum_src != md5sum_dest:
 | |
|         try:
 | |
|             shutil.copyfile(tmpsrc, dest)
 | |
|         except Exception, err:
 | |
|             os.remove(tmpsrc)
 | |
|             module.fail_json(msg="failed to copy %s to %s: %s" % (tmpsrc, dest, str(err)))
 | |
|         changed = True
 | |
|     else:
 | |
|         changed = False
 | |
| 
 | |
|     # Check the digest of the destination file and ensure that it matches the 
 | |
|     # sha256sum parameter if it is present
 | |
|     if sha256sum != '':
 | |
|         # Remove any non-alphanumeric characters, including the infamous
 | |
|         # Unicode zero-width space
 | |
|         stripped_sha256sum = re.sub(r'\W+', '', sha256sum)
 | |
| 
 | |
|         if not HAS_HASHLIB:
 | |
|             os.remove(dest)
 | |
|             module.fail_json(msg="The sha256sum parameter requires hashlib, which is available in Python 2.5 and higher")
 | |
|         else:
 | |
|             destination_checksum = module.sha256(dest)
 | |
| 
 | |
|         if stripped_sha256sum != destination_checksum:
 | |
|             os.remove(dest)
 | |
|             module.fail_json(msg="The SHA-256 checksum for %s did not match %s; it was %s." % (dest, sha256sum, destination_checksum))
 | |
| 
 | |
|     os.remove(tmpsrc)
 | |
| 
 | |
|     # allow file attribute changes
 | |
|     module.params['path'] = dest
 | |
|     file_args = module.load_file_common_arguments(module.params)
 | |
|     file_args['path'] = dest
 | |
|     changed = module.set_file_attributes_if_different(file_args, changed)
 | |
| 
 | |
|     # Mission complete
 | |
|     module.exit_json(url=url, dest=dest, src=tmpsrc, md5sum=md5sum_src,
 | |
|         sha256sum=sha256sum, changed=changed, msg=info.get('msg', ''))
 | |
| 
 | |
| # this is magic, see lib/ansible/module_common.py
 | |
| #<<INCLUDE_ANSIBLE_MODULE_COMMON>>
 | |
| main()
 |