read_csv: new module to read CSV files (#49578)

* read_csv: new module to read CSV files * Add a doc reference to the csvfile lookup plugin * Enable the use of custom dialect options * Improve error handling * Fix PEP8 * Fix more PEP8 * Simplify custom dialect code * Add integration tests * Fixes for CI * Fix for python 2.6
2024-09-14 20:13:21 +02:00 · 2018-12-10 12:42:47 +01:00 · 2018-12-10 12:42:47 +01:00 · 23ae3aa32a
commit 23ae3aa32a
parent e25dac946f
3 changed files with 378 additions and 0 deletions
--- a/lib/ansible/modules/files/read_csv.py
+++ b/lib/ansible/modules/files/read_csv.py
@ -0,0 +1,235 @@
 #!/usr/bin/python
 # -*- coding: utf-8 -*-
 # Copyright: (c) 2018, Dag Wieers (@dagwieers) <dag@wieers.com>
 # GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt)
 from __future__ import absolute_import, division, print_function
 __metaclass__ = type
 # Module metadata: metadata format version, maturity status and support level.
 ANSIBLE_METADATA = {
    'metadata_version': '1.1',
    'status': ['preview'],
    'supported_by': 'community',
 }
 # User-facing option documentation (YAML); option types mirror the argument_spec in main().
 DOCUMENTATION = r'''
 ---
 module: read_csv
 version_added: '2.8'
 short_description: Read a CSV file
 description:
 - Read a CSV file and return a list or a dictionary, containing one dictionary per row.
 author:
 - Dag Wieers (@dagwieers)
 options:
  path:
    description:
    - The CSV filename to read data from.
    type: path
    required: yes
    aliases: [ filename ]
  key:
    description:
    - The column name used as a key for the resulting dictionary.
    - If C(key) is unset, the module returns a list of dictionaries,
      where each dictionary is a row in the CSV file.
    type: str
  dialect:
    description:
    - The CSV dialect to use when parsing the CSV file.
    - Possible values include C(excel), C(excel-tab) or C(unix).
    type: str
    default: excel
  fieldnames:
    description:
    - A list of field names for every column.
    - This is needed if the CSV does not have a header.
    type: list
  unique:
    description:
    - Whether the C(key) used is expected to be unique.
    - When enabled, the module fails if two rows share the same C(key) value.
    - When disabled, a later row replaces an earlier row with the same C(key) value.
    type: bool
    default: yes
  delimiter:
    description:
    - A one-character string used to separate fields.
    - When using this parameter, you change the default value used by C(dialect).
    - The default value depends on the dialect used.
    type: str
  skipinitialspace:
    description:
    - Whether to ignore any whitespace immediately following the delimiter.
    - When using this parameter, you change the default value used by C(dialect).
    - The default value depends on the dialect used.
    type: bool
  strict:
    description:
    - Whether to raise an exception on bad CSV input.
    - When using this parameter, you change the default value used by C(dialect).
    - The default value depends on the dialect used.
    type: bool
 notes:
 - Ansible also ships with the C(csvfile) lookup plugin, which can be used to do selective lookups in CSV files from Jinja.
 '''
 # Usage examples rendered in the module documentation (YAML tasks).
 EXAMPLES = r'''
 # Example CSV file with header
 #
 #   name,uid,gid
 #   dag,500,500
 #   jeroen,501,500
 # Read a CSV file and access user 'dag'
 - name: Read users from CSV file and return a dictionary
  read_csv:
    path: users.csv
    key: name
  register: users
  delegate_to: localhost
 - debug:
    msg: 'User {{ users.dict.dag.name }} has UID {{ users.dict.dag.uid }} and GID {{ users.dict.dag.gid }}'
 # Read a CSV file and access the first item (list indexes start at 0)
 - name: Read users from CSV file and return a list
  read_csv:
    path: users.csv
  register: users
  delegate_to: localhost
 - debug:
    msg: 'User {{ users.list.0.name }} has UID {{ users.list.0.uid }} and GID {{ users.list.0.gid }}'
 # Example CSV file without header and semi-colon delimiter
 #
 #   dag;500;500
 #   jeroen;501;500
 # Read a CSV file without headers
 - name: Read users from CSV file and return a list
  read_csv:
    path: users.csv
    fieldnames: name,uid,gid
    delimiter: ';'
  register: users
  delegate_to: localhost
 '''
 # Return-value documentation (YAML). Sample values are quoted because the
 # module passes the field values through exactly as the csv reader yields them.
 RETURN = r'''
 dict:
  description: The CSV content as a dictionary, indexed by the C(key) column (empty when C(key) is unset).
  returned: success
  type: dict
  sample:
    dag:
      name: dag
      uid: '500'
      gid: '500'
    jeroen:
      name: jeroen
      uid: '501'
      gid: '500'
 list:
  description: The CSV content as a list, one dictionary per row (empty when C(key) is set).
  returned: success
  type: list
  sample:
  - name: dag
    uid: '500'
    gid: '500'
  - name: jeroen
    uid: '501'
    gid: '500'
 '''
 import csv
 from ansible.module_utils.basic import AnsibleModule
 from ansible.module_utils._text import to_text
 # Add Unix dialect from Python 3
 class unix_dialect(csv.Dialect):
    """CSV dialect registered under the name ``unix``.

    Fields are comma-separated, every field is quoted, and records are
    terminated by a single newline character.
    """
    # Quoting behaviour
    quoting = csv.QUOTE_ALL
    quotechar = '"'
    doublequote = True
    # Field and record separators
    delimiter = ','
    skipinitialspace = False
    lineterminator = '\n'


 # Make the dialect selectable by name through the csv module's registry.
 csv.register_dialect("unix", unix_dialect)
 def main():
    """Read a CSV file and hand its rows back as a list or as a dictionary.

    Parameters are taken from the AnsibleModule argument spec. When ``key`` is
    unset, every row is appended to the ``list`` result; otherwise each row is
    stored in the ``dict`` result under the value of its ``key`` column. Both
    results are always passed to ``exit_json``; the one not in use stays empty.

    The following conditions are reported through ``fail_json``: an unknown
    dialect, dialect options the csv module rejects, a file that cannot be
    opened, a ``key`` that is not among the field names, a duplicate key value
    while ``unique`` is set, and CSV content the reader cannot process.
    """
    module = AnsibleModule(
        argument_spec=dict(
            path=dict(type='path', required=True, aliases=['filename']),
            dialect=dict(type='str', default='excel'),
            key=dict(type='str'),
            fieldnames=dict(type='list'),
            unique=dict(type='bool', default=True),
            delimiter=dict(type='str'),
            skipinitialspace=dict(type='bool'),
            strict=dict(type='bool'),
        ),
        supports_check_mode=True,
    )

    path = module.params['path']
    dialect = module.params['dialect']
    key = module.params['key']
    fieldnames = module.params['fieldnames']
    unique = module.params['unique']

    if dialect not in csv.list_dialects():
        module.fail_json(msg="Dialect '%s' is not supported by your version of python." % dialect)

    dialect_options = dict(
        delimiter=module.params['delimiter'],
        skipinitialspace=module.params['skipinitialspace'],
        strict=module.params['strict'],
    )

    # Create a dictionary from only set options
    dialect_params = dict((k, v) for k, v in dialect_options.items() if v is not None)
    if dialect_params:
        # Derive a 'custom' dialect from the requested one, overriding only the options that were set.
        try:
            csv.register_dialect('custom', dialect, **dialect_params)
        except TypeError as e:
            module.fail_json(msg="Unable to create custom dialect: %s" % to_text(e))
        dialect = 'custom'

    try:
        f = open(path, 'r')
    except (IOError, OSError) as e:
        module.fail_json(msg="Unable to open file: %s" % to_text(e))

    data_dict = dict()
    data_list = list()

    # The finally clause closes the file handle however the parsing code below is left.
    try:
        reader = csv.DictReader(f, fieldnames=fieldnames, dialect=dialect)

        # Without explicit fieldnames, reading this attribute consumes the header line,
        # so a parse error can already surface here; it is None for an empty file.
        try:
            header = reader.fieldnames or []
        except csv.Error as e:
            module.fail_json(msg="Unable to process file: %s" % to_text(e))

        # Compare against None so that an empty key is rejected here instead of
        # failing with a KeyError on the first row.
        if key is not None and key not in header:
            module.fail_json(msg="Key '%s' was not found in the CSV header fields: %s" % (key, ', '.join(header)))

        try:
            if key is None:
                for row in reader:
                    data_list.append(row)
            else:
                for row in reader:
                    if unique and row[key] in data_dict:
                        module.fail_json(msg="Key '%s' is not unique for value '%s'" % (key, row[key]))
                    # With unique disabled, a later row replaces an earlier one with the same key value.
                    data_dict[row[key]] = row
        except csv.Error as e:
            module.fail_json(msg="Unable to process file: %s" % to_text(e))
    finally:
        f.close()

    module.exit_json(dict=data_dict, list=data_list)
 # Run the module only when this file is executed as a script, not when it is imported.
 if __name__ == '__main__':
    main()
--- a/test/integration/targets/read_csv/aliases
+++ b/test/integration/targets/read_csv/aliases
@ -0,0 +1 @@
 shippable/posix/group2
--- a/test/integration/targets/read_csv/tasks/main.yml
+++ b/test/integration/targets/read_csv/tasks/main.yml
@ -0,0 +1,142 @@
 # Fixture: comma-separated CSV file with a header row and unique names
 - name: Create unique CSV file
   copy:
     dest: users_unique.csv
     content: |
       name,uid,gid,gecos
       dag,500,500,Dag Wieërs
       jeroen,501,500,Jeroen Hoekx

 # With `key` set, the rows come back in `dict`, indexed by the key column
 - name: Read users from CSV file and return a dictionary
   read_csv:
     key: name
     path: users_unique.csv
   register: users_unique

 - assert:
     that:
       - users_unique.dict.dag.name == 'dag'
       - users_unique.dict.dag.gecos == 'Dag Wieërs'
       - users_unique.dict.dag.uid == '500'
       - users_unique.dict.dag.gid == '500'
       - users_unique.dict.jeroen.name == 'jeroen'
       - users_unique.dict.jeroen.gecos == 'Jeroen Hoekx'
       - users_unique.dict.jeroen.uid == '501'
       - users_unique.dict.jeroen.gid == '500'

 # Without `key`, the rows come back in `list`; index 0 is the first data row
 - name: Read users from CSV file and return a list
   read_csv:
     path: users_unique.csv
   register: users_unique

 - assert:
     that:
       - users_unique.list.0.name == 'dag'
       - users_unique.list.0.gecos == 'Dag Wieërs'
       - users_unique.list.0.uid == '500'
       - users_unique.list.0.gid == '500'
       - users_unique.list.1.name == 'jeroen'
       - users_unique.list.1.gecos == 'Jeroen Hoekx'
       - users_unique.list.1.uid == '501'
       - users_unique.list.1.gid == '500'
 # Fixture: semicolon-separated CSV file in which the name 'dag' occurs twice
 - name: Create non-unique CSV file using semi-colon
   copy:
     content: |
       name;uid;gid;gecos
       dag;500;500;Dag Wieërs
       jeroen;501;500;Jeroen Hoekx
       dag;502;500;Dag Wieers
     dest: users_nonunique.csv

 # Read on the same host the fixture was written to (no delegation), so both
 # tasks refer to the same file.
 # With `unique` disabled, the last row seen for a duplicated key is the one kept.
 - name: Read users from CSV file and return a dictionary
   read_csv:
     path: users_nonunique.csv
     key: name
     unique: false
     delimiter: ';'
   register: users_nonunique

 - assert:
     that:
       - users_nonunique.dict.dag.name == 'dag'
       - users_nonunique.dict.dag.gecos == 'Dag Wieers'
       - users_nonunique.dict.dag.uid == '502'
       - users_nonunique.dict.dag.gid == '500'
       - users_nonunique.dict.jeroen.name == 'jeroen'
       - users_nonunique.dict.jeroen.gecos == 'Jeroen Hoekx'
       - users_nonunique.dict.jeroen.uid == '501'
       - users_nonunique.dict.jeroen.gid == '500'
 # Requesting a dialect the csv module does not know must fail with a clear message
 - name: Read users from CSV file and return a dictionary
   read_csv:
     dialect: placebo
     path: users_nonunique.csv
   ignore_errors: true
   register: users_placebo

 - assert:
     that:
       - users_placebo is failed
       - users_placebo.msg == "Dialect 'placebo' is not supported by your version of python."
 # Fixture: CSV file without a header row.
 # Written on the same host that reads it below (no delegation), so both tasks
 # refer to the same file.
 - name: Create unique CSV file without header
   copy:
     content: |
       dag,500,500,Dag Wieërs
       jeroen,501,500,Jeroen Hoekx
     dest: users_noheader.csv

 # The column names are supplied through `fieldnames` because the file has no header
 - name: Read users from CSV file and return a dictionary
   read_csv:
     path: users_noheader.csv
     key: name
     fieldnames: name,uid,gid,gecos
   register: users_noheader

 - assert:
     that:
       - users_noheader.dict.dag.name == 'dag'
       - users_noheader.dict.dag.gecos == 'Dag Wieërs'
       - users_noheader.dict.dag.uid == '500'
       - users_noheader.dict.dag.gid == '500'
       - users_noheader.dict.jeroen.name == 'jeroen'
       - users_noheader.dict.jeroen.gecos == 'Jeroen Hoekx'
       - users_noheader.dict.jeroen.uid == '501'
       - users_noheader.dict.jeroen.gid == '500'
 # Fixture: CSV file whose last row contains a stray double quote inside a quoted field
 - name: Create unique CSV file
   copy:
     dest: users_broken.csv
     content: |
       name,uid,gid,gecos
       dag,500,500,Dag Wieërs
       jeroen,501,500,"Jeroen"Hoekx"

 # With `strict` enabled, the malformed row must make the module fail
 - name: Read users from a broken CSV file
   read_csv:
     key: name
     path: users_broken.csv
     strict: true
   ignore_errors: true
   register: users_broken

 - assert:
     that:
       - users_broken is failed
       - "'Unable to process file' in users_broken.msg"