diff --git a/changelogs/fragments/6662-csv-bom.yml b/changelogs/fragments/6662-csv-bom.yml new file mode 100644 index 0000000000..e9c617219c --- /dev/null +++ b/changelogs/fragments/6662-csv-bom.yml @@ -0,0 +1,2 @@ +bugfixes: + - csv module utils - detects and removes Unicode BOM markers from incoming CSV content (https://github.com/ansible-collections/community.general/pull/6662). diff --git a/plugins/module_utils/csv.py b/plugins/module_utils/csv.py index 50d2cb3868..200548a46d 100644 --- a/plugins/module_utils/csv.py +++ b/plugins/module_utils/csv.py @@ -55,8 +55,10 @@ def initialize_dialect(dialect, **kwargs): def read_csv(data, dialect, fieldnames=None): - + BOM = to_native(u'\ufeff') data = to_native(data, errors='surrogate_or_strict') + if data.startswith(BOM): + data = data[len(BOM):] if PY3: fake_fh = StringIO(data) diff --git a/tests/integration/targets/read_csv/meta/main.yml b/tests/integration/targets/read_csv/meta/main.yml new file mode 100644 index 0000000000..982de6eb03 --- /dev/null +++ b/tests/integration/targets/read_csv/meta/main.yml @@ -0,0 +1,7 @@ +--- +# Copyright (c) Ansible Project +# GNU General Public License v3.0+ (see LICENSES/GPL-3.0-or-later.txt or https://www.gnu.org/licenses/gpl-3.0.txt) +# SPDX-License-Identifier: GPL-3.0-or-later + +dependencies: + - setup_remote_tmp_dir diff --git a/tests/integration/targets/read_csv/tasks/main.yml b/tests/integration/targets/read_csv/tasks/main.yml index a21af95182..c09349dd5b 100644 --- a/tests/integration/targets/read_csv/tasks/main.yml +++ b/tests/integration/targets/read_csv/tasks/main.yml @@ -11,16 +11,16 @@ # Create basic CSV file - name: Create unique CSV file copy: - content: | + content: &users_content | name,uid,gid,gecos dag,500,500,Dag Wieërs jeroen,501,500,Jeroen Hoekx - dest: users_unique.csv + dest: "{{ remote_tmp_dir }}/users_unique.csv" # Read a CSV file and access user 'dag' - name: Read users from CSV file and return a dictionary read_csv: - path: users_unique.csv + path: "{{ 
remote_tmp_dir }}/users_unique.csv" key: name register: users_unique @@ -35,10 +35,10 @@ - users_unique.dict.jeroen.uid == '501' - users_unique.dict.jeroen.gid == '500' - # Read a CSV file and access the first item +# Read a CSV file and access the first item - name: Read users from CSV file and return a list read_csv: - path: users_unique.csv + path: "{{ remote_tmp_dir }}/users_unique.csv" register: users_unique - assert: @@ -61,12 +61,12 @@ dag;500;500;Dag Wieërs jeroen;501;500;Jeroen Hoekx dag;502;500;Dag Wieers - dest: users_nonunique.csv + dest: "{{ remote_tmp_dir }}/users_nonunique.csv" # Read a CSV file and access user 'dag' - name: Read users from CSV file and return a dictionary read_csv: - path: users_nonunique.csv + path: "{{ remote_tmp_dir }}/users_nonunique.csv" key: name unique: false delimiter: ';' @@ -87,7 +87,7 @@ # Read a CSV file using an non-existing dialect - name: Read users from CSV file and return a dictionary read_csv: - path: users_nonunique.csv + path: "{{ remote_tmp_dir }}/users_nonunique.csv" dialect: placebo register: users_placebo ignore_errors: true @@ -104,12 +104,12 @@ content: | dag,500,500,Dag Wieërs jeroen,501,500,Jeroen Hoekx - dest: users_noheader.csv + dest: "{{ remote_tmp_dir }}/users_noheader.csv" # Read a CSV file and access user 'dag' - name: Read users from CSV file and return a dictionary read_csv: - path: users_noheader.csv + path: "{{ remote_tmp_dir }}/users_noheader.csv" key: name fieldnames: name,uid,gid,gecos register: users_noheader @@ -133,12 +133,12 @@ name,uid,gid,gecos dag,500,500,Dag Wieërs jeroen,501,500,"Jeroen"Hoekx" - dest: users_broken.csv + dest: "{{ remote_tmp_dir }}/users_broken.csv" # Read a broken CSV file using strict - name: Read users from a broken CSV file read_csv: - path: users_broken.csv + path: "{{ remote_tmp_dir }}/users_broken.csv" key: name strict: true register: users_broken @@ -148,3 +148,29 @@ that: - users_broken is failed - "'Unable to process file' in users_broken.msg" + +# Create 
basic CSV file with BOM +- name: Create unique CSV file with BOM + copy: + content: "{{ bom + content }}" + dest: "{{ remote_tmp_dir }}/users_bom.csv" + vars: + content: *users_content + bom: "{{ '\ufeff' }}" + + # Read a CSV file and access the first item +- name: Read users from CSV file and return a list + read_csv: + path: "{{ remote_tmp_dir }}/users_bom.csv" + register: users_bom + +- assert: + that: + - users_bom.list.0.name == 'dag' + - users_bom.list.0.gecos == 'Dag Wieërs' + - users_bom.list.0.uid == '500' + - users_bom.list.0.gid == '500' + - users_bom.list.1.name == 'jeroen' + - users_bom.list.1.gecos == 'Jeroen Hoekx' + - users_bom.list.1.uid == '501' + - users_bom.list.1.gid == '500'