From adca0d5d75ff31f2d802db07a36ecf847888d533 Mon Sep 17 00:00:00 2001 From: "patchback[bot]" <45432694+patchback[bot]@users.noreply.github.com> Date: Thu, 15 Jun 2023 08:42:27 +0200 Subject: [PATCH] [PR #6662/bb216934 backport][stable-7] csv module utils: detect unicode BOM in content (#6695) csv module utils: detect unicode BOM in content (#6662) * csv module utils: detect unicode BOM in content * fix handling of py2 * fix comment * add changelog frag * add missing link * simplification (cherry picked from commit bb2169340d04b70f3a35f6e321ca5dfe444fb3aa) Co-authored-by: Alexei Znamensky <103110+russoz@users.noreply.github.com> --- changelogs/fragments/6662-csv-bom.yml | 2 + plugins/module_utils/csv.py | 4 +- .../targets/read_csv/meta/main.yml | 7 +++ .../targets/read_csv/tasks/main.yml | 50 ++++++++++++++----- 4 files changed, 50 insertions(+), 13 deletions(-) create mode 100644 changelogs/fragments/6662-csv-bom.yml create mode 100644 tests/integration/targets/read_csv/meta/main.yml diff --git a/changelogs/fragments/6662-csv-bom.yml b/changelogs/fragments/6662-csv-bom.yml new file mode 100644 index 0000000000..e9c617219c --- /dev/null +++ b/changelogs/fragments/6662-csv-bom.yml @@ -0,0 +1,2 @@ +bugfixes: + - csv module utils - detects and removes unicode BOM markers from incoming CSV content (https://github.com/ansible-collections/community.general/pull/6662). 
diff --git a/plugins/module_utils/csv.py b/plugins/module_utils/csv.py index 50d2cb3868..200548a46d 100644 --- a/plugins/module_utils/csv.py +++ b/plugins/module_utils/csv.py @@ -55,8 +55,10 @@ def initialize_dialect(dialect, **kwargs): def read_csv(data, dialect, fieldnames=None): - + BOM = to_native(u'\ufeff') data = to_native(data, errors='surrogate_or_strict') + if data.startswith(BOM): + data = data[len(BOM):] if PY3: fake_fh = StringIO(data) diff --git a/tests/integration/targets/read_csv/meta/main.yml b/tests/integration/targets/read_csv/meta/main.yml new file mode 100644 index 0000000000..982de6eb03 --- /dev/null +++ b/tests/integration/targets/read_csv/meta/main.yml @@ -0,0 +1,7 @@ +--- +# Copyright (c) Ansible Project +# GNU General Public License v3.0+ (see LICENSES/GPL-3.0-or-later.txt or https://www.gnu.org/licenses/gpl-3.0.txt) +# SPDX-License-Identifier: GPL-3.0-or-later + +dependencies: + - setup_remote_tmp_dir diff --git a/tests/integration/targets/read_csv/tasks/main.yml b/tests/integration/targets/read_csv/tasks/main.yml index a21af95182..c09349dd5b 100644 --- a/tests/integration/targets/read_csv/tasks/main.yml +++ b/tests/integration/targets/read_csv/tasks/main.yml @@ -11,16 +11,16 @@ # Create basic CSV file - name: Create unique CSV file copy: - content: | + content: &users_content | name,uid,gid,gecos dag,500,500,Dag Wieërs jeroen,501,500,Jeroen Hoekx - dest: users_unique.csv + dest: "{{ remote_tmp_dir }}/users_unique.csv" # Read a CSV file and access user 'dag' - name: Read users from CSV file and return a dictionary read_csv: - path: users_unique.csv + path: "{{ remote_tmp_dir }}/users_unique.csv" key: name register: users_unique @@ -35,10 +35,10 @@ - users_unique.dict.jeroen.uid == '501' - users_unique.dict.jeroen.gid == '500' - # Read a CSV file and access the first item +# Read a CSV file and access the first item - name: Read users from CSV file and return a list read_csv: - path: users_unique.csv + path: "{{ remote_tmp_dir 
}}/users_unique.csv" register: users_unique - assert: @@ -61,12 +61,12 @@ dag;500;500;Dag Wieërs jeroen;501;500;Jeroen Hoekx dag;502;500;Dag Wieers - dest: users_nonunique.csv + dest: "{{ remote_tmp_dir }}/users_nonunique.csv" # Read a CSV file and access user 'dag' - name: Read users from CSV file and return a dictionary read_csv: - path: users_nonunique.csv + path: "{{ remote_tmp_dir }}/users_nonunique.csv" key: name unique: false delimiter: ';' @@ -87,7 +87,7 @@ # Read a CSV file using an non-existing dialect - name: Read users from CSV file and return a dictionary read_csv: - path: users_nonunique.csv + path: "{{ remote_tmp_dir }}/users_nonunique.csv" dialect: placebo register: users_placebo ignore_errors: true @@ -104,12 +104,12 @@ content: | dag,500,500,Dag Wieërs jeroen,501,500,Jeroen Hoekx - dest: users_noheader.csv + dest: "{{ remote_tmp_dir }}/users_noheader.csv" # Read a CSV file and access user 'dag' - name: Read users from CSV file and return a dictionary read_csv: - path: users_noheader.csv + path: "{{ remote_tmp_dir }}/users_noheader.csv" key: name fieldnames: name,uid,gid,gecos register: users_noheader @@ -133,12 +133,12 @@ name,uid,gid,gecos dag,500,500,Dag Wieërs jeroen,501,500,"Jeroen"Hoekx" - dest: users_broken.csv + dest: "{{ remote_tmp_dir }}/users_broken.csv" # Read a broken CSV file using strict - name: Read users from a broken CSV file read_csv: - path: users_broken.csv + path: "{{ remote_tmp_dir }}/users_broken.csv" key: name strict: true register: users_broken @@ -148,3 +148,29 @@ that: - users_broken is failed - "'Unable to process file' in users_broken.msg" + +# Create basic CSV file with BOM +- name: Create unique CSV file with BOM + copy: + content: "{{ bom + content }}" + dest: "{{ remote_tmp_dir }}/users_bom.csv" + vars: + content: *users_content + bom: "{{ '\ufeff' }}" + + # Read a CSV file and access the first item +- name: Read users from CSV file and return a list + read_csv: + path: "{{ remote_tmp_dir }}/users_bom.csv" + 
register: users_bom + +- assert: + that: + - users_bom.list.0.name == 'dag' + - users_bom.list.0.gecos == 'Dag Wieërs' + - users_bom.list.0.uid == '500' + - users_bom.list.0.gid == '500' + - users_bom.list.1.name == 'jeroen' + - users_bom.list.1.gecos == 'Jeroen Hoekx' + - users_bom.list.1.uid == '501' + - users_bom.list.1.gid == '500'