1
0
Fork 0
mirror of https://github.com/ansible-collections/community.general.git synced 2024-09-14 20:13:21 +02:00

csv module utils: detect unicode BOM in content (#6662)

* csv module utils: detect unicode BOM in content

* fix handling of py2

* fix comment

* add changelog frag

* add missing link

* simplification
This commit is contained in:
Alexei Znamensky 2023-06-15 17:19:16 +12:00 committed by GitHub
parent 9395df1c6f
commit bb2169340d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 50 additions and 13 deletions

View file

@ -0,0 +1,2 @@
bugfixes:
- csv module utils - detects and removes Unicode BOM markers from incoming CSV content (https://github.com/ansible-collections/community.general/pull/6662).

View file

@ -55,8 +55,10 @@ def initialize_dialect(dialect, **kwargs):
def read_csv(data, dialect, fieldnames=None): def read_csv(data, dialect, fieldnames=None):
BOM = to_native(u'\ufeff')
data = to_native(data, errors='surrogate_or_strict') data = to_native(data, errors='surrogate_or_strict')
if data.startswith(BOM):
data = data[len(BOM):]
if PY3: if PY3:
fake_fh = StringIO(data) fake_fh = StringIO(data)

View file

@ -0,0 +1,7 @@
---
# Copyright (c) Ansible Project
# GNU General Public License v3.0+ (see LICENSES/GPL-3.0-or-later.txt or https://www.gnu.org/licenses/gpl-3.0.txt)
# SPDX-License-Identifier: GPL-3.0-or-later
dependencies:
- setup_remote_tmp_dir

View file

@ -11,16 +11,16 @@
# Create basic CSV file # Create basic CSV file
- name: Create unique CSV file - name: Create unique CSV file
copy: copy:
content: | content: &users_content |
name,uid,gid,gecos name,uid,gid,gecos
dag,500,500,Dag Wieërs dag,500,500,Dag Wieërs
jeroen,501,500,Jeroen Hoekx jeroen,501,500,Jeroen Hoekx
dest: users_unique.csv dest: "{{ remote_tmp_dir }}/users_unique.csv"
# Read a CSV file and access user 'dag' # Read a CSV file and access user 'dag'
- name: Read users from CSV file and return a dictionary - name: Read users from CSV file and return a dictionary
read_csv: read_csv:
path: users_unique.csv path: "{{ remote_tmp_dir }}/users_unique.csv"
key: name key: name
register: users_unique register: users_unique
@ -38,7 +38,7 @@
# Read a CSV file and access the first item # Read a CSV file and access the first item
- name: Read users from CSV file and return a list - name: Read users from CSV file and return a list
read_csv: read_csv:
path: users_unique.csv path: "{{ remote_tmp_dir }}/users_unique.csv"
register: users_unique register: users_unique
- assert: - assert:
@ -61,12 +61,12 @@
dag;500;500;Dag Wieërs dag;500;500;Dag Wieërs
jeroen;501;500;Jeroen Hoekx jeroen;501;500;Jeroen Hoekx
dag;502;500;Dag Wieers dag;502;500;Dag Wieers
dest: users_nonunique.csv dest: "{{ remote_tmp_dir }}/users_nonunique.csv"
# Read a CSV file and access user 'dag' # Read a CSV file and access user 'dag'
- name: Read users from CSV file and return a dictionary - name: Read users from CSV file and return a dictionary
read_csv: read_csv:
path: users_nonunique.csv path: "{{ remote_tmp_dir }}/users_nonunique.csv"
key: name key: name
unique: false unique: false
delimiter: ';' delimiter: ';'
@ -87,7 +87,7 @@
# Read a CSV file using a non-existing dialect # Read a CSV file using a non-existing dialect
- name: Read users from CSV file and return a dictionary - name: Read users from CSV file and return a dictionary
read_csv: read_csv:
path: users_nonunique.csv path: "{{ remote_tmp_dir }}/users_nonunique.csv"
dialect: placebo dialect: placebo
register: users_placebo register: users_placebo
ignore_errors: true ignore_errors: true
@ -104,12 +104,12 @@
content: | content: |
dag,500,500,Dag Wieërs dag,500,500,Dag Wieërs
jeroen,501,500,Jeroen Hoekx jeroen,501,500,Jeroen Hoekx
dest: users_noheader.csv dest: "{{ remote_tmp_dir }}/users_noheader.csv"
# Read a CSV file and access user 'dag' # Read a CSV file and access user 'dag'
- name: Read users from CSV file and return a dictionary - name: Read users from CSV file and return a dictionary
read_csv: read_csv:
path: users_noheader.csv path: "{{ remote_tmp_dir }}/users_noheader.csv"
key: name key: name
fieldnames: name,uid,gid,gecos fieldnames: name,uid,gid,gecos
register: users_noheader register: users_noheader
@ -133,12 +133,12 @@
name,uid,gid,gecos name,uid,gid,gecos
dag,500,500,Dag Wieërs dag,500,500,Dag Wieërs
jeroen,501,500,"Jeroen"Hoekx" jeroen,501,500,"Jeroen"Hoekx"
dest: users_broken.csv dest: "{{ remote_tmp_dir }}/users_broken.csv"
# Read a broken CSV file using strict # Read a broken CSV file using strict
- name: Read users from a broken CSV file - name: Read users from a broken CSV file
read_csv: read_csv:
path: users_broken.csv path: "{{ remote_tmp_dir }}/users_broken.csv"
key: name key: name
strict: true strict: true
register: users_broken register: users_broken
@ -148,3 +148,29 @@
that: that:
- users_broken is failed - users_broken is failed
- "'Unable to process file' in users_broken.msg" - "'Unable to process file' in users_broken.msg"
# Create basic CSV file with BOM
- name: Create unique CSV file with BOM
copy:
content: "{{ bom + content }}"
dest: "{{ remote_tmp_dir }}/users_bom.csv"
vars:
content: *users_content
bom: "{{ '\ufeff' }}"
# Read a CSV file and access the first item
- name: Read users from CSV file and return a list
read_csv:
path: "{{ remote_tmp_dir }}/users_bom.csv"
register: users_bom
- assert:
that:
- users_bom.list.0.name == 'dag'
- users_bom.list.0.gecos == 'Dag Wieërs'
- users_bom.list.0.uid == '500'
- users_bom.list.0.gid == '500'
- users_bom.list.1.name == 'jeroen'
- users_bom.list.1.gecos == 'Jeroen Hoekx'
- users_bom.list.1.uid == '501'
- users_bom.list.1.gid == '500'