Allow specifying the output encoding in the template module (#42171)

Allow specifying the source and destination files' encodings in the template module

* Added output_encoding to the template module, defaulting to utf-8
* Added documentation for the new variables
* Leveraged the encoding argument on to_text() and to_bytes() to keep the implementation as simple as possible
* Added integration tests with files in utf-8 and windows-1252 encodings, testing all combinations
* Fix bad smell test by excluding windows-1252 files from the utf8 checks
* Fix bad smell test by excluding valid files from the smart quote test
2024-09-14 20:13:21 +02:00 · 2018-07-25 22:10:40 +02:00 · 2018-07-25 22:10:40 +02:00 · 19dc267e4c
commit 19dc267e4c
parent 5b4a7cc283
8 changed files with 44 additions and 1 deletion
--- a/changelogs/fragments/template_output_encoding.yml
+++ b/changelogs/fragments/template_output_encoding.yml
@@ -0,0 +1,5 @@
+---
+minor_changes:
+- Explicit encoding for the output of the template module, to be able
+  to generate non-utf8 files from a utf-8 template.
+  (https://github.com/ansible/proposals/issues/121)
--- a/lib/ansible/modules/files/template.py
+++ b/lib/ansible/modules/files/template.py
@@ -106,6 +106,13 @@ options:
        may be specified as a symbolic mode (for example, C(u+rwx) or C(u=rw,g=r,o=r)).  As of
        version 2.6, the mode may also be the special string C(preserve).  C(preserve) means that
        the file will be given the same permissions as the source file."
+  output_encoding:
+    description:
+      - Overrides the encoding used to write the template file defined by C(dest).
+      - It defaults to C('utf-8'), but any encoding supported by Python can be used.
+      - The source template file must always be encoded using C('utf-8'), for homogeneity.
+    default: 'utf-8'
+    version_added: "2.7"
 notes:
  - For Windows you can use M(win_template) which uses '\\r\\n' as C(newline_sequence).
  - Including a string that uses a date in the template will result in the template being marked 'changed' each time
--- a/lib/ansible/plugins/action/template.py
+++ b/lib/ansible/plugins/action/template.py
@@ -58,6 +58,7 @@ class ActionModule(ActionBase):
        block_end_string = self._task.args.get('block_end_string', None)
        trim_blocks = boolean(self._task.args.get('trim_blocks', True), strict=False)
        lstrip_blocks = boolean(self._task.args.get('lstrip_blocks', False), strict=False)
+        output_encoding = self._task.args.get('output_encoding', 'utf-8') or 'utf-8'

        # Option `lstrip_blocks' was added in Jinja2 version 2.7.
        if lstrip_blocks:
@@ -176,13 +177,14 @@ class ActionModule(ActionBase):
            new_task.args.pop('variable_end_string', None)
            new_task.args.pop('trim_blocks', None)
            new_task.args.pop('lstrip_blocks', None)
+            new_task.args.pop('output_encoding', None)

            local_tempdir = tempfile.mkdtemp(dir=C.DEFAULT_LOCAL_TMP)

            try:
                result_file = os.path.join(local_tempdir, os.path.basename(source))
                with open(to_bytes(result_file, errors='surrogate_or_strict'), 'wb') as f:
-                    f.write(to_bytes(resultant, errors='surrogate_or_strict'))
+                    f.write(to_bytes(resultant, encoding=output_encoding, errors='surrogate_or_strict'))

                new_task.args.update(
                    dict(
--- a/test/integration/targets/template/files/encoding_1252_utf-8.expected
+++ b/test/integration/targets/template/files/encoding_1252_utf-8.expected
@@ -0,0 +1 @@
+windows-1252 Special Characters: €‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ
--- a/test/integration/targets/template/files/encoding_1252_windows-1252.expected
+++ b/test/integration/targets/template/files/encoding_1252_windows-1252.expected
@@ -0,0 +1 @@
+windows-1252 Special Characters: €‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ
--- a/test/integration/targets/template/tasks/main.yml
+++ b/test/integration/targets/template/tasks/main.yml
@@ -619,5 +619,28 @@
      - 'template_results.mode == "0547"'
      - 'stat_results.stat["mode"] == "0547"'

+# Test output_encoding
+- name: Prepare the list of encodings we want to check, including empty string for defaults
+  set_fact:
+    template_encoding_1252_encodings: ['', 'utf-8', 'windows-1252']
+
+- name: Copy known good encoding_1252_*.expected into place
+  copy:
+    src: 'encoding_1252_{{ item | default("utf-8", true) }}.expected'
+    dest: '{{ output_dir }}/encoding_1252_{{ item }}.expected'
+  loop: '{{ template_encoding_1252_encodings }}'
+
+- name: Generate the encoding_1252_* files from templates using various encoding combinations
+  template:
+    src: 'encoding_1252.j2'
+    dest: '{{ output_dir }}/encoding_1252_{{ item }}.txt'
+    output_encoding: '{{ item }}'
+  loop: '{{ template_encoding_1252_encodings }}'
+
+- name: Compare the encoding_1252_* templated files to known good
+  command: diff -u {{ output_dir }}/encoding_1252_{{ item }}.expected {{ output_dir }}/encoding_1252_{{ item }}.txt
+  register: encoding_1252_diff_result
+  loop: '{{ template_encoding_1252_encodings }}'
+
 # aliases file requires root for template tests so this should be safe
 - include: backup_test.yml
--- a/test/integration/targets/template/templates/encoding_1252.j2
+++ b/test/integration/targets/template/templates/encoding_1252.j2
@@ -0,0 +1 @@
+windows-1252 Special Characters: €‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ
--- a/test/sanity/code-smell/no-smart-quotes.py
+++ b/test/sanity/code-smell/no-smart-quotes.py
@@ -12,6 +12,9 @@ def main():
        'docs/docsite/rst/dev_guide/testing/sanity/no-smart-quotes.rst',
        'test/integration/targets/unicode/unicode.yml',
        'test/integration/targets/lookup_properties/lookup-8859-15.ini',
+        'test/integration/targets/template/files/encoding_1252_utf-8.expected',
+        'test/integration/targets/template/files/encoding_1252_windows-1252.expected',
+        'test/integration/targets/template/templates/encoding_1252.j2',
    ])

    for path in sys.argv[1:] or sys.stdin.read().splitlines():
				`@@ -0,0 +1 @@`
				`windows-1252 Special Characters: €‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ`
				`@@ -0,0 +1 @@`
				`windows-1252 Special Characters: €‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ`