mirror of
https://github.com/ansible-collections/community.general.git
synced 2024-09-14 20:13:21 +02:00
dd0189839e
* Fix bug (#18355) where encrypted inventories fail. This is the first part of the fix for #18355. * Make DataLoader._get_file_contents return bytes. The issue #18355 is caused by a change to inventory to stop using _get_file_contents so that it can handle text encoding itself to better protect against harmless text encoding errors in ini files (invalid unicode text in comment fields). So this makes _get_file_contents return bytes so it and other callers can handle the to_text(). The data returned by _get_file_contents() is now a bytes object instead of a text object. The callers of _get_file_contents() have been updated to call to_text() themselves on the results. Previously, the ini parser attempted to work around ini files that potentially include invalid unicode in comment lines. To do this, it stopped using DataLoader._get_file_contents(), which does the decryption of files if vault encrypted. It didn't use that because _get_file_contents previously did to_text() on the read data itself. _get_file_contents() returns a bytestring now, so ini.py can call it and still special-case ini file comments when converting to_text(). That also means encrypted inventory files are decrypted first. Fixes #18355
376 lines
15 KiB
Python
376 lines
15 KiB
Python
# Copyright 2015 Abhijit Menon-Sen <ams@2ndQuadrant.com>
|
|
#
|
|
# This file is part of Ansible
|
|
#
|
|
# Ansible is free software: you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# Ansible is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with Ansible. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
#############################################
|
|
from __future__ import (absolute_import, division, print_function)
|
|
__metaclass__ = type
|
|
|
|
import ast
|
|
import re
|
|
|
|
from ansible import constants as C
|
|
from ansible.errors import AnsibleError
|
|
from ansible.inventory.host import Host
|
|
from ansible.inventory.group import Group
|
|
from ansible.inventory.expand_hosts import detect_range
|
|
from ansible.inventory.expand_hosts import expand_hostname_range
|
|
from ansible.module_utils._text import to_text
|
|
from ansible.parsing.utils.addresses import parse_address
|
|
from ansible.utils.shlex import shlex_split
|
|
|
|
|
|
class InventoryParser(object):
    """
    Takes an INI-format inventory file and builds a list of groups and subgroups
    with their associated hosts and variable settings.

    Parsing happens at construction time. Afterwards, self.hosts maps each
    hostname to its Host object and self.groups (the dict handed in by the
    caller, updated in place) maps each group name to its Group object.
    """

    # A line whose first non-blank character is one of these is a comment.
    # Text and bytes variants are kept separately because the file may have to
    # be inspected before it has been decoded.
    _COMMENT_MARKERS = (u';', u'#')
    b_COMMENT_MARKERS = (b';', b'#')

    def __init__(self, loader, groups, filename=C.DEFAULT_HOST_LIST):
        '''
        Reads the inventory file and parses it into self.hosts and self.groups.

        :arg loader: object whose _get_file_contents(filename) returns a
            2-tuple with the raw bytes of the file as its first element
            (presumably a DataLoader, which also takes care of vault
            decryption -- confirm against the caller). If falsy, the file is
            read straight from disk.
        :arg groups: dict mapping group names to Group objects. It should
            already contain the default 'all' and 'ungrouped' groups, and it
            is updated in place.
        :kwarg filename: path of the inventory file to read.
        :raises AnsibleError: on any parse failure.
        '''

        self.filename = filename

        # Start with an empty host list and whatever groups we're passed in
        # (which should include the default 'all' and 'ungrouped' groups).

        self.hosts = {}
        self.patterns = {}
        self.groups = groups

        # Read in the hosts, groups, and variables defined in the
        # inventory file.
        if loader:
            (b_data, _private) = loader._get_file_contents(filename)
        else:
            with open(filename, 'rb') as fh:
                b_data = fh.read()

        # Comment lines are deliberately NOT removed here: _parse() skips
        # them itself, and keeping one list entry per line of the file is
        # what makes the line numbers in error messages accurate.
        try:
            # Faster to do to_text once on a long string than many
            # times on smaller strings
            data = to_text(b_data, errors='surrogate_or_strict').splitlines()
        except UnicodeError:
            # The file as a whole could not be decoded. Undecodable bytes in
            # comments are harmless, so blank those lines out (keeping their
            # position) and decode only the remaining lines, which still have
            # to be valid: https://github.com/ansible/ansible/issues/17593
            data = [
                u'' if line.startswith(self.b_COMMENT_MARKERS)
                else to_text(line, errors='surrogate_or_strict')
                for line in b_data.splitlines()
            ]

        self._parse(data)

    def _raise_error(self, message):
        '''
        Raises an AnsibleError made of the given message prefixed with the
        inventory filename and the number of the line being parsed.
        '''

        raise AnsibleError("%s:%d: " % (self.filename, self.lineno) + message)

    def _parse(self, lines):
        '''
        Populates self.groups from the given array of lines. Raises an error on
        any parse failure.

        Each element of lines must be the text of one line of the inventory,
        in file order. Blank lines and comment lines are skipped but still
        counted, so that self.lineno matches the position in the file.
        '''

        self._compile_patterns()

        # We behave as though the first line of the inventory is '[ungrouped]',
        # and begin to look for host definitions. We make a single pass through
        # each line of the inventory, building up self.groups and adding hosts,
        # subgroups, and setting variables as we go.

        pending_declarations = {}
        groupname = 'ungrouped'
        state = 'hosts'

        self.lineno = 0
        for line in lines:
            self.lineno += 1

            line = line.strip()

            # Skip empty lines and comments. The comment check is made after
            # stripping so that indented comments are ignored as well, rather
            # than being fed to the host/variable/group parsers below.
            if not line or line.startswith(self._COMMENT_MARKERS):
                continue

            # Is this a [section] header? That tells us what group we're parsing
            # definitions for, and what kind of definitions to expect.

            m = self.patterns['section'].match(line)
            if m:
                (groupname, state) = m.groups()

                state = state or 'hosts'
                if state not in ['hosts', 'children', 'vars']:
                    title = ":".join(m.groups())
                    self._raise_error("Section [%s] has unknown type: %s" % (title, state))

                # If we haven't seen this group before, we add a new Group.
                #
                # Either [groupname] or [groupname:children] is sufficient to
                # declare a group, but [groupname:vars] is allowed only if the
                # group is declared elsewhere (not necessarily earlier). We add
                # the group anyway, but make a note in pending_declarations to
                # check at the end.

                if groupname not in self.groups:
                    self.groups[groupname] = Group(name=groupname)

                    if state == 'vars':
                        pending_declarations[groupname] = dict(line=self.lineno, state=state, name=groupname)

                # When we see a declaration that we've been waiting for, we can
                # delete the note.

                if groupname in pending_declarations and state != 'vars':
                    del pending_declarations[groupname]

                continue
            elif line.startswith('[') and line.endswith(']'):
                self._raise_error("Invalid section entry: '%s'. Please make sure that there are no spaces "
                                  "in the section entry, and that there are no other invalid characters" % line)

            # It's not a section, so the current state tells us what kind of
            # definition it must be. The individual parsers will raise an
            # error if we feed them something they can't digest.

            # [groupname] contains host definitions that must be added to
            # the current group.
            if state == 'hosts':
                hosts = self._parse_host_definition(line)
                for h in hosts:
                    self.groups[groupname].add_host(h)

            # [groupname:vars] contains variable definitions that must be
            # applied to the current group. The group priority is not stored
            # as an ordinary variable but handed to Group.set_priority().
            elif state == 'vars':
                (k, v) = self._parse_variable_definition(line)
                if k != 'ansible_group_priority':
                    self.groups[groupname].set_variable(k, v)
                else:
                    self.groups[groupname].set_priority(v)

            # [groupname:children] contains subgroup names that must be
            # added as children of the current group. The subgroup names
            # must themselves be declared as groups, but as before, they
            # may only be declared later.
            elif state == 'children':
                child = self._parse_group_name(line)

                if child not in self.groups:
                    self.groups[child] = Group(name=child)
                    pending_declarations[child] = dict(line=self.lineno, state=state, name=child, parent=groupname)

                self.groups[groupname].add_child_group(self.groups[child])

                # Note: there's no reason why we couldn't accept variable
                # definitions here, and set them on the named child group.

            # This is a fencepost. It can happen only if the state checker
            # accepts a state that isn't handled above.
            else:
                self._raise_error("Entered unhandled state: %s" % (state))

        # Any entries in pending_declarations not removed by a group declaration
        # above mean that there was an unresolved forward reference. We report
        # only the first such error here.

        for g in pending_declarations:
            decl = pending_declarations[g]
            if decl['state'] == 'vars':
                raise AnsibleError("%s:%d: Section [%s:vars] not valid for undefined group: %s" % (self.filename, decl['line'], decl['name'], decl['name']))
            elif decl['state'] == 'children':
                raise AnsibleError("%s:%d: Section [%s:children] includes undefined group: %s" % (self.filename, decl['line'], decl['parent'], decl['name']))

        # Finally, add all top-level groups as children of 'all'.
        # We exclude ungrouped here because it was already added as a child of
        # 'all' at the time it was created.

        for group in self.groups.values():
            if group.depth == 0 and group.name not in ('all', 'ungrouped'):
                self.groups['all'].add_child_group(group)

    def _parse_group_name(self, line):
        '''
        Takes a single line and tries to parse it as a group name. Returns the
        group name if successful, or raises an error.
        '''

        m = self.patterns['groupname'].match(line)
        if m:
            return m.group(1)

        self._raise_error("Expected group name, got: %s" % (line))

    def _parse_variable_definition(self, line):
        '''
        Takes a string and tries to parse it as a variable definition. Returns
        the key and value if successful, or raises an error.

        The line is split on its first '='. Both halves are stripped of
        surrounding whitespace and the value is passed through _parse_value.
        '''

        # TODO: We parse variable assignments as a key (anything to the left of
        # an '='"), an '=', and a value (anything left) and leave the value to
        # _parse_value to sort out. We should be more systematic here about
        # defining what is acceptable, how quotes work, and so on.

        if '=' in line:
            (k, v) = [e.strip() for e in line.split("=", 1)]
            return (k, self._parse_value(v))

        self._raise_error("Expected key=value, got: %s" % (line))

    def _parse_host_definition(self, line):
        '''
        Takes a single line and tries to parse it as a host definition. Returns
        a list of Hosts if successful, or raises an error.
        '''

        # A host definition comprises (1) a non-whitespace hostname or range,
        # optionally followed by (2) a series of key="some value" assignments.
        # We ignore any trailing whitespace and/or comments. For example, here
        # are a series of host definitions in a group:
        #
        # [groupname]
        # alpha
        # beta:2345 user=admin      # we'll tell shlex
        # gamma sudo=True user=root # to ignore comments

        try:
            tokens = shlex_split(line, comments=True)
        except ValueError as e:
            self._raise_error("Error parsing host definition '%s': %s" % (line, e))

        # A line made up solely of a comment yields no tokens at all. Report
        # that as a parse error instead of failing on tokens[0] below.
        if not tokens:
            self._raise_error("Expected a host definition, got: %s" % (line))

        (hostnames, port) = self._expand_hostpattern(tokens[0])
        hosts = self._Hosts(hostnames, port)

        # Try to process anything remaining as a series of key=value pairs.

        variables = {}
        for t in tokens[1:]:
            if '=' not in t:
                self._raise_error("Expected key=value host variable assignment, got: %s" % (t))
            (k, v) = t.split('=', 1)
            variables[k] = self._parse_value(v)

        # Apply any variable settings found to every host. The two host
        # address variables additionally overwrite the Host's address.

        for h in hosts:
            for k in variables:
                h.set_variable(k, variables[k])
                if k in ['ansible_host', 'ansible_ssh_host']:
                    h.address = variables[k]

        return hosts

    def _expand_hostpattern(self, hostpattern):
        '''
        Takes a single host pattern and returns a list of hostnames and an
        optional port number that applies to all of them.
        '''

        # Can the given hostpattern be parsed as a host with an optional port
        # specification?

        try:
            (pattern, port) = parse_address(hostpattern, allow_ranges=True)
        except Exception:
            # not a recognizable host pattern; use it verbatim, without a
            # port. (Deliberately not a bare except, so that interrupts and
            # interpreter exit still propagate.)
            pattern = hostpattern
            port = None

        # Once we have separated the pattern, we expand it into list of one or
        # more hostnames, depending on whether it contains any [x:y] ranges.

        if detect_range(pattern):
            hostnames = expand_hostname_range(pattern)
        else:
            hostnames = [pattern]

        return (hostnames, port)

    def _Hosts(self, hostnames, port):
        '''
        Takes a list of hostnames and a port (which may be None) and returns a
        list of Hosts (without recreating anything in self.hosts).
        '''

        hosts = []

        # Note that we decide whether or not to create a Host based solely on
        # the (non-)existence of its hostname in self.hosts. This means that one
        # cannot add both "foo:22" and "foo:23" to the inventory.

        for hn in hostnames:
            if hn not in self.hosts:
                self.hosts[hn] = Host(name=hn, port=port)
            hosts.append(self.hosts[hn])

        return hosts

    @staticmethod
    def _parse_value(v):
        '''
        Attempt to transform the string value from an ini file into a basic python object
        (int, dict, list, unicode string, etc).

        Values containing a '#' are never evaluated. If evaluation fails, the
        original string is kept. String results are converted to text;
        non-string results are returned unchanged.
        '''
        if "#" not in v:
            try:
                v = ast.literal_eval(v)
            # Using explicit exceptions.
            # Likely a string that literal_eval does not like. We will then just set it.
            except ValueError:
                # For some reason this was thought to be malformed.
                pass
            except SyntaxError:
                # Is this a hash with an equals at the end?
                pass
        return to_text(v, nonstring='passthru', errors='surrogate_or_strict')

    def get_host_variables(self, host):
        '''
        Always returns an empty dict. Variables found in the inventory file
        are set directly on the Host objects while parsing.
        '''
        return {}

    def _compile_patterns(self):
        '''
        Compiles the regular expressions required to parse the inventory and
        stores them in self.patterns.
        '''

        # Section names are square-bracketed expressions at the beginning of a
        # line, comprising (1) a group name optionally followed by (2) a tag
        # that specifies the contents of the section. We ignore any trailing
        # whitespace and/or comments. For example:
        #
        # [groupname]
        # [somegroup:vars]
        # [naughty:children] # only get coal in their stockings

        self.patterns['section'] = re.compile(
            r'''^\[
                    ([^:\]\s]+)             # group name (see groupname below)
                    (?::(\w+))?             # optional : and tag name
                \]
                \s*                         # ignore trailing whitespace
                (?:\#.*)?                   # and/or a comment till the
                $                           # end of the line
            ''', re.X
        )

        # FIXME: What are the real restrictions on group names, or rather, what
        # should they be? At the moment, they must be non-empty sequences of non
        # whitespace characters excluding ':' and ']', but we should define more
        # precise rules in order to support better diagnostics.

        self.patterns['groupname'] = re.compile(
            r'''^
                ([^:\]\s]+)
                \s*                         # ignore trailing whitespace
                (?:\#.*)?                   # and/or a comment till the
                $                           # end of the line
            ''', re.X
        )
|