From 1284c49bd7af15875da5a88fb6d698becee0e24b Mon Sep 17 00:00:00 2001 From: Abhijit Menon-Sen Date: Fri, 14 Aug 2015 16:20:27 +0530 Subject: [PATCH 1/5] Rewrite the INI InventoryParser The new code parses INI-format inventory files in a single pass using a well-documented state machine that reports precise errors and eliminates the duplications and inconsistencies and outright errors in the earlier three-phase parsing code (e.g. three ways to skip comments). It is also much easier now to follow what decisions are being taken on the basis of the parsed data. The comments point out various potential improvements, particularly in the area of consistent IPv6 handling. On the ornate marble tombstone of the old code, the following inscription is one last baffling memento from a bygone age: - def _before_comment(self, msg): - ''' what's the part of a string before a comment? ''' - msg = msg.replace("\#","**NOT_A_COMMENT**") - msg = msg.split("#")[0] - msg = msg.replace("**NOT_A_COMMENT**","#") - return msg --- lib/ansible/inventory/ini.py | 486 +++++++++++++++++++++++------------ 1 file changed, 325 insertions(+), 161 deletions(-) diff --git a/lib/ansible/inventory/ini.py b/lib/ansible/inventory/ini.py index ce92c31bc1..eddaf93187 100644 --- a/lib/ansible/inventory/ini.py +++ b/lib/ansible/inventory/ini.py @@ -1,4 +1,4 @@ -# (c) 2012-2014, Michael DeHaan +# Copyright 2015 Abhijit Menon-Sen # # This file is part of Ansible # @@ -33,28 +33,288 @@ from ansible.utils.unicode import to_unicode class InventoryParser(object): """ - Host inventory for ansible. + Takes an INI-format inventory file and builds a list of groups and subgroups + with their associated hosts and variable settings. """ def __init__(self, filename=C.DEFAULT_HOST_LIST): self.filename = filename + # Start with an empty host list and the default 'all' and + # 'ungrouped' groups. + + self.hosts = {} + self.patterns = {} + self.groups = dict( + all = Group(name='all'), + ungrouped = Group(name='ungrouped') + ) + + # Read in the hosts, groups, and variables defined in the + # inventory file. + with open(filename) as fh: self.lines = fh.readlines() - self.groups = {} - self.hosts = {} self._parse() - def _parse(self): + # Finally, add all top-level groups (including 'ungrouped') as + # children of 'all'. - self._parse_base_groups() - self._parse_group_children() - self._add_allgroup_children() - self._parse_group_variables() - return self.groups + for group in self.groups.values(): + if group.depth == 0 and group.name != 'all': + self.groups['all'].add_child_group(group) + + # Note: we could discard self.hosts after this point. + + def _parse(self): + ''' + Populates self.groups from the contents of self.lines. Raises an error + on any parse failure. + ''' + + self._compile_patterns() + + # We behave as though the first line of the inventory is '[ungrouped]', + # and begin to look for host definitions. We make a single pass through + # each line of the inventory, building up self.groups and adding hosts, + # subgroups, and setting variables as we go. + + pending_declarations = {} + section = 'ungrouped' + state = 'hosts' + + i = 0 + for line in self.lines: + i += 1 + + # Is there a better way to get rid of the ending \n? + line = line.strip() + + # Skip empty lines and comments + if line == '' or line.startswith(";") or line.startswith("#"): + continue + + # Is this a [section] header? That tells us what group we're parsing + # definitions for, and what kind of definitions to expect. + + m = self.patterns['section'].match(line) + if m: + (section, state) = m.groups() + + state = state or 'hosts' + if state not in ['hosts', 'children', 'vars']: + title = ":".join(m.groups()) + raise AnsibleError("%s:%d: Section [%s] has unknown type: %s" % (self.filename, i, title, state)) + + # If we haven't seen this section before, we add a new Group. + # + # Either [groupname] or [groupname:children] is sufficient to + # declare a group, but [groupname:vars] is allowed only if the + # group is declared elsewhere (not necessarily earlier). We add + # the group anyway, but make a note in pending_declarations to + # check at the end. + + if section not in self.groups: + self.groups[section] = Group(name=section) + + if state == 'vars': + pending_declarations[section] = dict(line=i, state=state, name=section) + + # When we see a declaration that we've been waiting for, we can + # delete the note. + + if section in pending_declarations and state != 'vars': + del pending_declarations[section] + + continue + + # It's not a section, so the current state tells us what kind of + # definition it must be. The individual parsers will raise an + # error if we feed them something they can't digest. + + # [groupname] contains host definitions that must be added to + # the current group. + if state == 'hosts': + hosts = self._parse_host_definition(line, i) + for h in hosts: + self.groups[section].add_host(h) + + # [groupname:vars] contains variable definitions that must be + # applied to the current group. + elif state == 'vars': + (k, v) = self._parse_variable_definition(line, i) + self.groups[section].set_variable(k, v) + + # [groupname:children] contains subgroup names that must be + # added as children of the current group. The subgroup names + # must themselves be declared as groups, but as before, they + # may only be declared later. + elif state == 'children': + child = self._parse_group_name(line, i) + + if child not in self.groups: + self.groups[child] = Group(name=child) + pending_declarations[child] = dict(line=i, state=state, name=child, parent=section) + + self.groups[section].add_child_group(self.groups[child]) + + # Note: there's no reason why we couldn't accept variable + # definitions here, and set them on the named child group. + + # This is a fencepost. It can happen only if the state checker + # accepts a state that isn't handled above. + else: + raise AnsibleError("%s:%d: Entered unhandled state: %s" % (self.filename, i, state)) + + # Any entries in pending_declarations not removed by a group declaration + # above mean that there was an unresolved forward reference. We report + # only the first such error here. + + for g in pending_declarations: + decl = pending_declarations[g] + if decl['state'] == 'vars': + raise AnsibleError("%s:%d: Section [%s:vars] not valid for undefined group: %s" % (self.filename, decl['line'], decl['name'], decl['name'])) + elif decl['state'] == 'children': + raise AnsibleError("%s:%d: Section [%s:children] includes undefined group: %s" % (self.filename, decl['line'], decl['parent'], decl['name'])) + + def _parse_group_name(self, line, i): + ''' + Takes a single line and tries to parse it as a group name. Returns the + group name if successful, or raises an error. + ''' + + m = self.patterns['groupname'].match(line) + if m: + return m.group(1) + + raise AnsibleError("%s:%d: Expected group name, got: %s" % (self.filename, i, line)) + + def _parse_variable_definition(self, line, i): + ''' + Takes a string and tries to parse it as a variable definition. Returns + the key and value if successful, or raises an error. + ''' + + # TODO: We parse variable assignments as a key (anything to the left of + # an '='"), an '=', and a value (anything left) and leave the value to + # _parse_value to sort out. We should be more systematic here about + # defining what is acceptable, how quotes work, and so on. + + if '=' in line: + (k, v) = [e.strip() for e in line.split("=", 1)] + return (k, self._parse_value(v)) + + raise AnsibleError("%s:%d: Expected key=value, got: %s" % (self.filename, i, line)) + + def _parse_host_definition(self, line, i): + ''' + Takes a single line and tries to parse it as a host definition. Returns + a list of Hosts if successful, or raises an error. + ''' + + # A host definition comprises (1) a non-whitespace hostname or range, + # optionally followed by (2) a series of key="some value" assignments. + # We ignore any trailing whitespace and/or comments. For example, here + # are a series of host definitions in a group: + # + # [groupname] + # alpha + # beta:2345 user=admin # we'll tell shlex + # gamma sudo=True user=root # to ignore comments + + try: + tokens = shlex.split(line, comments=True) + except ValueError as e: + raise AnsibleError("%s:%d: Error parsing host definition '%s': %s" % (self.filename, i, varstring, e)) + + (hostnames, port) = self._expand_hostpattern(tokens[0]) + hosts = self._Hosts(hostnames, port) + + # Try to process anything remaining as a series of key=value pairs. + + variables = {} + for t in tokens[1:]: + if '=' not in t: + raise AnsibleError("%s:%d: Expected key=value host variable assignment, got: %s" % (self.filename, i, t)) + (k, v) = t.split('=', 1) + variables[k] = self._parse_value(v) + + # Apply any variable settings found to every host. + + for h in hosts: + for k in variables: + h.set_variable(k, variables[k]) + if k == 'ansible_ssh_host': + h.ipv4_address = variables[k] + + return hosts + + def _expand_hostpattern(self, pattern): + ''' + Takes a single host pattern and returns a list of hostnames and an + optional port number that applies to all of them. + ''' + + # First, we extract the port number. This is usually ":NN" at the end of + # the expression, but for IPv6 addresses it's ".NN" instead. In either + # case, we remove it. + + port = None + if ':' in pattern: + pos = pattern.rindex(':') + try: + port = int(pattern[pos+1:]) + pattern = pattern[0:pos] + except ValueError: + pass + else: + m = self.patterns['ipv6_hostport'].match(pattern) + if m: + (pattern, port) = m.groups() + + # We're done, because we know this is a single IPv6 address. + # But should we support ranges for IPv6 address generation? + # See the FIXME note below. We should probably just accept + # "[xxx]:nn" syntax instead, and then let xxx be expanded. + + return ([pattern], int(port)) + + # Now we're left with just the pattern, which results in a list of one + # or more hostnames, depending on whether it contains any [x:y] ranges. + + if detect_range(pattern): + hostnames = expand_hostname_range(pattern) + else: + hostnames = [pattern] + + return (hostnames, port) + + def _Hosts(self, hostnames, port): + ''' + Takes a list of hostnames and a port (which may be None) and returns a + list of Hosts (without recreating anything in self.hosts). + ''' + + hosts = [] + + # Note that we decide whether or not to create a Host based solely on + # the (non-)existence of its hostname in self.hosts. This means that one + # cannot add both "foo:22" and "foo:23" to the inventory. This behaviour + # is preserved for now, but this may be an easy FIXME. + + for hn in hostnames: + if hn not in self.hosts: + self.hosts[hn] = Host(name=hn, port=port) + hosts.append(self.hosts[hn]) + + return hosts @staticmethod def _parse_value(v): + ''' + Does something with something and returns something. Not for mere + mortals such as myself to interpret. + ''' if "#" not in v: try: v = ast.literal_eval(v) @@ -68,158 +328,62 @@ class InventoryParser(object): pass return to_unicode(v, nonstring='passthru', errors='strict') - # [webservers] - # alpha - # beta:2345 - # gamma sudo=True user=root - # delta asdf=jkl favcolor=red - - def _add_allgroup_children(self): - - for group in self.groups.values(): - if group.depth == 0 and group.name != 'all': - self.groups['all'].add_child_group(group) - - - def _parse_base_groups(self): - # FIXME: refactor - - ungrouped = Group(name='ungrouped') - all = Group(name='all') - all.add_child_group(ungrouped) - - self.groups = dict(all=all, ungrouped=ungrouped) - active_group_name = 'ungrouped' - - i = 0 - for line in self.lines: - i += 1 - line = self._before_comment(line).strip() - if line.startswith("[") and line.endswith("]"): - active_group_name = line.replace("[","").replace("]","") - if ":vars" in line or ":children" in line: - active_group_name = active_group_name.rsplit(":", 1)[0] - if active_group_name not in self.groups: - new_group = self.groups[active_group_name] = Group(name=active_group_name) - active_group_name = None - elif active_group_name not in self.groups: - new_group = self.groups[active_group_name] = Group(name=active_group_name) - elif line.startswith(";") or line == '': - pass - elif active_group_name: - try: - tokens = shlex.split(line) - except ValueError as e: - raise AnsibleError("Error in %s, unable to parse L#%d: %s\n\n\t%s\n" % (self.filename, i, str(e), line)) - - if len(tokens) == 0: - continue - hostname = tokens[0] - port = None - # Three cases to check: - # 0. A hostname that contains a range pesudo-code and a port - # 1. A hostname that contains just a port - if hostname.count(":") > 1: - # Possible an IPv6 address, or maybe a host line with multiple ranges - # IPv6 with Port XXX:XXX::XXX.port - # FQDN foo.example.com - if hostname.count(".") == 1: - (hostname, port) = hostname.rsplit(".", 1) - elif ("[" in hostname and - "]" in hostname and - ":" in hostname and - (hostname.rindex("]") < hostname.rindex(":")) or - ("]" not in hostname and ":" in hostname)): - (hostname, port) = hostname.rsplit(":", 1) - - hostnames = [] - if detect_range(hostname): - hostnames = expand_hostname_range(hostname) - else: - hostnames = [hostname] - - for hn in hostnames: - host = None - if hn in self.hosts: - host = self.hosts[hn] - else: - host = Host(name=hn, port=port) - self.hosts[hn] = host - if len(tokens) > 1: - for t in tokens[1:]: - if t.startswith('#'): - break - try: - (k,v) = t.split("=", 1) - except ValueError, e: - raise AnsibleError("Invalid ini entry in %s: %s - %s" % (self.filename, t, str(e))) - v = self._parse_value(v) - if k == 'ansible_ssh_host': - host.ipv4_address = v - host.set_variable(k, v) - self.groups[active_group_name].add_host(host) - - # [southeast:children] - # atlanta - # raleigh - - def _parse_group_children(self): - group = None - - for line in self.lines: - line = line.strip() - if line is None or line == '': - continue - if line.startswith("[") and ":children]" in line: - line = line.replace("[","").replace(":children]","") - group = self.groups.get(line, None) - if group is None: - group = self.groups[line] = Group(name=line) - elif line.startswith("#") or line.startswith(";"): - pass - elif line.startswith("["): - group = None - elif group: - kid_group = self.groups.get(line, None) - if kid_group is None: - raise AnsibleError("child group is not defined: (%s)" % line) - else: - group.add_child_group(kid_group) - - - # [webservers:vars] - # http_port=1234 - # maxRequestsPerChild=200 - - def _parse_group_variables(self): - group = None - for line in self.lines: - line = line.strip() - if line.startswith("[") and ":vars]" in line: - line = line.replace("[","").replace(":vars]","") - group = self.groups.get(line, None) - if group is None: - raise AnsibleError("can't add vars to undefined group: %s" % line) - elif line.startswith("#") or line.startswith(";"): - pass - elif line.startswith("["): - group = None - elif line == '': - pass - elif group: - if "=" not in line: - raise AnsibleError("variables assigned to group must be in key=value form") - else: - (k, v) = [e.strip() for e in line.split("=", 1)] - group.set_variable(k, self._parse_value(v)) - def get_host_variables(self, host): return {} - def _before_comment(self, msg): - ''' what's the part of a string before a comment? ''' - msg = msg.replace("\#","**NOT_A_COMMENT**") - msg = msg.split("#")[0] - msg = msg.replace("**NOT_A_COMMENT**","#") - return msg + def _compile_patterns(self): + ''' + Compiles the regular expressions required to parse the inventory and + stores them in self.patterns. + ''' + # Section names are square-bracketed expressions at the beginning of a + # line, comprising (1) a group name optionally followed by (2) a tag + # that specifies the contents of the section. We ignore any trailing + # whitespace and/or comments. For example: + # + # [groupname] + # [somegroup:vars] + # [naughty:children] # only get coal in their stockings + + self.patterns['section'] = re.compile( + r'''^\[ + ([^:\]\s]+) # group name (see groupname below) + (?::(\w+))? # optional : and tag name + \] + \s* # ignore trailing whitespace + (?:\#.*)? # and/or a comment till the + $ # end of the line + ''', re.X + ) + + # FIXME: What are the real restrictions on group names, or rather, what + # should they be? At the moment, they must be non-empty sequences of non + # whitespace characters excluding ':' and ']', but we should define more + # precise rules in order to support better diagnostics. The same applies + # to hostnames. + + self.patterns['groupname'] = re.compile( + r'''^ + ([^:\]\s]+) + \s* # ignore trailing whitespace + (?:\#.*)? # and/or a comment till the + $ # end of the line + ''', re.X + ) + + # This matches an IPv6 address, a '.', and a port number. It's not yet + # very strict about matching the IPv6 address. + # + # FIXME: There are various shortcomings in the IPv6 handling in the + # old code, which aren't fixed here yet. For example, Inventory's + # parse_inventory() method seems to accept "[ipv6]:nn" syntax. We + # should pick one and stick with it. + + self.patterns['ipv6_hostport'] = re.compile( + r'''^ + ([a-fA-F0-9:]+) + \.([0-9]+) + $ + ''', re.X + ) From 98a19057962fae31d75046c326795c3f9fe5cd09 Mon Sep 17 00:00:00 2001 From: Abhijit Menon-Sen Date: Wed, 19 Aug 2015 20:32:08 +0530 Subject: [PATCH 2/5] Rename 'section' to 'groupname' to better reflect its purpose --- lib/ansible/inventory/ini.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/lib/ansible/inventory/ini.py b/lib/ansible/inventory/ini.py index eddaf93187..127f8863f7 100644 --- a/lib/ansible/inventory/ini.py +++ b/lib/ansible/inventory/ini.py @@ -80,7 +80,7 @@ class InventoryParser(object): # subgroups, and setting variables as we go. pending_declarations = {} - section = 'ungrouped' + groupname = 'ungrouped' state = 'hosts' i = 0 @@ -99,14 +99,14 @@ class InventoryParser(object): m = self.patterns['section'].match(line) if m: - (section, state) = m.groups() + (groupname, state) = m.groups() state = state or 'hosts' if state not in ['hosts', 'children', 'vars']: title = ":".join(m.groups()) raise AnsibleError("%s:%d: Section [%s] has unknown type: %s" % (self.filename, i, title, state)) - # If we haven't seen this section before, we add a new Group. + # If we haven't seen this group before, we add a new Group. # # Either [groupname] or [groupname:children] is sufficient to # declare a group, but [groupname:vars] is allowed only if the @@ -114,17 +114,17 @@ class InventoryParser(object): # the group anyway, but make a note in pending_declarations to # check at the end. - if section not in self.groups: - self.groups[section] = Group(name=section) + if groupname not in self.groups: + self.groups[groupname] = Group(name=groupname) if state == 'vars': - pending_declarations[section] = dict(line=i, state=state, name=section) + pending_declarations[groupname] = dict(line=i, state=state, name=groupname) # When we see a declaration that we've been waiting for, we can # delete the note. - if section in pending_declarations and state != 'vars': - del pending_declarations[section] + if groupname in pending_declarations and state != 'vars': + del pending_declarations[groupname] continue @@ -137,13 +137,13 @@ class InventoryParser(object): if state == 'hosts': hosts = self._parse_host_definition(line, i) for h in hosts: - self.groups[section].add_host(h) + self.groups[groupname].add_host(h) # [groupname:vars] contains variable definitions that must be # applied to the current group. elif state == 'vars': (k, v) = self._parse_variable_definition(line, i) - self.groups[section].set_variable(k, v) + self.groups[groupname].set_variable(k, v) # [groupname:children] contains subgroup names that must be # added as children of the current group. The subgroup names @@ -154,9 +154,9 @@ class InventoryParser(object): if child not in self.groups: self.groups[child] = Group(name=child) - pending_declarations[child] = dict(line=i, state=state, name=child, parent=section) + pending_declarations[child] = dict(line=i, state=state, name=child, parent=groupname) - self.groups[section].add_child_group(self.groups[child]) + self.groups[groupname].add_child_group(self.groups[child]) # Note: there's no reason why we couldn't accept variable # definitions here, and set them on the named child group. From 9133cd409ccda419c237f9e2bfefbdde85e752f2 Mon Sep 17 00:00:00 2001 From: Abhijit Menon-Sen Date: Wed, 19 Aug 2015 20:43:48 +0530 Subject: [PATCH 3/5] Make _parse take an array of input lines as an argument (There's no compelling reason to do this right now, but should be parser need to be called multiple times in future, this makes it easier.) --- lib/ansible/inventory/ini.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/lib/ansible/inventory/ini.py b/lib/ansible/inventory/ini.py index 127f8863f7..a651a1b350 100644 --- a/lib/ansible/inventory/ini.py +++ b/lib/ansible/inventory/ini.py @@ -54,8 +54,7 @@ class InventoryParser(object): # inventory file. with open(filename) as fh: - self.lines = fh.readlines() - self._parse() + self._parse(fh.readlines()) # Finally, add all top-level groups (including 'ungrouped') as # children of 'all'. @@ -66,10 +65,10 @@ class InventoryParser(object): # Note: we could discard self.hosts after this point. - def _parse(self): + def _parse(self, lines): ''' - Populates self.groups from the contents of self.lines. Raises an error - on any parse failure. + Populates self.groups from the given array of lines. Raises an error on + any parse failure. ''' self._compile_patterns() @@ -84,10 +83,9 @@ class InventoryParser(object): state = 'hosts' i = 0 - for line in self.lines: + for line in lines: i += 1 - # Is there a better way to get rid of the ending \n? line = line.strip() # Skip empty lines and comments From 74aab6f726897707a6a952291cb6a46de80af3bd Mon Sep 17 00:00:00 2001 From: Abhijit Menon-Sen Date: Wed, 19 Aug 2015 20:58:06 +0530 Subject: [PATCH 4/5] Use a self._raise_error helper and avoid passing the lineno around Based on a patch by @Richard2ndQuadrant. --- lib/ansible/inventory/ini.py | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/lib/ansible/inventory/ini.py b/lib/ansible/inventory/ini.py index a651a1b350..a0d8f0bfa2 100644 --- a/lib/ansible/inventory/ini.py +++ b/lib/ansible/inventory/ini.py @@ -65,6 +65,9 @@ class InventoryParser(object): # Note: we could discard self.hosts after this point. + def _raise_error(self, message): + raise AnsibleError("%s:%d: " % (self.filename, self.lineno) + message) + def _parse(self, lines): ''' Populates self.groups from the given array of lines. Raises an error on @@ -82,9 +85,9 @@ class InventoryParser(object): groupname = 'ungrouped' state = 'hosts' - i = 0 + self.lineno = 0 for line in lines: - i += 1 + self.lineno += 1 line = line.strip() @@ -102,7 +105,7 @@ class InventoryParser(object): state = state or 'hosts' if state not in ['hosts', 'children', 'vars']: title = ":".join(m.groups()) - raise AnsibleError("%s:%d: Section [%s] has unknown type: %s" % (self.filename, i, title, state)) + self._raise_error("Section [%s] has unknown type: %s" % (title, state)) # If we haven't seen this group before, we add a new Group. # @@ -116,7 +119,7 @@ class InventoryParser(object): self.groups[groupname] = Group(name=groupname) if state == 'vars': - pending_declarations[groupname] = dict(line=i, state=state, name=groupname) + pending_declarations[groupname] = dict(line=self.lineno, state=state, name=groupname) # When we see a declaration that we've been waiting for, we can # delete the note. @@ -133,14 +136,14 @@ class InventoryParser(object): # [groupname] contains host definitions that must be added to # the current group. if state == 'hosts': - hosts = self._parse_host_definition(line, i) + hosts = self._parse_host_definition(line) for h in hosts: self.groups[groupname].add_host(h) # [groupname:vars] contains variable definitions that must be # applied to the current group. elif state == 'vars': - (k, v) = self._parse_variable_definition(line, i) + (k, v) = self._parse_variable_definition(line) self.groups[groupname].set_variable(k, v) # [groupname:children] contains subgroup names that must be @@ -148,11 +151,11 @@ class InventoryParser(object): # must themselves be declared as groups, but as before, they # may only be declared later. elif state == 'children': - child = self._parse_group_name(line, i) + child = self._parse_group_name(line) if child not in self.groups: self.groups[child] = Group(name=child) - pending_declarations[child] = dict(line=i, state=state, name=child, parent=groupname) + pending_declarations[child] = dict(line=self.lineno, state=state, name=child, parent=groupname) self.groups[groupname].add_child_group(self.groups[child]) @@ -162,7 +165,7 @@ class InventoryParser(object): # This is a fencepost. It can happen only if the state checker # accepts a state that isn't handled above. else: - raise AnsibleError("%s:%d: Entered unhandled state: %s" % (self.filename, i, state)) + self._raise_error("Entered unhandled state: %s" % (state)) # Any entries in pending_declarations not removed by a group declaration # above mean that there was an unresolved forward reference. We report @@ -175,7 +178,7 @@ class InventoryParser(object): elif decl['state'] == 'children': raise AnsibleError("%s:%d: Section [%s:children] includes undefined group: %s" % (self.filename, decl['line'], decl['parent'], decl['name'])) - def _parse_group_name(self, line, i): + def _parse_group_name(self, line): ''' Takes a single line and tries to parse it as a group name. Returns the group name if successful, or raises an error. @@ -185,9 +188,9 @@ class InventoryParser(object): if m: return m.group(1) - raise AnsibleError("%s:%d: Expected group name, got: %s" % (self.filename, i, line)) + self._raise_error("Expected group name, got: %s" % (line)) - def _parse_variable_definition(self, line, i): + def _parse_variable_definition(self, line): ''' Takes a string and tries to parse it as a variable definition. Returns the key and value if successful, or raises an error. @@ -202,9 +205,9 @@ class InventoryParser(object): (k, v) = [e.strip() for e in line.split("=", 1)] return (k, self._parse_value(v)) - raise AnsibleError("%s:%d: Expected key=value, got: %s" % (self.filename, i, line)) + self._raise_error("Expected key=value, got: %s" % (line)) - def _parse_host_definition(self, line, i): + def _parse_host_definition(self, line): ''' Takes a single line and tries to parse it as a host definition. Returns a list of Hosts if successful, or raises an error. @@ -223,7 +226,7 @@ class InventoryParser(object): try: tokens = shlex.split(line, comments=True) except ValueError as e: - raise AnsibleError("%s:%d: Error parsing host definition '%s': %s" % (self.filename, i, varstring, e)) + self._raise_error("Error parsing host definition '%s': %s" % (varstring, e)) (hostnames, port) = self._expand_hostpattern(tokens[0]) hosts = self._Hosts(hostnames, port) @@ -233,7 +236,7 @@ class InventoryParser(object): variables = {} for t in tokens[1:]: if '=' not in t: - raise AnsibleError("%s:%d: Expected key=value host variable assignment, got: %s" % (self.filename, i, t)) + self._raise_error("Expected key=value host variable assignment, got: %s" % (t)) (k, v) = t.split('=', 1) variables[k] = self._parse_value(v) From 745ecd4845fc2eb75f175f25385afb891849b1bb Mon Sep 17 00:00:00 2001 From: Abhijit Menon-Sen Date: Thu, 20 Aug 2015 22:03:22 +0530 Subject: [PATCH 5/5] Sanitize IPv6 hostname/port handling Now we accept IPv6 addresses _with port numbers_ only in the standard [xxx]:NN notation (though bare IPv6 addresses may be given, as before, and non-IPv6 addresses may also be placed in square brackets), and any other host identifiers (IPv4/hostname/host pattern) as before, with an optional :NN suffix. --- lib/ansible/inventory/ini.py | 73 +++++++++++++++++++----------------- 1 file changed, 38 insertions(+), 35 deletions(-) diff --git a/lib/ansible/inventory/ini.py b/lib/ansible/inventory/ini.py index a0d8f0bfa2..2769632ef2 100644 --- a/lib/ansible/inventory/ini.py +++ b/lib/ansible/inventory/ini.py @@ -250,43 +250,36 @@ class InventoryParser(object): return hosts - def _expand_hostpattern(self, pattern): + def _expand_hostpattern(self, hostpattern): ''' Takes a single host pattern and returns a list of hostnames and an optional port number that applies to all of them. ''' - # First, we extract the port number. This is usually ":NN" at the end of - # the expression, but for IPv6 addresses it's ".NN" instead. In either - # case, we remove it. + # Is a port number specified? + # + # This may be a mandatory :NN suffix on any square-bracketed expression + # (IPv6 address, IPv4 address, host name, host pattern), or an optional + # :NN suffix on an IPv4 address, host name, or pattern. IPv6 addresses + # must be in square brackets if a port is specified. port = None - if ':' in pattern: - pos = pattern.rindex(':') - try: - port = int(pattern[pos+1:]) - pattern = pattern[0:pos] - except ValueError: - pass - else: - m = self.patterns['ipv6_hostport'].match(pattern) + + for type in ['bracketed_hostport', 'hostport']: + m = self.patterns[type].match(hostpattern) if m: - (pattern, port) = m.groups() - - # We're done, because we know this is a single IPv6 address. - # But should we support ranges for IPv6 address generation? - # See the FIXME note below. We should probably just accept - # "[xxx]:nn" syntax instead, and then let xxx be expanded. - - return ([pattern], int(port)) + (hostpattern, port) = m.groups() + continue # Now we're left with just the pattern, which results in a list of one # or more hostnames, depending on whether it contains any [x:y] ranges. + # + # FIXME: We could be more strict here about validation. - if detect_range(pattern): - hostnames = expand_hostname_range(pattern) + if detect_range(hostpattern): + hostnames = expand_hostname_range(hostpattern) else: - hostnames = [pattern] + hostnames = [hostpattern] return (hostnames, port) @@ -362,7 +355,7 @@ class InventoryParser(object): # should they be? At the moment, they must be non-empty sequences of non # whitespace characters excluding ':' and ']', but we should define more # precise rules in order to support better diagnostics. The same applies - # to hostnames. + # to hostnames. It seems sensible for them both to follow DNS rules. self.patterns['groupname'] = re.compile( r'''^ @@ -373,18 +366,28 @@ class InventoryParser(object): ''', re.X ) - # This matches an IPv6 address, a '.', and a port number. It's not yet - # very strict about matching the IPv6 address. - # - # FIXME: There are various shortcomings in the IPv6 handling in the - # old code, which aren't fixed here yet. For example, Inventory's - # parse_inventory() method seems to accept "[ipv6]:nn" syntax. We - # should pick one and stick with it. + # The following patterns match the various ways in which a port number + # may be specified on an IPv6 address, IPv4 address, hostname, or host + # pattern. All of the above may be enclosed in square brackets with a + # mandatory :NN suffix; or all but the first may be given without any + # brackets but with an :NN suffix. - self.patterns['ipv6_hostport'] = re.compile( + self.patterns['bracketed_hostport'] = re.compile( r'''^ - ([a-fA-F0-9:]+) - \.([0-9]+) + \[(.+)\] # [host identifier] + :([0-9]+) # :port number + $ + ''', re.X + ) + + self.patterns['hostport'] = re.compile( + r'''^ + ((?: # We want to match: + [^:\[\]] # (a non-range character + | # ...or... + \[[^\]]*\] # a complete bracketed expression) + )*) # repeated as many times as possible + :([0-9]+) # followed by a port number $ ''', re.X )