From 21642c0fbfecaa3db4cda1d6270bc8beeba8671c Mon Sep 17 00:00:00 2001
From: Michael DeHaan <michael@ansible.com>
Date: Tue, 7 Oct 2014 14:56:15 -0400
Subject: [PATCH] We need module args specific parsing classes.

---
 test/v2/playbook/test_task.py  |   1 -
 v2/ansible/parsing/__init__.py |   1 +
 v2/ansible/parsing/modargs.py  |  26 ++++
 v2/ansible/parsing/splitter.py | 215 +++++++++++++++++++++++++++++++++
 v2/ansible/playbook/task.py    |  24 +++-
 5 files changed, 260 insertions(+), 7 deletions(-)
 create mode 100644 v2/ansible/parsing/__init__.py
 create mode 100644 v2/ansible/parsing/modargs.py
 create mode 100644 v2/ansible/parsing/splitter.py

diff --git a/test/v2/playbook/test_task.py b/test/v2/playbook/test_task.py
index 124fc1bc98..8120ba36c5 100644
--- a/test/v2/playbook/test_task.py
+++ b/test/v2/playbook/test_task.py
@@ -47,7 +47,6 @@ class TestTask(unittest.TestCase):
     def test_can_auto_name(self):
         assert 'name' not in kv_shell_task
         t = Task.load(kv_shell_task)
-        print "GOT NAME=(%s)" % t.name
         assert t.name == 'shell echo hi'
 
     def test_can_auto_name_with_role(self):
diff --git a/v2/ansible/parsing/__init__.py b/v2/ansible/parsing/__init__.py
new file mode 100644
index 0000000000..44026bdff0
--- /dev/null
+++ b/v2/ansible/parsing/__init__.py
@@ -0,0 +1 @@
+# TODO: header
diff --git a/v2/ansible/parsing/modargs.py b/v2/ansible/parsing/modargs.py
new file mode 100644
index 0000000000..9af202c635
--- /dev/null
+++ b/v2/ansible/parsing/modargs.py
@@ -0,0 +1,26 @@
+# (c) 2014 Michael DeHaan, <michael@ansible.com>
+#
+# This file is part of Ansible
+#
+# Ansible is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Ansible is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Ansible.  If not, see <http://www.gnu.org/licenses/>.
+
+class ModArgsParser(object):
+    ''' placeholder for module-args-specific parsing; parse() is not implemented yet '''
+
+    def __init__(self, thing1, thing2):
+        pass
+
+    def parse(self):
+        ''' not implemented yet, so this always raises NotImplementedError '''
+        raise NotImplementedError
diff --git a/v2/ansible/parsing/splitter.py b/v2/ansible/parsing/splitter.py
new file mode 100644
index 0000000000..430f4e299a
--- /dev/null
+++ b/v2/ansible/parsing/splitter.py
@@ -0,0 +1,215 @@
+# (c) 2014 James Cammarata, <jcammarata@ansible.com>
+#
+# This file is part of Ansible
+#
+# Ansible is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Ansible is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Ansible.  If not, see <http://www.gnu.org/licenses/>.
+
+def parse_kv(args):
+    ''' convert a string of key/value items to a dict; args of None yields an empty dict '''
+    options = {}
+    if args is not None:
+        try:
+            vargs = split_args(args)
+        except ValueError as ve:
+            if 'no closing quotation' in str(ve).lower():
+                from ansible.errors import AnsibleError  # 'errors' is not imported by this module
+                raise AnsibleError("error parsing argument string, try quoting the entire line.")
+            raise
+        for x in vargs:
+            if "=" in x:
+                k, v = x.split("=",1)
+                options[k.strip()] = unquote(v.strip())
+    return options
+
+def _get_quote_state(token, quote_char):
+    '''
+    walks the token one character at a time and returns the quote
+    character that is still open once the token ends, or None when
+    every quote has been closed (quote_char carries in the prior state)
+    '''
+    previous = None
+    for current in token:
+        # a quote only counts when it is not preceded by a backslash
+        unescaped_quote = current in ('"', "'") and previous != '\\'
+        if unescaped_quote and not quote_char:
+            # no quote is open, so this character opens one
+            quote_char = current
+        elif unescaped_quote and current == quote_char:
+            # the matching character closes the open quote
+            quote_char = None
+        previous = current
+    return quote_char
+
+def _count_jinja2_blocks(token, cur_depth, open_token, close_token):
+    '''
+    returns cur_depth adjusted by the surplus of open_token over
+    close_token occurrences in token, never dropping below zero;
+    a balanced token leaves the depth exactly as it was passed in
+    '''
+    delta = token.count(open_token) - token.count(close_token)
+    if delta == 0:
+        # balanced token, the depth is untouched
+        return cur_depth
+    # an excess of closing markers is clamped to zero here
+    # rather than letting the depth go negative
+    return max(cur_depth + delta, 0)
+
+def split_args(args):
+    '''
+    Splits args on whitespace, but intelligently reassembles
+    those that may have been split over a jinja2 block or quotes.
+
+    When used in a remote module, we won't ever have to be concerned about
+    jinja2 blocks, however this function is/will be used in the
+    core portions as well before the args are templated.
+
+    example input: a=b c="foo bar"
+    example output: ['a=b', 'c="foo bar"']
+
+    Basically this is a variation of shlex that has some more intelligence for
+    how Ansible needs to use it. Raises Exception on unbalanced quotes or jinja2 blocks.
+    '''
+
+    # the list of params parsed out of the arg string
+    # this is going to be the result value when we are done
+    params = []
+
+    # here we strip and encode the args, so we have a uniform charset to
+    # work with, and split on newlines (each line is split on spaces below)
+    args = args.strip()
+    try:
+        args = args.encode('utf-8')
+        do_decode = True
+    except UnicodeDecodeError:
+        do_decode = False
+    items = args.strip().split('\n')
+
+    # iterate over the tokens, and reassemble any that may have been
+    # split on a space inside a jinja2 block.
+    # ex if tokens are "{{", "foo", "}}" these go together
+
+    # These variables are used
+    # to keep track of the state of the parsing, since blocks and quotes
+    # may be nested within each other.
+
+    quote_char = None
+    inside_quotes = False
+    print_depth   = 0 # used to count nested jinja2 {{ }} blocks
+    block_depth   = 0 # used to count nested jinja2 {% %} blocks
+    comment_depth = 0 # used to count nested jinja2 {# #} blocks
+
+    # now we loop over each split chunk, coalescing tokens if the white space
+    # split occurred within quotes or a jinja2 block of some kind
+    for itemidx,item in enumerate(items):
+
+        # we split on spaces and newlines separately, so that we
+        # can tell which character we split on for reassembly
+        # inside quotation characters
+        tokens = item.strip().split(' ')
+
+        line_continuation = False
+        for idx,token in enumerate(tokens):
+
+            # if we hit a line continuation character, but
+            # we're not inside quotes, ignore it and continue
+            # on to the next token while setting a flag
+            if token == '\\' and not inside_quotes:
+                line_continuation = True
+                continue
+
+            # store the previous quoting state for checking later
+            was_inside_quotes = inside_quotes
+            quote_char = _get_quote_state(token, quote_char)
+            inside_quotes = quote_char is not None
+
+            # multiple conditions may append a token to the list of params,
+            # so we keep track with this flag to make sure it only happens once
+            # append means add to the end of the list, don't append means concatenate
+            # it to the end of the last token
+            appended = False
+
+            # if we're inside quotes now, but weren't before, append the token
+            # to the end of the list, since we'll tack on more to it later
+            # otherwise, if we're inside any jinja2 block, inside quotes, or we were
+            # inside quotes (but aren't now) concat this token to the last param
+            if inside_quotes and not was_inside_quotes:
+                params.append(token)
+                appended = True
+            elif print_depth or block_depth or comment_depth or inside_quotes or was_inside_quotes:
+                if idx == 0 and not inside_quotes and was_inside_quotes:
+                    params[-1] = "%s%s" % (params[-1], token)
+                elif len(tokens) > 1:
+                    spacer = ''
+                    if idx > 0:
+                        spacer = ' '
+                    params[-1] = "%s%s%s" % (params[-1], spacer, token)
+                else:
+                    params[-1] = "%s\n%s" % (params[-1], token)
+                appended = True
+
+            # if the number of paired block tags is not the same, the depth has changed, so we calculate that here
+            # and may append the current token to the params (if we haven't previously done so)
+            prev_print_depth = print_depth
+            print_depth = _count_jinja2_blocks(token, print_depth, "{{", "}}")
+            if print_depth != prev_print_depth and not appended:
+                params.append(token)
+                appended = True
+
+            prev_block_depth = block_depth
+            block_depth = _count_jinja2_blocks(token, block_depth, "{%", "%}")
+            if block_depth != prev_block_depth and not appended:
+                params.append(token)
+                appended = True
+
+            prev_comment_depth = comment_depth
+            comment_depth = _count_jinja2_blocks(token, comment_depth, "{#", "#}")
+            if comment_depth != prev_comment_depth and not appended:
+                params.append(token)
+                appended = True
+
+            # finally, if we're at zero depth for all blocks and not inside quotes, and have not
+            # yet appended anything to the list of params, we do so now
+            if not (print_depth or block_depth or comment_depth) and not inside_quotes and not appended and token != '':
+                params.append(token)
+
+        # once all tokens of this line are handled, if we have more than one item
+        # (meaning we split on newlines), this is not the last item, and no line
+        # continuation was seen, add a newline back to preserve the structure
+        if len(items) > 1 and itemidx != len(items) - 1 and not line_continuation:
+            if not params[-1].endswith('\n'):
+                params[-1] += '\n'
+
+        # always clear the line continuation flag
+        line_continuation = False
+
+    # If we're done and things are not at zero depth or we're still inside quotes,
+    # raise an error to indicate that the args were unbalanced
+    if print_depth or block_depth or comment_depth or inside_quotes:
+        raise Exception("error while splitting arguments, either an unbalanced jinja2 block or quotes")
+
+    # finally, we decode each param back to the unicode it was in the arg string
+    if do_decode:
+        params = [x.decode('utf-8') for x in params]
+
+    return params
+
+def is_quoted(data):  # true only for 2+ chars wrapped in one matching quote type; a lone quote is not quoted
+    return len(data) > 1 and data[0] == data[-1] and data[0] in ('"', "'")
+
+def unquote(data):
+    ''' returns data without its first and last character when is_quoted(data) is true, else data unchanged '''
+    if not is_quoted(data):
+        return data
+    return data[1:-1]
+
diff --git a/v2/ansible/playbook/task.py b/v2/ansible/playbook/task.py
index dd37fe1da2..856246c327 100644
--- a/v2/ansible/playbook/task.py
+++ b/v2/ansible/playbook/task.py
@@ -19,7 +19,7 @@ from ansible.playbook.base import Base
 from ansible.playbook.attribute import Attribute, FieldAttribute
 
 # from ansible.playbook.conditional import Conditional
-# from ansible.common.errors import AnsibleError
+from ansible.errors import AnsibleError
 
 # TODO: it would be fantastic (if possible) if a task new where in the YAML it was defined for describing
 # it in error conditions
@@ -125,9 +125,21 @@ class Task(Base):
         ''' returns a human readable representation of the task '''
         return "TASK: %s" % self.get_name()
                 
+    def _parse_old_school_action(self, v):
+        ''' given a action/local_action line, return [module, args]; args is a dict, or the raw string for command/shell '''
+        tokens = v.split()
+        if len(tokens) < 2:
+            return [v,{}]
+        # everything after the module name is the argument string
+        joined = " ".join(tokens[1:])
+        if tokens[0] in ('command', 'shell'):
+            # free-form modules keep their arguments as one unparsed string
+            return [tokens[0], joined]
+        return [tokens[0], parse_kv(joined)]
 
     def _munge_action(self, ds, new_ds, k, v):
         ''' take a module name and split into action and args '''
+
         if self._action.value is not None or 'action' in ds or 'local_action' in ds:
             raise AnsibleError("duplicate action in task: %s" % k)
         new_ds['action'] = k
@@ -136,6 +148,7 @@ class Task(Base):
 
     def _munge_loop(self, ds, new_ds, k, v):
         ''' take a lookup plugin name and store it correctly '''
+
         if self._loop.value is not None:
             raise AnsibleError("duplicate loop in task: %s" % k)
         new_ds['loop'] = k
@@ -143,9 +156,10 @@ class Task(Base):
                 
     def _munge_action2(self, ds, new_ds, k, v, local=False):
         ''' take an old school action/local_action and reformat it '''
+
         if isinstance(v, basestring):
-            (module, args) = parse_kv(v)
-            new_ds['action'] = module
+            tokens = self._parse_old_school_action(v)
+            new_ds['action'] = tokens[0]
             if 'args' in ds:
                 raise AnsibleError("unexpected and redundant 'args'")
                 new_ds['args'] = args
@@ -153,8 +167,6 @@ class Task(Base):
                     if 'delegate_to' in ds:
                        raise AnsbileError("local_action and action conflict")
                     new_ds['delegate_to'] = 'localhost'
-            else:
-                raise AnsibleError("unexpected use of 'action'")
         else:
             raise AnsibleError("unexpected use of 'action'")
 
@@ -171,7 +183,7 @@ class Task(Base):
             if k in module_finder:
                 self._munge_action(ds, new_ds, k, v)
             elif "with_%s" % k in lookup_finder:
-                self._munge_loop(new_ds, k, v)
+                self._munge_loop(ds, new_ds, k, v)
             elif k == 'action':
                 self._munge_action2(ds, new_ds, k, v) 
             elif k == 'local_action':