Refactoring split_args into sub-functions

2024-09-14 20:13:21 +02:00 · 2014-07-24 20:00:57 -05:00 · 2014-07-24 20:00:57 -05:00 · b8a4ba26f0
commit b8a4ba26f0
parent f0859a6d80
1 changed files with 73 additions and 86 deletions
--- a/lib/ansible/utils/splitter.py
+++ b/lib/ansible/utils/splitter.py
@ -15,6 +15,39 @@
 # You should have received a copy of the GNU General Public License
 # along with Ansible.  If not, see <http://www.gnu.org/licenses/>.

+def _get_quote_state(token, quote_char):
+    '''
+    Scan token and return the quote character still open at its end, or
+    None if no quote is open; quote_char is the quote open on entry (or None).
+    '''
+    # the char before the current one within this token; a matching quote
+    # preceded by a backslash is treated as escaped and does not close
+    prev_char = None
+    for idx, cur_char in enumerate(token):
+        if idx > 0:
+            prev_char = token[idx-1]
+        if cur_char in '"\'':
+            if quote_char:
+                if cur_char == quote_char and prev_char != '\\':
+                    quote_char = None
+            else:
+                quote_char = cur_char
+    return quote_char
+
+def _count_jinja2_blocks(token, cur_depth, open_token, close_token):
+    '''
+    Return cur_depth adjusted by the difference between the number of
+    open_token and close_token occurrences in token. If that adjustment
+    would make the depth negative, the depth is reset to zero instead.
+    '''
+    num_open  = token.count(open_token)
+    num_close = token.count(close_token)
+    if num_open != num_close:
+        cur_depth += (num_open - num_close)
+        if cur_depth < 0:
+            cur_depth = 0
+    return cur_depth
+
 def split_args(args):
    '''
    Splits args on whitespace, but intelligently reassembles
@ -24,15 +57,13 @@ def split_args(args):
    jinja2 blocks, however this function is/will be used in the
    core portions as well before the args are templated.

-    example input: a=b c=d
-    example output: dict(a='b', c='d')
+    example input: a=b c="foo bar"
+    example output: ['a=b', 'c="foo bar"']

    Basically this is a variation of shlex that has some more intelligence for
    how Ansible needs to use it.
    '''

-    # FIXME: refactoring into smaller functions
-
    # the list of params parsed out of the arg string
    # this is going to be the result value when we are done
    params = []
@ -40,52 +71,32 @@ def split_args(args):
    # here we encode the args, so we have a uniform charset to
    # work with, and split on white space
    args = args.encode('utf-8')
-    items = args.split()
+    tokens = args.split()

-    # iterate over the items, and reassemble any that may have been
-    # split on a space inside a jinja2 block. 
+    # iterate over the tokens, and reassemble any that may have been
+    # split on a space inside a jinja2 block.
    # ex if tokens are "{{", "foo", "}}" these go together

    # These variables are used
    # to keep track of the state of the parsing, since blocks and quotes
    # may be nested within each other.

-    inside_quotes = False
    quote_char = None
-    split_print_depth = 0
-    split_block_depth = 0
-    split_comment_depth = 0
+    inside_quotes = False
+    print_depth   = 0 # used to count nested jinja2 {{ }} blocks
+    block_depth   = 0 # used to count nested jinja2 {% %} blocks
+    comment_depth = 0 # used to count nested jinja2 {# #} blocks

-    # now we loop over each split item, coalescing items if the white space
+    # now we loop over each split token, coalescing tokens if the white space
    # split occurred within quotes or a jinja2 block of some kind
+    for token in tokens:

-    for item in items:
-
-        item = item.strip()
+        token = token.strip()

        # store the previous quoting state for checking later
        was_inside_quotes = inside_quotes
-
-        # determine the current quoting state
-        # the goal of this block is to determine if the quoted string
-        # is unterminated in which case it needs to be put back together
-
-        bc = None # before_char
-        for i in range(0, len(item)):  # use enumerate
-
-            c = item[i]  # current_char
-
-            if i > 0:
-                bc = item[i-1]
-
-            if c in ('"', "'"):
-                if inside_quotes:
-                    if c == quote_char and bc != '\\':
-                        inside_quotes = False
-                        quote_char = None
-                else:
-                    inside_quotes = True
-                    quote_char = c
+        quote_char = _get_quote_state(token, quote_char)
+        inside_quotes = quote_char is not None

        # multiple conditions may append a token to the list of params,
        # so we keep track with this flag to make sure it only happens once
@ -93,69 +104,45 @@ def split_args(args):
        # it to the end of the last token
        appended = False

-        # if we're inside quotes now, but weren't before, append the item
+        # if we're inside quotes now, but weren't before, append the token 
        # to the end of the list, since we'll tack on more to it later
-
-        if inside_quotes and not was_inside_quotes:
-            params.append(item)
-            appended = True
-
        # otherwise, if we're inside any jinja2 block, inside quotes, or we were
-        # inside quotes (but aren't now) concat this item to the last param
-        # FIXME: just or these all together
-        elif (split_print_depth or split_block_depth or split_comment_depth or inside_quotes or was_inside_quotes):
-            params[-1] = "%s %s" % (params[-1], item)
+        # inside quotes (but aren't now) concat this token to the last param
+        if inside_quotes and not was_inside_quotes:
+            params.append(token)
+            appended = True
+        elif print_depth or block_depth or comment_depth or inside_quotes or was_inside_quotes:
+            params[-1] = "%s %s" % (params[-1], token)
            appended = True
-
-        # these variables are used to determine the current depth of each jinja2
-        # block type, by counting the number of openings and closing tags
-        # FIXME: assumes Jinja2 seperators aren't changeable (also true elsewhere in ansible ATM)
-
-        num_print_open    = item.count('{{')
-        num_print_close   = item.count('}}')
-        num_block_open    = item.count('{%')
-        num_block_close   = item.count('%}')
-        num_comment_open  = item.count('{#')
-        num_comment_close = item.count('#}')

        # if the number of paired block tags is not the same, the depth has changed, so we calculate that here
-        # and may append the current item to the params (if we haven't previously done so)
+        # and may append the current token to the params (if we haven't previously done so)
+        prev_print_depth = print_depth
+        print_depth = _count_jinja2_blocks(token, print_depth, "{{", "}}")
+        if print_depth != prev_print_depth and not appended:
+            params.append(token)
+            appended = True

-        # FIXME: DRY a bit
-        if num_print_open != num_print_close:
-            split_print_depth += (num_print_open - num_print_close)
-            if not appended:
-                params.append(item)
-                appended = True
-            if split_print_depth < 0:
-                split_print_depth = 0
+        prev_block_depth = block_depth
+        block_depth = _count_jinja2_blocks(token, block_depth, "{%", "%}")
+        if block_depth != prev_block_depth and not appended:
+            params.append(token)
+            appended = True

-        if num_block_open != num_block_close:
-            split_block_depth += (num_block_open - num_block_close)
-            if not appended:
-                params.append(item)
-                appended = True
-            if split_block_depth < 0:
-                split_block_depth = 0
-
-        if num_comment_open != num_comment_close:
-            split_comment_depth += (num_comment_open - num_comment_close)
-            if not appended:
-                params.append(item)
-                appended = True
-            if split_comment_depth < 0:
-                split_comment_depth = 0
+        prev_comment_depth = comment_depth
+        comment_depth = _count_jinja2_blocks(token, comment_depth, "{#", "#}")
+        if comment_depth != prev_comment_depth and not appended:
+            params.append(token)
+            appended = True

        # finally, if we're at zero depth for all blocks and not inside quotes, and have not
        # yet appended anything to the list of params, we do so now
-
-        if not (split_print_depth or split_block_depth or split_comment_depth) and not inside_quotes and not appended:
-            params.append(item)
+        if not (print_depth or block_depth or comment_depth) and not inside_quotes and not appended:
+            params.append(token)

    # If we're done and things are not at zero depth or we're still inside quotes,
    # raise an error to indicate that the args were unbalanced
-
-    if (split_print_depth or split_block_depth or split_comment_depth) or inside_quotes:
+    if print_depth or block_depth or comment_depth or inside_quotes:
        raise Exception("error while splitting arguments, either an unbalanced jinja2 block or quotes")

    # finally, we decode each param back to the unicode it was in the arg string