# This file is part of Patsy
# Copyright (C) 2011 Nathaniel Smith <njs@pobox.com>
# See file LICENSE.txt for license information.

# Utilities for dealing with Python code at the token level.
#
# Includes:
#   a "pretty printer" that converts a sequence of tokens back into a
#       readable, white-space normalized string.
#   a utility function to replace calls to global functions with calls to
#       other functions

import tokenize
from six.moves import cStringIO as StringIO

from patsy import PatsyError
from patsy.origin import Origin

__all__ = ["python_tokenize", "pretty_untokenize",
           "normalize_token_spacing"]

# A convenience wrapper around tokenize.generate_tokens that yields tuples
#   (tokenize type, token string, origin object)
def python_tokenize(code):
    # Since formulas can only contain Python expressions, and Python
    # expressions cannot meaningfully contain newlines, we'll just remove all
    # the newlines up front to avoid any complications:
    code = code.replace("\n", " ").strip()
    it = tokenize.generate_tokens(StringIO(code).readline)
    try:
        for (pytype, string, (_, start), (_, end), code) in it:
            if pytype == tokenize.ENDMARKER:
                break
            origin = Origin(code, start, end)
            assert pytype != tokenize.NL
            if pytype == tokenize.NEWLINE:
                assert string == ""
                continue
            if pytype == tokenize.ERRORTOKEN:
                raise PatsyError("error tokenizing input "
                                 "(maybe an unclosed string?)",
                                 origin)
            if pytype == tokenize.COMMENT:
                raise PatsyError("comments are not allowed", origin)
            yield (pytype, string, origin)
        else:  # pragma: no cover
            raise ValueError("stream ended without ENDMARKER?!?")
    except tokenize.TokenError as e:
        # TokenError is raised iff the tokenizer thinks that there is
        # some sort of multi-line construct in progress (e.g., an
        # unclosed parenthesis, which in Python lets a virtual line
        # continue past the end of the physical line), and it hits the
        # end of the source text. We have our own error handling for
        # such cases, so just treat this as an end-of-stream.
        #
        # Just in case someone adds some other error case:
        assert e.args[0].startswith("EOF in multi-line")
        return

def test_python_tokenize():
    code = "a + (foo * -1)"
    tokens = list(python_tokenize(code))
    expected = [(tokenize.NAME, "a", Origin(code, 0, 1)),
                (tokenize.OP, "+", Origin(code, 2, 3)),
                (tokenize.OP, "(", Origin(code, 4, 5)),
                (tokenize.NAME, "foo", Origin(code, 5, 8)),
                (tokenize.OP, "*", Origin(code, 9, 10)),
                (tokenize.OP, "-", Origin(code, 11, 12)),
                (tokenize.NUMBER, "1", Origin(code, 12, 13)),
                (tokenize.OP, ")", Origin(code, 13, 14))]
    assert tokens == expected

    code2 = "a + (b"
    tokens2 = list(python_tokenize(code2))
    expected2 = [(tokenize.NAME, "a", Origin(code2, 0, 1)),
                 (tokenize.OP, "+", Origin(code2, 2, 3)),
                 (tokenize.OP, "(", Origin(code2, 4, 5)),
                 (tokenize.NAME, "b", Origin(code2, 5, 6))]
    assert tokens2 == expected2

    from nose.tools import assert_raises
    assert_raises(PatsyError, list, python_tokenize("a b # c"))

    from nose.tools import assert_raises
    assert_raises(PatsyError, list, python_tokenize("a b \"c"))

# Operators that want whitespace on both sides, only before, or only after.
_python_space_both = (list("+-*/%&^|<>")
                      + ["==", "<>", "!=", "<=", ">=",
                         "<<", ">>", "**", "//"])
_python_space_before = (_python_space_both
                        + ["!", "~"])
_python_space_after = (_python_space_both
                       + [",", ":"])

def pretty_untokenize(typed_tokens):
    text = []
    prev_was_space_delim = False
    prev_wants_space = False
    prev_was_open_paren_or_comma = False
    prev_was_object_like = False
    brackets = []
    for token_type, token in typed_tokens:
        assert token_type not in (tokenize.INDENT, tokenize.DEDENT,
                                  tokenize.NL)
        if token_type == tokenize.NEWLINE:
            continue
        if token_type == tokenize.ENDMARKER:
            continue
        if token_type in (tokenize.NAME, tokenize.NUMBER, tokenize.STRING):
            if prev_wants_space or prev_was_space_delim:
                text.append(" ")
            text.append(token)
            prev_wants_space = False
            prev_was_space_delim = True
        else:
            if token in ("(", "[", "{"):
                brackets.append(token)
            elif brackets and token in (")", "]", "}"):
                brackets.pop()
            this_wants_space_before = (token in _python_space_before)
            this_wants_space_after = (token in _python_space_after)
            # Special case for slice syntax: foo[:10]
            # Otherwise ":" is spaced after, like: "{1: ...}", "if a: ..."
            if token == ":" and brackets and brackets[-1] == "[":
                this_wants_space_after = False
            # Special case for foo(*args), foo(a, *args):
            if token in ("*", "**") and prev_was_open_paren_or_comma:
                this_wants_space_before = False
                this_wants_space_after = False
            # Special case for "a = foo(b=1)":
            if token == "=" and not brackets:
                this_wants_space_before = True
                this_wants_space_after = True
            # Special case for unary -, +. Our heuristic is that if we see the
            # + or - after something that looks like an object (a NAME,
            # NUMBER, STRING, or close paren) then it is probably binary,
            # otherwise it is probably unary.
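            # (E.g., in "foo(-1)" the "-" follows "(", so it is written as
            # unary "-1"; in "a - 1" it follows a NAME, so it keeps spaces
            # as a binary operator.)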
            if token in ("+", "-") and not prev_was_object_like:
                this_wants_space_before = False
                this_wants_space_after = False
            if prev_wants_space or this_wants_space_before:
                text.append(" ")
            text.append(token)
            prev_wants_space = this_wants_space_after
            prev_was_space_delim = False
        if (token_type in (tokenize.NAME, tokenize.NUMBER, tokenize.STRING)
            or token == ")"):
            prev_was_object_like = True
        else:
            prev_was_object_like = False
        prev_was_open_paren_or_comma = token in ("(", ",")
    return "".join(text)

def normalize_token_spacing(code):
    tokens = [(t[0], t[1])
              for t in tokenize.generate_tokens(StringIO(code).readline)]
    return pretty_untokenize(tokens)

def test_pretty_untokenize_and_normalize_token_spacing():
    assert normalize_token_spacing("1 + 1") == "1 + 1"
    assert normalize_token_spacing("1+1") == "1 + 1"
    assert normalize_token_spacing("1*(2+3**2)") == "1 * (2 + 3 ** 2)"
    assert normalize_token_spacing("a and b") == "a and b"
    assert normalize_token_spacing("foo(a=bar.baz[1:])") == "foo(a=bar.baz[1:])"
    assert normalize_token_spacing("""{"hi":foo[:]}""") == """{"hi": foo[:]}"""
    assert normalize_token_spacing("""'a' "b" 'c'""") == """'a' "b" 'c'"""
    assert normalize_token_spacing('"""a""" is 1 or 2==3') == '"""a""" is 1 or 2 == 3'
    assert normalize_token_spacing("foo ( * args )") == "foo(*args)"
    assert normalize_token_spacing("foo ( a * args )") == "foo(a * args)"
    assert normalize_token_spacing("foo ( ** args )") == "foo(**args)"
    assert normalize_token_spacing("foo ( a ** args )") == "foo(a ** args)"
    assert normalize_token_spacing("foo (1, * args )") == "foo(1, *args)"
    assert normalize_token_spacing("foo (1, a * args )") == "foo(1, a * args)"
    assert normalize_token_spacing("foo (1, ** args )") == "foo(1, **args)"
    assert normalize_token_spacing("foo (1, a ** args )") == "foo(1, a ** args)"

    assert normalize_token_spacing("a=foo(b = 1)") == "a = foo(b=1)"

    assert normalize_token_spacing("foo(+ 10, bar = - 1)") == "foo(+10, bar=-1)"
    assert normalize_token_spacing("1 + +10 + -1 - 5") == "1 + +10 + -1 - 5"