doctests = """
Tests for the tokenize module.

>>> import glob, random, sys

The tests can be really simple. Given a small fragment of source
code, print out a table with tokens. The ENDMARKER is omitted for
brevity.

>>> dump_tokens("1 + 1")
NUMBER '1' (1, 0) (1, 1)
OP '+' (1, 2) (1, 3)
NUMBER '1' (1, 4) (1, 5)

>>> dump_tokens("if False:\\n"
... "    # NL\\n"
... "    True = False # NEWLINE\\n")
NAME 'if' (1, 0) (1, 2)
NAME 'False' (1, 3) (1, 8)
OP ':' (1, 8) (1, 9)
NEWLINE '\\n' (1, 9) (1, 10)
COMMENT '# NL' (2, 4) (2, 8)
NL '\\n' (2, 8) (2, 9)
INDENT '    ' (3, 0) (3, 4)
NAME 'True' (3, 4) (3, 8)
OP '=' (3, 9) (3, 10)
NAME 'False' (3, 11) (3, 16)
COMMENT '# NEWLINE' (3, 17) (3, 26)
NEWLINE '\\n' (3, 26) (3, 27)
DEDENT '' (4, 0) (4, 0)

>>> indent_error_file = \"""
... def k(x):
...     x += 2
...   x += 5
... \"""

>>> for tok in generate_tokens(StringIO(indent_error_file).readline): pass
Traceback (most recent call last):
...
IndentationError: unindent does not match any outer indentation level

Test roundtrip for `untokenize`. `f` is an open file or a string. The source
code in f is tokenized, converted back to source code via tokenize.untokenize(),
and tokenized again from the latter. The test fails if the second tokenization
doesn't match the first.

>>> def roundtrip(f):
...     if isinstance(f, str): f = StringIO(f)
...     token_list = list(generate_tokens(f.readline))
...     f.close()
...     tokens1 = [tok[:2] for tok in token_list]
...     new_text = untokenize(tokens1)
...     readline = iter(new_text.splitlines(1)).next
...     tokens2 = [tok[:2] for tok in generate_tokens(readline)]
...     return tokens1 == tokens2
...

There are some standard formatting practices that are easy to get right.

>>> roundtrip("if x == 1:\\n"
... " print x\\n")
True

>>> roundtrip("# This is a comment\\n# This also")
True

Some people use different formatting conventions, which makes
untokenize a little trickier. Note that this test involves trailing
whitespace after the colon. Note that we use hex escapes to make the
two trailing blanks apparent in the expected output.


>>> roundtrip("if x == 1 : \\n"
... " print x\\n")
True

>>> f = test_support.findfile("tokenize_tests" + os.extsep + "txt")
>>> roundtrip(open(f))
True

>>> roundtrip("if x == 1:\\n"
... " # A comment by itself.\\n"
... " print x # Comment here, too.\\n"
... " # Another comment.\\n"
... "after_if = True\\n")
True

>>> roundtrip("if (x # The comments need to go in the right place\\n"
... " == 1):\\n"
... " print 'x==1'\\n")
True

>>> roundtrip("class Test: # A comment here\\n"
... " # A comment with weird indent\\n"
... " after_com = 5\\n"
... " def x(m): return m*5 # a one liner\\n"
... " def y(m): # A whitespace after the colon\\n"
... "   return y*4 # 3-space indent\\n")
True

Some error-handling code

>>> roundtrip("try: import somemodule\\n"
... "except ImportError: # comment\\n"
... " print 'Can not import' # comment2\\n"
... "else: print 'Loaded'\\n")
True

Balancing continuation

>>> roundtrip("a = (3,4, \\n"
... "5,6)\\n"
... "y = [3, 4,\\n"
... "5]\\n"
... "z = {'a': 5,\\n"
... "'b':15, 'c':True}\\n"
... "x = len(y) + 5 - a[\\n"
... "3] - a[2]\\n"
... "+ len(z) - z[\\n"
... "'b']\\n")
True

Ordinary integers and binary operators

>>> dump_tokens("0xff <= 255")
NUMBER '0xff' (1, 0) (1, 4)
OP '<=' (1, 5) (1, 7)
NUMBER '255' (1, 8) (1, 11)
>>> dump_tokens("0b10 <= 255")
NUMBER '0b10' (1, 0) (1, 4)
OP '<=' (1, 5) (1, 7)
NUMBER '255' (1, 8) (1, 11)
>>> dump_tokens("0o123 <= 0123")
NUMBER '0o123' (1, 0) (1, 5)
OP '<=' (1, 6) (1, 8)
NUMBER '0123' (1, 9) (1, 13)
>>> dump_tokens("01234567 > ~0x15")
NUMBER '01234567' (1, 0) (1, 8)
OP '>' (1, 9) (1, 10)
OP '~' (1, 11) (1, 12)
NUMBER '0x15' (1, 12) (1, 16)
>>> dump_tokens("2134568 != 01231515")
NUMBER '2134568' (1, 0) (1, 7)
OP '!=' (1, 8) (1, 10)
NUMBER '01231515' (1, 11) (1, 19)
>>> dump_tokens("(-124561-1) & 0200000000")
OP '(' (1, 0) (1, 1)
OP '-' (1, 1) (1, 2)
NUMBER '124561' (1, 2) (1, 8)
OP '-' (1, 8) (1, 9)
NUMBER '1' (1, 9) (1, 10)
OP ')' (1, 10) (1, 11)
OP '&' (1, 12) (1, 13)
NUMBER '0200000000' (1, 14) (1, 24)
>>> dump_tokens("0xdeadbeef != -1")
NUMBER '0xdeadbeef' (1, 0) (1, 10)
OP '!=' (1, 11) (1, 13)
OP '-' (1, 14) (1, 15)
NUMBER '1' (1, 15) (1, 16)
>>> dump_tokens("0xdeadc0de & 012345")
NUMBER '0xdeadc0de' (1, 0) (1, 10)
OP '&' (1, 11) (1, 12)
NUMBER '012345' (1, 13) (1, 19)
>>> dump_tokens("0xFF & 0x15 | 1234")
NUMBER '0xFF' (1, 0) (1, 4)
OP '&' (1, 5) (1, 6)
NUMBER '0x15' (1, 7) (1, 11)
OP '|' (1, 12) (1, 13)
NUMBER '1234' (1, 14) (1, 18)

Long integers

>>> dump_tokens("x = 0L")
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
NUMBER '0L' (1, 4) (1, 6)
>>> dump_tokens("x = 0xfffffffffff")
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
NUMBER '0xffffffffff (1, 4) (1, 17)
>>> dump_tokens("x = 123141242151251616110l")
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
NUMBER '123141242151 (1, 4) (1, 26)
>>> dump_tokens("x = -15921590215012591L")
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
OP '-' (1, 4) (1, 5)
NUMBER '159215902150 (1, 5) (1, 23)

Floating point numbers

>>> dump_tokens("x = 3.14159")
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
NUMBER '3.14159' (1, 4) (1, 11)
>>> dump_tokens("x = 314159.")
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
NUMBER '314159.' (1, 4) (1, 11)
>>> dump_tokens("x = .314159")
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
NUMBER '.314159' (1, 4) (1, 11)
>>> dump_tokens("x = 3e14159")
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
NUMBER '3e14159' (1, 4) (1, 11)
>>> dump_tokens("x = 3E123")
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
NUMBER '3E123' (1, 4) (1, 9)
>>> dump_tokens("x+y = 3e-1230")
NAME 'x' (1, 0) (1, 1)
OP '+' (1, 1) (1, 2)
NAME 'y' (1, 2) (1, 3)
OP '=' (1, 4) (1, 5)
NUMBER '3e-1230' (1, 6) (1, 13)
>>> dump_tokens("x = 3.14e159")
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
NUMBER '3.14e159' (1, 4) (1, 12)

String literals

>>> dump_tokens("x = ''; y = \\\"\\\"")
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
STRING "''" (1, 4) (1, 6)
OP ';' (1, 6) (1, 7)
NAME 'y' (1, 8) (1, 9)
OP '=' (1, 10) (1, 11)
STRING '""' (1, 12) (1, 14)
>>> dump_tokens("x = '\\\"'; y = \\\"'\\\"")
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
STRING '\\'"\\'' (1, 4) (1, 7)
OP ';' (1, 7) (1, 8)
NAME 'y' (1, 9) (1, 10)
OP '=' (1, 11) (1, 12)
STRING '"\\'"' (1, 13) (1, 16)
>>> dump_tokens("x = \\\"doesn't \\\"shrink\\\", does it\\\"")
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
STRING '"doesn\\'t "' (1, 4) (1, 14)
NAME 'shrink' (1, 14) (1, 20)
STRING '", does it"' (1, 20) (1, 31)
>>> dump_tokens("x = u'abc' + U'ABC'")
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
STRING "u'abc'" (1, 4) (1, 10)
OP '+' (1, 11) (1, 12)
STRING "U'ABC'" (1, 13) (1, 19)
>>> dump_tokens('y = u"ABC" + U"ABC"')
NAME 'y' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
STRING 'u"ABC"' (1, 4) (1, 10)
OP '+' (1, 11) (1, 12)
STRING 'U"ABC"' (1, 13) (1, 19)
>>> dump_tokens("x = ur'abc' + Ur'ABC' + uR'ABC' + UR'ABC'")
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
STRING "ur'abc'" (1, 4) (1, 11)
OP '+' (1, 12) (1, 13)
STRING "Ur'ABC'" (1, 14) (1, 21)
OP '+' (1, 22) (1, 23)
STRING "uR'ABC'" (1, 24) (1, 31)
OP '+' (1, 32) (1, 33)
STRING "UR'ABC'" (1, 34) (1, 41)
>>> dump_tokens('y = ur"abc" + Ur"ABC" + uR"ABC" + UR"ABC"')
NAME 'y' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
STRING 'ur"abc"' (1, 4) (1, 11)
OP '+' (1, 12) (1, 13)
STRING 'Ur"ABC"' (1, 14) (1, 21)
OP '+' (1, 22) (1, 23)
STRING 'uR"ABC"' (1, 24) (1, 31)
OP '+' (1, 32) (1, 33)
STRING 'UR"ABC"' (1, 34) (1, 41)

>>> dump_tokens("b'abc' + B'abc'")
STRING "b'abc'" (1, 0) (1, 6)
OP '+' (1, 7) (1, 8)
STRING "B'abc'" (1, 9) (1, 15)
>>> dump_tokens('b"abc" + B"abc"')
STRING 'b"abc"' (1, 0) (1, 6)
OP '+' (1, 7) (1, 8)
STRING 'B"abc"' (1, 9) (1, 15)
>>> dump_tokens("br'abc' + bR'abc' + Br'abc' + BR'abc'")
STRING "br'abc'" (1, 0) (1, 7)
OP '+' (1, 8) (1, 9)
STRING "bR'abc'" (1, 10) (1, 17)
OP '+' (1, 18) (1, 19)
STRING "Br'abc'" (1, 20) (1, 27)
OP '+' (1, 28) (1, 29)
STRING "BR'abc'" (1, 30) (1, 37)
>>> dump_tokens('br"abc" + bR"abc" + Br"abc" + BR"abc"')
STRING 'br"abc"' (1, 0) (1, 7)
OP '+' (1, 8) (1, 9)
STRING 'bR"abc"' (1, 10) (1, 17)
OP '+' (1, 18) (1, 19)
STRING 'Br"abc"' (1, 20) (1, 27)
OP '+' (1, 28) (1, 29)
STRING 'BR"abc"' (1, 30) (1, 37)

Operators

>>> dump_tokens("def d22(a, b, c=2, d=2, *k): pass")
NAME 'def' (1, 0) (1, 3)
NAME 'd22' (1, 4) (1, 7)
OP '(' (1, 7) (1, 8)
NAME 'a' (1, 8) (1, 9)
OP ',' (1, 9) (1, 10)
NAME 'b' (1, 11) (1, 12)
OP ',' (1, 12) (1, 13)
NAME 'c' (1, 14) (1, 15)
OP '=' (1, 15) (1, 16)
NUMBER '2' (1, 16) (1, 17)
OP ',' (1, 17) (1, 18)
NAME 'd' (1, 19) (1, 20)
OP '=' (1, 20) (1, 21)
NUMBER '2' (1, 21) (1, 22)
OP ',' (1, 22) (1, 23)
OP '*' (1, 24) (1, 25)
NAME 'k' (1, 25) (1, 26)
OP ')' (1, 26) (1, 27)
OP ':' (1, 27) (1, 28)
NAME 'pass' (1, 29) (1, 33)
>>> dump_tokens("def d01v_(a=1, *k, **w): pass")
NAME 'def' (1, 0) (1, 3)
NAME 'd01v_' (1, 4) (1, 9)
OP '(' (1, 9) (1, 10)
NAME 'a' (1, 10) (1, 11)
OP '=' (1, 11) (1, 12)
NUMBER '1' (1, 12) (1, 13)
OP ',' (1, 13) (1, 14)
OP '*' (1, 15) (1, 16)
NAME 'k' (1, 16) (1, 17)
OP ',' (1, 17) (1, 18)
OP '**' (1, 19) (1, 21)
NAME 'w' (1, 21) (1, 22)
OP ')' (1, 22) (1, 23)
OP ':' (1, 23) (1, 24)
NAME 'pass' (1, 25) (1, 29)

Comparison

>>> dump_tokens("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != " +
... "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass")
NAME 'if' (1, 0) (1, 2)
NUMBER '1' (1, 3) (1, 4)
OP '<' (1, 5) (1, 6)
NUMBER '1' (1, 7) (1, 8)
OP '>' (1, 9) (1, 10)
NUMBER '1' (1, 11) (1, 12)
OP '==' (1, 13) (1, 15)
NUMBER '1' (1, 16) (1, 17)
OP '>=' (1, 18) (1, 20)
NUMBER '5' (1, 21) (1, 22)
OP '<=' (1, 23) (1, 25)
NUMBER '0x15' (1, 26) (1, 30)
OP '<=' (1, 31) (1, 33)
NUMBER '0x12' (1, 34) (1, 38)
OP '!=' (1, 39) (1, 41)
NUMBER '1' (1, 42) (1, 43)
NAME 'and' (1, 44) (1, 47)
NUMBER '5' (1, 48) (1, 49)
NAME 'in' (1, 50) (1, 52)
NUMBER '1' (1, 53) (1, 54)
NAME 'not' (1, 55) (1, 58)
NAME 'in' (1, 59) (1, 61)
NUMBER '1' (1, 62) (1, 63)
NAME 'is' (1, 64) (1, 66)
NUMBER '1' (1, 67) (1, 68)
NAME 'or' (1, 69) (1, 71)
NUMBER '5' (1, 72) (1, 73)
NAME 'is' (1, 74) (1, 76)
NAME 'not' (1, 77) (1, 80)
NUMBER '1' (1, 81) (1, 82)
OP ':' (1, 82) (1, 83)
NAME 'pass' (1, 84) (1, 88)

Shift

>>> dump_tokens("x = 1 << 1 >> 5")
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
NUMBER '1' (1, 4) (1, 5)
OP '<<' (1, 6) (1, 8)
NUMBER '1' (1, 9) (1, 10)
OP '>>' (1, 11) (1, 13)
NUMBER '5' (1, 14) (1, 15)

Additive

>>> dump_tokens("x = 1 - y + 15 - 01 + 0x124 + z + a[5]")
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
NUMBER '1' (1, 4) (1, 5)
OP '-' (1, 6) (1, 7)
NAME 'y' (1, 8) (1, 9)
OP '+' (1, 10) (1, 11)
NUMBER '15' (1, 12) (1, 14)
OP '-' (1, 15) (1, 16)
NUMBER '01' (1, 17) (1, 19)
OP '+' (1, 20) (1, 21)
NUMBER '0x124' (1, 22) (1, 27)
OP '+' (1, 28) (1, 29)
NAME 'z' (1, 30) (1, 31)
OP '+' (1, 32) (1, 33)
NAME 'a' (1, 34) (1, 35)
OP '[' (1, 35) (1, 36)
NUMBER '5' (1, 36) (1, 37)
OP ']' (1, 37) (1, 38)

Multiplicative

>>> dump_tokens("x = 1//1*1/5*12%0x12")
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
NUMBER '1' (1, 4) (1, 5)
OP '//' (1, 5) (1, 7)
NUMBER '1' (1, 7) (1, 8)
OP '*' (1, 8) (1, 9)
NUMBER '1' (1, 9) (1, 10)
OP '/' (1, 10) (1, 11)
NUMBER '5' (1, 11) (1, 12)
OP '*' (1, 12) (1, 13)
NUMBER '12' (1, 13) (1, 15)
OP '%' (1, 15) (1, 16)
NUMBER '0x12' (1, 16) (1, 20)

Unary

>>> dump_tokens("~1 ^ 1 & 1 |1 ^ -1")
OP '~' (1, 0) (1, 1)
NUMBER '1' (1, 1) (1, 2)
OP '^' (1, 3) (1, 4)
NUMBER '1' (1, 5) (1, 6)
OP '&' (1, 7) (1, 8)
NUMBER '1' (1, 9) (1, 10)
OP '|' (1, 11) (1, 12)
NUMBER '1' (1, 12) (1, 13)
OP '^' (1, 14) (1, 15)
OP '-' (1, 16) (1, 17)
NUMBER '1' (1, 17) (1, 18)
>>> dump_tokens("-1*1/1+1*1//1 - ---1**1")
OP '-' (1, 0) (1, 1)
NUMBER '1' (1, 1) (1, 2)
OP '*' (1, 2) (1, 3)
NUMBER '1' (1, 3) (1, 4)
OP '/' (1, 4) (1, 5)
NUMBER '1' (1, 5) (1, 6)
OP '+' (1, 6) (1, 7)
NUMBER '1' (1, 7) (1, 8)
OP '*' (1, 8) (1, 9)
NUMBER '1' (1, 9) (1, 10)
OP '//' (1, 10) (1, 12)
NUMBER '1' (1, 12) (1, 13)
OP '-' (1, 14) (1, 15)
OP '-' (1, 16) (1, 17)
OP '-' (1, 17) (1, 18)
OP '-' (1, 18) (1, 19)
NUMBER '1' (1, 19) (1, 20)
OP '**' (1, 20) (1, 22)
NUMBER '1' (1, 22) (1, 23)

Selector

>>> dump_tokens("import sys, time\\nx = sys.modules['time'].time()")
NAME 'import' (1, 0) (1, 6)
NAME 'sys' (1, 7) (1, 10)
OP ',' (1, 10) (1, 11)
NAME 'time' (1, 12) (1, 16)
NEWLINE '\\n' (1, 16) (1, 17)
NAME 'x' (2, 0) (2, 1)
OP '=' (2, 2) (2, 3)
NAME 'sys' (2, 4) (2, 7)
OP '.' (2, 7) (2, 8)
NAME 'modules' (2, 8) (2, 15)
OP '[' (2, 15) (2, 16)
STRING "'time'" (2, 16) (2, 22)
OP ']' (2, 22) (2, 23)
OP '.' (2, 23) (2, 24)
NAME 'time' (2, 24) (2, 28)
OP '(' (2, 28) (2, 29)
OP ')' (2, 29) (2, 30)

Methods

>>> dump_tokens("@staticmethod\\ndef foo(x,y): pass")
OP '@' (1, 0) (1, 1)
NAME 'staticmethod (1, 1) (1, 13)
NEWLINE '\\n' (1, 13) (1, 14)
NAME 'def' (2, 0) (2, 3)
NAME 'foo' (2, 4) (2, 7)
OP '(' (2, 7) (2, 8)
NAME 'x' (2, 8) (2, 9)
OP ',' (2, 9) (2, 10)
NAME 'y' (2, 10) (2, 11)
OP ')' (2, 11) (2, 12)
OP ':' (2, 12) (2, 13)
NAME 'pass' (2, 14) (2, 18)

Backslash means line continuation, except for comments

>>> roundtrip("x=1+\\\\n"
... "1\\n"
... "# This is a comment\\\\n"
... "# This also\\n")
True
>>> roundtrip("# Comment \\\\nx = 0")
True

Two string literals on the same line

>>> roundtrip("'' ''")
True

Test roundtrip on random python modules.
Pass the '-ucpu' option to process the full directory.

>>>
>>> tempdir = os.path.dirname(f) or os.curdir
>>> testfiles = glob.glob(os.path.join(tempdir, "test*.py"))

>>> if not test_support.is_resource_enabled("cpu"):
...     testfiles = random.sample(testfiles, 10)
...
>>> for testfile in testfiles:
...     if not roundtrip(open(testfile)):
...         print "Roundtrip failed for file %s" % testfile
...         break
... else: True
True

Evil tabs
>>> dump_tokens("def f():\\n\\tif x\\n        \\tpass")
NAME 'def' (1, 0) (1, 3)
NAME 'f' (1, 4) (1, 5)
OP '(' (1, 5) (1, 6)
OP ')' (1, 6) (1, 7)
OP ':' (1, 7) (1, 8)
NEWLINE '\\n' (1, 8) (1, 9)
INDENT '\\t' (2, 0) (2, 1)
NAME 'if' (2, 1) (2, 3)
NAME 'x' (2, 4) (2, 5)
NEWLINE '\\n' (2, 5) (2, 6)
INDENT '        \\t' (3, 0) (3, 9)
NAME 'pass' (3, 9) (3, 13)
DEDENT '' (4, 0) (4, 0)
DEDENT '' (4, 0) (4, 0)

Pathological whitespace (http://bugs.python.org/issue16152)
>>> dump_tokens("@ ")
OP '@' (1, 0) (1, 1)
557 | """
|
---|
558 |
|
---|
559 |
|
---|
560 | from test import test_support
|
---|
561 | from tokenize import (untokenize, generate_tokens, NUMBER, NAME, OP,
|
---|
562 | STRING, ENDMARKER, tok_name)
|
---|
563 | from StringIO import StringIO
|
---|
564 | import os
|
---|
565 |
|
---|
def dump_tokens(s):
    """Print out the tokens in s in a table format.

    The ENDMARKER is omitted.
    """
    f = StringIO(s)
    for type, token, start, end, line in generate_tokens(f.readline):
        if type == ENDMARKER:
            break
        type = tok_name[type]
        print("%(type)-10.10s %(token)-13.13r %(start)s %(end)s" % locals())
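
# A minimal sketch (not part of the original tests; the helper name is
# illustrative only and is never called) of the five-tuples that
# generate_tokens() hands to dump_tokens() above: each token is
# (type, string, (srow, scol), (erow, ecol), line), and tok_name maps the
# numeric type to the name printed in the tables.
def _token_type_names(s):
    """Return the token type names for a source string (illustrative only)."""
    return [tok_name[tok[0]]
            for tok in generate_tokens(StringIO(s).readline)]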

# This is an example from the docs, set up as a doctest.
def decistmt(s):
    """Substitute Decimals for floats in a string of statements.

    >>> from decimal import Decimal
    >>> s = 'print +21.3e-5*-.1234/81.7'
    >>> decistmt(s)
    "print +Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')"

    The format of the exponent is inherited from the platform C library.
    Known cases are "e-007" (Windows) and "e-07" (not Windows). Since
    we're only showing 12 digits, and the 13th isn't close to 5, the
    rest of the output should be platform-independent.

    >>> exec(s) #doctest: +ELLIPSIS
    -3.21716034272e-0...7

    Output from calculations with Decimal should be identical across all
    platforms.

    >>> exec(decistmt(s))
    -3.217160342717258261933904529E-7
    """

    result = []
    g = generate_tokens(StringIO(s).readline)   # tokenize the string
    for toknum, tokval, _, _, _ in g:
        if toknum == NUMBER and '.' in tokval:  # replace NUMBER tokens
            result.extend([
                (NAME, 'Decimal'),
                (OP, '('),
                (STRING, repr(tokval)),
                (OP, ')')
            ])
        else:
            result.append((toknum, tokval))
    return untokenize(result)
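
# Both decistmt() and the roundtrip() helper defined in the doctests feed
# untokenize() plain (type, string) pairs rather than full five-tuples, so
# untokenize() falls back to a compatibility mode that guesses the spacing
# between tokens (hence "Decimal (" with a space in the expected output
# above).  A minimal sketch of that usage; the helper name is illustrative
# only and is not used by the tests.
def _retokenized_text(source):
    """Re-render source text from (type, string) pairs (illustrative only)."""
    pairs = [tok[:2] for tok in generate_tokens(StringIO(source).readline)]
    return untokenize(pairs)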


__test__ = {"doctests" : doctests, 'decistmt': decistmt}


def test_main():
    from test import test_tokenize
    test_support.run_doctest(test_tokenize, True)

if __name__ == "__main__":
    test_main()