doctests = """
Tests for the tokenize module.

>>> import glob, random, sys

The tests can be really simple. Given a small fragment of source
code, print out a table with tokens. The ENDMARKER is omitted for
brevity.

>>> dump_tokens("1 + 1")
NUMBER '1' (1, 0) (1, 1)
OP '+' (1, 2) (1, 3)
NUMBER '1' (1, 4) (1, 5)

>>> dump_tokens("if False:\\n"
... "    # NL\\n"
... "    True = False # NEWLINE\\n")
NAME 'if' (1, 0) (1, 2)
NAME 'False' (1, 3) (1, 8)
OP ':' (1, 8) (1, 9)
NEWLINE '\\n' (1, 9) (1, 10)
COMMENT '# NL' (2, 4) (2, 8)
NL '\\n' (2, 8) (2, 9)
INDENT '    ' (3, 0) (3, 4)
NAME 'True' (3, 4) (3, 8)
OP '=' (3, 9) (3, 10)
NAME 'False' (3, 11) (3, 16)
COMMENT '# NEWLINE' (3, 17) (3, 26)
NEWLINE '\\n' (3, 26) (3, 27)
DEDENT '' (4, 0) (4, 0)

>>> indent_error_file = \"""
... def k(x):
...     x += 2
...   x += 5
... \"""

>>> for tok in generate_tokens(StringIO(indent_error_file).readline): pass
Traceback (most recent call last):
...
IndentationError: unindent does not match any outer indentation level

Test roundtrip for `untokenize`. `f` is an open file or a string. The source
code in f is tokenized, converted back to source code via tokenize.untokenize(),
and tokenized again from the latter. The test fails if the second tokenization
doesn't match the first.

>>> def roundtrip(f):
...     if isinstance(f, str): f = StringIO(f)
...     token_list = list(generate_tokens(f.readline))
...     f.close()
...     tokens1 = [tok[:2] for tok in token_list]
...     new_text = untokenize(tokens1)
...     readline = iter(new_text.splitlines(1)).next
...     tokens2 = [tok[:2] for tok in generate_tokens(readline)]
...     return tokens1 == tokens2
...

There are some standard formatting practices that are easy to get right.

>>> roundtrip("if x == 1:\\n"
... " print x\\n")
True

>>> roundtrip("# This is a comment\\n# This also")
True

Some people use different formatting conventions, which makes
untokenize a little trickier. Note that this test involves trailing
whitespace after the colon. Note that we use hex escapes to make the
two trailing blanks apparent in the expected output.


>>> roundtrip("if x == 1 : \\n"
... " print x\\n")
True

>>> f = test_support.findfile("tokenize_tests" + os.extsep + "txt")
>>> roundtrip(open(f))
True

>>> roundtrip("if x == 1:\\n"
... " # A comment by itself.\\n"
... " print x # Comment here, too.\\n"
... " # Another comment.\\n"
... "after_if = True\\n")
True

>>> roundtrip("if (x # The comments need to go in the right place\\n"
... " == 1):\\n"
... " print 'x==1'\\n")
True

>>> roundtrip("class Test: # A comment here\\n"
... " # A comment with weird indent\\n"
... " after_com = 5\\n"
... " def x(m): return m*5 # a one liner\\n"
... " def y(m): # A whitespace after the colon\\n"
... "   return y*4 # 3-space indent\\n")
True

Some error-handling code

>>> roundtrip("try: import somemodule\\n"
... "except ImportError: # comment\\n"
... " print 'Can not import' # comment2\\n"
... "else: print 'Loaded'\\n")
True

Balancing continuation

>>> roundtrip("a = (3,4, \\n"
... "5,6)\\n"
... "y = [3, 4,\\n"
... "5]\\n"
... "z = {'a': 5,\\n"
... "'b':15, 'c':True}\\n"
... "x = len(y) + 5 - a[\\n"
... "3] - a[2]\\n"
... "+ len(z) - z[\\n"
... "'b']\\n")
True

Ordinary integers and binary operators

>>> dump_tokens("0xff <= 255")
NUMBER '0xff' (1, 0) (1, 4)
OP '<=' (1, 5) (1, 7)
NUMBER '255' (1, 8) (1, 11)
>>> dump_tokens("0b10 <= 255")
NUMBER '0b10' (1, 0) (1, 4)
OP '<=' (1, 5) (1, 7)
NUMBER '255' (1, 8) (1, 11)
>>> dump_tokens("0o123 <= 0123")
NUMBER '0o123' (1, 0) (1, 5)
OP '<=' (1, 6) (1, 8)
NUMBER '0123' (1, 9) (1, 13)
>>> dump_tokens("01234567 > ~0x15")
NUMBER '01234567' (1, 0) (1, 8)
OP '>' (1, 9) (1, 10)
OP '~' (1, 11) (1, 12)
NUMBER '0x15' (1, 12) (1, 16)
>>> dump_tokens("2134568 != 01231515")
NUMBER '2134568' (1, 0) (1, 7)
OP '!=' (1, 8) (1, 10)
NUMBER '01231515' (1, 11) (1, 19)
>>> dump_tokens("(-124561-1) & 0200000000")
OP '(' (1, 0) (1, 1)
OP '-' (1, 1) (1, 2)
NUMBER '124561' (1, 2) (1, 8)
OP '-' (1, 8) (1, 9)
NUMBER '1' (1, 9) (1, 10)
OP ')' (1, 10) (1, 11)
OP '&' (1, 12) (1, 13)
NUMBER '0200000000' (1, 14) (1, 24)
>>> dump_tokens("0xdeadbeef != -1")
NUMBER '0xdeadbeef' (1, 0) (1, 10)
OP '!=' (1, 11) (1, 13)
OP '-' (1, 14) (1, 15)
NUMBER '1' (1, 15) (1, 16)
>>> dump_tokens("0xdeadc0de & 012345")
NUMBER '0xdeadc0de' (1, 0) (1, 10)
OP '&' (1, 11) (1, 12)
NUMBER '012345' (1, 13) (1, 19)
>>> dump_tokens("0xFF & 0x15 | 1234")
NUMBER '0xFF' (1, 0) (1, 4)
OP '&' (1, 5) (1, 6)
NUMBER '0x15' (1, 7) (1, 11)
OP '|' (1, 12) (1, 13)
NUMBER '1234' (1, 14) (1, 18)

Long integers

>>> dump_tokens("x = 0L")
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
NUMBER '0L' (1, 4) (1, 6)
>>> dump_tokens("x = 0xfffffffffff")
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
NUMBER '0xffffffffff (1, 4) (1, 17)
>>> dump_tokens("x = 123141242151251616110l")
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
NUMBER '123141242151 (1, 4) (1, 26)
>>> dump_tokens("x = -15921590215012591L")
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
OP '-' (1, 4) (1, 5)
NUMBER '159215902150 (1, 5) (1, 23)

Floating point numbers

>>> dump_tokens("x = 3.14159")
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
NUMBER '3.14159' (1, 4) (1, 11)
>>> dump_tokens("x = 314159.")
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
NUMBER '314159.' (1, 4) (1, 11)
>>> dump_tokens("x = .314159")
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
NUMBER '.314159' (1, 4) (1, 11)
>>> dump_tokens("x = 3e14159")
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
NUMBER '3e14159' (1, 4) (1, 11)
>>> dump_tokens("x = 3E123")
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
NUMBER '3E123' (1, 4) (1, 9)
>>> dump_tokens("x+y = 3e-1230")
NAME 'x' (1, 0) (1, 1)
OP '+' (1, 1) (1, 2)
NAME 'y' (1, 2) (1, 3)
OP '=' (1, 4) (1, 5)
NUMBER '3e-1230' (1, 6) (1, 13)
>>> dump_tokens("x = 3.14e159")
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
NUMBER '3.14e159' (1, 4) (1, 12)

String literals

>>> dump_tokens("x = ''; y = \\\"\\\"")
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
STRING "''" (1, 4) (1, 6)
OP ';' (1, 6) (1, 7)
NAME 'y' (1, 8) (1, 9)
OP '=' (1, 10) (1, 11)
STRING '""' (1, 12) (1, 14)
>>> dump_tokens("x = '\\\"'; y = \\\"'\\\"")
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
STRING '\\'"\\'' (1, 4) (1, 7)
OP ';' (1, 7) (1, 8)
NAME 'y' (1, 9) (1, 10)
OP '=' (1, 11) (1, 12)
STRING '"\\'"' (1, 13) (1, 16)
>>> dump_tokens("x = \\\"doesn't \\\"shrink\\\", does it\\\"")
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
STRING '"doesn\\'t "' (1, 4) (1, 14)
NAME 'shrink' (1, 14) (1, 20)
STRING '", does it"' (1, 20) (1, 31)
>>> dump_tokens("x = u'abc' + U'ABC'")
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
STRING "u'abc'" (1, 4) (1, 10)
OP '+' (1, 11) (1, 12)
STRING "U'ABC'" (1, 13) (1, 19)
>>> dump_tokens('y = u"ABC" + U"ABC"')
NAME 'y' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
STRING 'u"ABC"' (1, 4) (1, 10)
OP '+' (1, 11) (1, 12)
STRING 'U"ABC"' (1, 13) (1, 19)
>>> dump_tokens("x = ur'abc' + Ur'ABC' + uR'ABC' + UR'ABC'")
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
STRING "ur'abc'" (1, 4) (1, 11)
OP '+' (1, 12) (1, 13)
STRING "Ur'ABC'" (1, 14) (1, 21)
OP '+' (1, 22) (1, 23)
STRING "uR'ABC'" (1, 24) (1, 31)
OP '+' (1, 32) (1, 33)
STRING "UR'ABC'" (1, 34) (1, 41)
>>> dump_tokens('y = ur"abc" + Ur"ABC" + uR"ABC" + UR"ABC"')
NAME 'y' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
STRING 'ur"abc"' (1, 4) (1, 11)
OP '+' (1, 12) (1, 13)
STRING 'Ur"ABC"' (1, 14) (1, 21)
OP '+' (1, 22) (1, 23)
STRING 'uR"ABC"' (1, 24) (1, 31)
OP '+' (1, 32) (1, 33)
STRING 'UR"ABC"' (1, 34) (1, 41)

>>> dump_tokens("b'abc' + B'abc'")
STRING "b'abc'" (1, 0) (1, 6)
OP '+' (1, 7) (1, 8)
STRING "B'abc'" (1, 9) (1, 15)
>>> dump_tokens('b"abc" + B"abc"')
STRING 'b"abc"' (1, 0) (1, 6)
OP '+' (1, 7) (1, 8)
STRING 'B"abc"' (1, 9) (1, 15)
>>> dump_tokens("br'abc' + bR'abc' + Br'abc' + BR'abc'")
STRING "br'abc'" (1, 0) (1, 7)
OP '+' (1, 8) (1, 9)
STRING "bR'abc'" (1, 10) (1, 17)
OP '+' (1, 18) (1, 19)
STRING "Br'abc'" (1, 20) (1, 27)
OP '+' (1, 28) (1, 29)
STRING "BR'abc'" (1, 30) (1, 37)
>>> dump_tokens('br"abc" + bR"abc" + Br"abc" + BR"abc"')
STRING 'br"abc"' (1, 0) (1, 7)
OP '+' (1, 8) (1, 9)
STRING 'bR"abc"' (1, 10) (1, 17)
OP '+' (1, 18) (1, 19)
STRING 'Br"abc"' (1, 20) (1, 27)
OP '+' (1, 28) (1, 29)
STRING 'BR"abc"' (1, 30) (1, 37)

Operators

>>> dump_tokens("def d22(a, b, c=2, d=2, *k): pass")
NAME 'def' (1, 0) (1, 3)
NAME 'd22' (1, 4) (1, 7)
OP '(' (1, 7) (1, 8)
NAME 'a' (1, 8) (1, 9)
OP ',' (1, 9) (1, 10)
NAME 'b' (1, 11) (1, 12)
OP ',' (1, 12) (1, 13)
NAME 'c' (1, 14) (1, 15)
OP '=' (1, 15) (1, 16)
NUMBER '2' (1, 16) (1, 17)
OP ',' (1, 17) (1, 18)
NAME 'd' (1, 19) (1, 20)
OP '=' (1, 20) (1, 21)
NUMBER '2' (1, 21) (1, 22)
OP ',' (1, 22) (1, 23)
OP '*' (1, 24) (1, 25)
NAME 'k' (1, 25) (1, 26)
OP ')' (1, 26) (1, 27)
OP ':' (1, 27) (1, 28)
NAME 'pass' (1, 29) (1, 33)
>>> dump_tokens("def d01v_(a=1, *k, **w): pass")
NAME 'def' (1, 0) (1, 3)
NAME 'd01v_' (1, 4) (1, 9)
OP '(' (1, 9) (1, 10)
NAME 'a' (1, 10) (1, 11)
OP '=' (1, 11) (1, 12)
NUMBER '1' (1, 12) (1, 13)
OP ',' (1, 13) (1, 14)
OP '*' (1, 15) (1, 16)
NAME 'k' (1, 16) (1, 17)
OP ',' (1, 17) (1, 18)
OP '**' (1, 19) (1, 21)
NAME 'w' (1, 21) (1, 22)
OP ')' (1, 22) (1, 23)
OP ':' (1, 23) (1, 24)
NAME 'pass' (1, 25) (1, 29)

Comparison

>>> dump_tokens("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != " +
... "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass")
NAME 'if' (1, 0) (1, 2)
NUMBER '1' (1, 3) (1, 4)
OP '<' (1, 5) (1, 6)
NUMBER '1' (1, 7) (1, 8)
OP '>' (1, 9) (1, 10)
NUMBER '1' (1, 11) (1, 12)
OP '==' (1, 13) (1, 15)
NUMBER '1' (1, 16) (1, 17)
OP '>=' (1, 18) (1, 20)
NUMBER '5' (1, 21) (1, 22)
OP '<=' (1, 23) (1, 25)
NUMBER '0x15' (1, 26) (1, 30)
OP '<=' (1, 31) (1, 33)
NUMBER '0x12' (1, 34) (1, 38)
OP '!=' (1, 39) (1, 41)
NUMBER '1' (1, 42) (1, 43)
NAME 'and' (1, 44) (1, 47)
NUMBER '5' (1, 48) (1, 49)
NAME 'in' (1, 50) (1, 52)
NUMBER '1' (1, 53) (1, 54)
NAME 'not' (1, 55) (1, 58)
NAME 'in' (1, 59) (1, 61)
NUMBER '1' (1, 62) (1, 63)
NAME 'is' (1, 64) (1, 66)
NUMBER '1' (1, 67) (1, 68)
NAME 'or' (1, 69) (1, 71)
NUMBER '5' (1, 72) (1, 73)
NAME 'is' (1, 74) (1, 76)
NAME 'not' (1, 77) (1, 80)
NUMBER '1' (1, 81) (1, 82)
OP ':' (1, 82) (1, 83)
NAME 'pass' (1, 84) (1, 88)

Shift

>>> dump_tokens("x = 1 << 1 >> 5")
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
NUMBER '1' (1, 4) (1, 5)
OP '<<' (1, 6) (1, 8)
NUMBER '1' (1, 9) (1, 10)
OP '>>' (1, 11) (1, 13)
NUMBER '5' (1, 14) (1, 15)

Additive

>>> dump_tokens("x = 1 - y + 15 - 01 + 0x124 + z + a[5]")
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
NUMBER '1' (1, 4) (1, 5)
OP '-' (1, 6) (1, 7)
NAME 'y' (1, 8) (1, 9)
OP '+' (1, 10) (1, 11)
NUMBER '15' (1, 12) (1, 14)
OP '-' (1, 15) (1, 16)
NUMBER '01' (1, 17) (1, 19)
OP '+' (1, 20) (1, 21)
NUMBER '0x124' (1, 22) (1, 27)
OP '+' (1, 28) (1, 29)
NAME 'z' (1, 30) (1, 31)
OP '+' (1, 32) (1, 33)
NAME 'a' (1, 34) (1, 35)
OP '[' (1, 35) (1, 36)
NUMBER '5' (1, 36) (1, 37)
OP ']' (1, 37) (1, 38)

Multiplicative

>>> dump_tokens("x = 1//1*1/5*12%0x12")
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
NUMBER '1' (1, 4) (1, 5)
OP '//' (1, 5) (1, 7)
NUMBER '1' (1, 7) (1, 8)
OP '*' (1, 8) (1, 9)
NUMBER '1' (1, 9) (1, 10)
OP '/' (1, 10) (1, 11)
NUMBER '5' (1, 11) (1, 12)
OP '*' (1, 12) (1, 13)
NUMBER '12' (1, 13) (1, 15)
OP '%' (1, 15) (1, 16)
NUMBER '0x12' (1, 16) (1, 20)

Unary

>>> dump_tokens("~1 ^ 1 & 1 |1 ^ -1")
OP '~' (1, 0) (1, 1)
NUMBER '1' (1, 1) (1, 2)
OP '^' (1, 3) (1, 4)
NUMBER '1' (1, 5) (1, 6)
OP '&' (1, 7) (1, 8)
NUMBER '1' (1, 9) (1, 10)
OP '|' (1, 11) (1, 12)
NUMBER '1' (1, 12) (1, 13)
OP '^' (1, 14) (1, 15)
OP '-' (1, 16) (1, 17)
NUMBER '1' (1, 17) (1, 18)
>>> dump_tokens("-1*1/1+1*1//1 - ---1**1")
OP '-' (1, 0) (1, 1)
NUMBER '1' (1, 1) (1, 2)
OP '*' (1, 2) (1, 3)
NUMBER '1' (1, 3) (1, 4)
OP '/' (1, 4) (1, 5)
NUMBER '1' (1, 5) (1, 6)
OP '+' (1, 6) (1, 7)
NUMBER '1' (1, 7) (1, 8)
OP '*' (1, 8) (1, 9)
NUMBER '1' (1, 9) (1, 10)
OP '//' (1, 10) (1, 12)
NUMBER '1' (1, 12) (1, 13)
OP '-' (1, 14) (1, 15)
OP '-' (1, 16) (1, 17)
OP '-' (1, 17) (1, 18)
OP '-' (1, 18) (1, 19)
NUMBER '1' (1, 19) (1, 20)
OP '**' (1, 20) (1, 22)
NUMBER '1' (1, 22) (1, 23)

Selector

>>> dump_tokens("import sys, time\\nx = sys.modules['time'].time()")
NAME 'import' (1, 0) (1, 6)
NAME 'sys' (1, 7) (1, 10)
OP ',' (1, 10) (1, 11)
NAME 'time' (1, 12) (1, 16)
NEWLINE '\\n' (1, 16) (1, 17)
NAME 'x' (2, 0) (2, 1)
OP '=' (2, 2) (2, 3)
NAME 'sys' (2, 4) (2, 7)
OP '.' (2, 7) (2, 8)
NAME 'modules' (2, 8) (2, 15)
OP '[' (2, 15) (2, 16)
STRING "'time'" (2, 16) (2, 22)
OP ']' (2, 22) (2, 23)
OP '.' (2, 23) (2, 24)
NAME 'time' (2, 24) (2, 28)
OP '(' (2, 28) (2, 29)
OP ')' (2, 29) (2, 30)

Methods

>>> dump_tokens("@staticmethod\\ndef foo(x,y): pass")
OP '@' (1, 0) (1, 1)
NAME 'staticmethod (1, 1) (1, 13)
NEWLINE '\\n' (1, 13) (1, 14)
NAME 'def' (2, 0) (2, 3)
NAME 'foo' (2, 4) (2, 7)
OP '(' (2, 7) (2, 8)
NAME 'x' (2, 8) (2, 9)
OP ',' (2, 9) (2, 10)
NAME 'y' (2, 10) (2, 11)
OP ')' (2, 11) (2, 12)
OP ':' (2, 12) (2, 13)
NAME 'pass' (2, 14) (2, 18)

Backslash means line continuation, except for comments

>>> roundtrip("x=1+\\\\n"
... "1\\n"
... "# This is a comment\\\\n"
... "# This also\\n")
True
>>> roundtrip("# Comment \\\\nx = 0")
True

Two string literals on the same line

>>> roundtrip("'' ''")
True

Test roundtrip on random python modules.
Pass the '-ucpu' option to process the full directory.

>>>
>>> tempdir = os.path.dirname(f) or os.curdir
>>> testfiles = glob.glob(os.path.join(tempdir, "test*.py"))

>>> if not test_support.is_resource_enabled("cpu"):
...     testfiles = random.sample(testfiles, 10)
...
>>> for testfile in testfiles:
...     if not roundtrip(open(testfile)):
...         print "Roundtrip failed for file %s" % testfile
...         break
... else: True
True

Evil tabs
>>> dump_tokens("def f():\\n\\tif x\\n        \\tpass")
NAME 'def' (1, 0) (1, 3)
NAME 'f' (1, 4) (1, 5)
OP '(' (1, 5) (1, 6)
OP ')' (1, 6) (1, 7)
OP ':' (1, 7) (1, 8)
NEWLINE '\\n' (1, 8) (1, 9)
INDENT '\\t' (2, 0) (2, 1)
NAME 'if' (2, 1) (2, 3)
NAME 'x' (2, 4) (2, 5)
NEWLINE '\\n' (2, 5) (2, 6)
INDENT '        \\t' (3, 0) (3, 9)
NAME 'pass' (3, 9) (3, 13)
DEDENT '' (4, 0) (4, 0)
DEDENT '' (4, 0) (4, 0)

Pathological whitespace (http://bugs.python.org/issue16152)
>>> dump_tokens("@ ")
OP '@' (1, 0) (1, 1)
557 | """
|
---|
558 |
|
---|
559 |
|
---|
560 | from test import test_support
|
---|
561 | from tokenize import (untokenize, generate_tokens, NUMBER, NAME, OP,
|
---|
562 | STRING, ENDMARKER, tok_name)
|
---|
563 | from StringIO import StringIO
|
---|
564 | import os
|
---|
565 |
|
---|
def dump_tokens(s):
    """Print out the tokens in s in a table format.

    The ENDMARKER is omitted.
    """
    f = StringIO(s)
    for type, token, start, end, line in generate_tokens(f.readline):
        if type == ENDMARKER:
            break
        type = tok_name[type]
        print("%(type)-10.10s %(token)-13.13r %(start)s %(end)s" % locals())
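
# A minimal sketch (not part of the original tests; the helper name is
# illustrative only and is never called) of the five-tuples that
# generate_tokens() hands to dump_tokens() above: each token is
# (type, string, (srow, scol), (erow, ecol), line), and tok_name maps the
# numeric type to the name printed in the tables.
def _token_type_names(s):
    """Return the token type names for a source string (illustrative only)."""
    return [tok_name[tok[0]]
            for tok in generate_tokens(StringIO(s).readline)]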

# This is an example from the docs, set up as a doctest.
def decistmt(s):
    """Substitute Decimals for floats in a string of statements.

    >>> from decimal import Decimal
    >>> s = 'print +21.3e-5*-.1234/81.7'
    >>> decistmt(s)
    "print +Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')"

    The format of the exponent is inherited from the platform C library.
    Known cases are "e-007" (Windows) and "e-07" (not Windows). Since
    we're only showing 12 digits, and the 13th isn't close to 5, the
    rest of the output should be platform-independent.

    >>> exec(s) #doctest: +ELLIPSIS
    -3.21716034272e-0...7

    Output from calculations with Decimal should be identical across all
    platforms.

    >>> exec(decistmt(s))
    -3.217160342717258261933904529E-7
    """

    result = []
    g = generate_tokens(StringIO(s).readline)   # tokenize the string
    for toknum, tokval, _, _, _ in g:
        if toknum == NUMBER and '.' in tokval:  # replace NUMBER tokens
            result.extend([
                (NAME, 'Decimal'),
                (OP, '('),
                (STRING, repr(tokval)),
                (OP, ')')
            ])
        else:
            result.append((toknum, tokval))
    return untokenize(result)
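
# Both decistmt() and the roundtrip() helper defined in the doctests feed
# untokenize() plain (type, string) pairs rather than full five-tuples, so
# untokenize() falls back to a compatibility mode that guesses the spacing
# between tokens (hence "Decimal (" with a space in the expected output
# above).  A minimal sketch of that usage; the helper name is illustrative
# only and is not used by the tests.
def _retokenized_text(source):
    """Re-render source text from (type, string) pairs (illustrative only)."""
    pairs = [tok[:2] for tok in generate_tokens(StringIO(source).readline)]
    return untokenize(pairs)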


__test__ = {"doctests" : doctests, 'decistmt': decistmt}


def test_main():
    from test import test_tokenize
    test_support.run_doctest(test_tokenize, True)

if __name__ == "__main__":
    test_main()