Context Navigation

← Previous Revision
Next Revision →
Blame
Revision Log

test_textwrap.py

Last change on this file was 391, checked in by dmik, 11 years ago
python: Merge vendor 2.7.6 to trunk.
Property svn:eol-style set to `native`
File size: 25.0 KB

Line
1	#
2	# Test suite for the textwrap module.
3	#
4	# Original tests written by Greg Ward <gward@python.net>.
5	# Converted to PyUnit by Peter Hansen <peter@engcorp.com>.
6	# Currently maintained by Greg Ward.
7	#
8	# $Id$
9	#
10
11	import unittest
12	from test import test_support
13
14	from textwrap import TextWrapper, wrap, fill, dedent
15
16
class BaseTestCase(unittest.TestCase):
    '''Parent class with utility methods for textwrap tests.

    check_split() reads ``self.wrapper``; subclasses that call it must
    assign a TextWrapper to that attribute first (e.g. in setUp()).
    '''

    def show(self, textin):
        '''Return a printable rendering of 'textin' for failure messages.

        A list is rendered one element per line as " <index>: <repr>",
        joined with newlines.  A string is rendered as its repr followed
        by a newline.  Any other value is rendered as its plain repr.
        '''
        if isinstance(textin, list):
            result = '\n'.join(" %d: %r" % (index, item)
                               for index, item in enumerate(textin))
        elif isinstance(textin, basestring):
            result = " %s\n" % repr(textin)
        else:
            # check() formats both operands eagerly, before assertEqual
            # runs.  Without this branch 'result' was unbound for e.g.
            # None or a tuple, and the UnboundLocalError hid the real
            # comparison failure.
            result = repr(textin)
        return result

    def check(self, result, expect):
        '''Assert that 'result' equals 'expect', showing both on failure.'''
        self.assertEqual(result, expect,
                         'expected:\n%s\nbut got:\n%s' % (
                             self.show(expect), self.show(result)))

    def check_wrap(self, text, width, expect, **kwargs):
        '''Assert that wrap(text, width, **kwargs) returns 'expect'.'''
        result = wrap(text, width, **kwargs)
        self.check(result, expect)

    def check_split(self, text, expect):
        '''Assert that self.wrapper._split(text) returns 'expect'.'''
        result = self.wrapper._split(text)
        self.assertEqual(result, expect,
                         "\nexpected %r\n"
                         "but got  %r" % (expect, result))
45
46
class WrapTestCase(BaseTestCase):
    '''Tests for wrap(), fill(), TextWrapper.wrap() and _split().'''

    # NOTE(review): some expectations below (e.g. in
    # test_drop_whitespace_whitespace_line) only make sense if the
    # literals contained runs of several spaces.  This copy shows single
    # spaces, so confirm the literals against the upstream file before
    # trusting a failure here.

    def setUp(self):
        # check_split() goes through self.wrapper, so give every test
        # a fresh one.
        self.wrapper = TextWrapper(width=45)

    def test_simple(self):
        # Nothing fancy: words, spaces and a little punctuation.

        line = "Hello there, how are you this fine day? I'm glad to hear it!"

        twelve_wide = ["Hello there,",
                       "how are you",
                       "this fine",
                       "day? I'm",
                       "glad to hear",
                       "it!"]
        self.check_wrap(line, 12, twelve_wide)

        forty_two_wide = ["Hello there, how are you this fine day?",
                          "I'm glad to hear it!"]
        self.check_wrap(line, 42, forty_two_wide)

        self.check_wrap(line, 80, [line])

    def test_empty_string(self):
        # Wrapping the empty string must give back an empty list.
        for options in ({}, {'drop_whitespace': False}):
            self.check_wrap("", 6, [], **options)

    def test_empty_string_with_initial_indent(self):
        # The empty string must not pick up the initial indent.
        for options in ({}, {'drop_whitespace': False}):
            self.check_wrap("", 6, [], initial_indent="++", **options)

    def test_whitespace(self):
        # Whitespace munging plus end-of-sentence detection.

        paragraph = ("This is a paragraph that already has\n"
                     "line breaks. But some of its lines are much "
                     "longer than the others,\n"
                     "so it needs to be wrapped.\n"
                     "Some lines are \ttabbed too.\n"
                     "What a mess!\n")

        wanted = ["This is a paragraph that already has line",
                  "breaks. But some of its lines are much",
                  "longer than the others, so it needs to be",
                  "wrapped. Some lines are tabbed too. What a",
                  "mess!"]

        fixer = TextWrapper(45, fix_sentence_endings=True)
        self.check(fixer.wrap(paragraph), wanted)
        self.check(fixer.fill(paragraph), '\n'.join(wanted))

    def test_fix_sentence_endings(self):
        fixer = TextWrapper(60, fix_sentence_endings=True)

        doctor = ["Well, Doctor? What do you think?"]
        tennis = 'I say, chaps! Anyone for "tennis?"\nHmmph!'
        said = 'And she said, "Go to hell!"\nCan you believe that?'

        # Each entry is (wrapper width, input text, expected lines);
        # the entries run in order against the same wrapper.
        cases = [
            # SF #847346: fix_sentence_endings=True has to do the right
            # thing even when the input is too short to need wrapping.
            (60, "A short line. Note the single space.",
             ["A short line. Note the single space."]),

            # Hairy end cases for _fix_sentence_endings(); the easy
            # stuff is covered by test_whitespace().
            (60, "Well, Doctor? What do you think?", doctor),
            (60, "Well, Doctor?\nWhat do you think?", doctor),
            (60, tennis, ['I say, chaps! Anyone for "tennis?" Hmmph!']),
            (20, tennis,
             ['I say, chaps!', 'Anyone for "tennis?"', 'Hmmph!']),
            (20, said,
             ['And she said, "Go to',
              'hell!" Can you',
              'believe that?']),
            (60, said,
             ['And she said, "Go to hell!" Can you believe that?']),
            (60, 'File stdio.h is nice.', ['File stdio.h is nice.']),
        ]
        for width, sample, wanted in cases:
            fixer.width = width
            self.check(fixer.wrap(sample), wanted)

    def test_wrap_short(self):
        # Wrapping that makes short lines longer.

        paragraph = "This is a\nshort paragraph."

        self.check_wrap(paragraph, 20, ["This is a short", "paragraph."])
        self.check_wrap(paragraph, 40, ["This is a short paragraph."])

    def test_wrap_short_1line(self):
        # End cases: a single line that already fits.

        line = "This is a short line."

        self.check_wrap(line, 30, ["This is a short line."])
        self.check_wrap(line, 30, ["(1) This is a short line."],
                        initial_indent="(1) ")

    def test_hyphenated(self):
        # Breaking of hyphenated words.

        head = "this-is-a-useful-feature-for-"
        tail = "reformatting-posts-from-tim-peters'ly"
        text = head + tail

        for width in (40, 41):
            self.check_wrap(text, width,
                            ["this-is-a-useful-feature-for-",
                             "reformatting-posts-from-tim-peters'ly"])
        self.check_wrap(text, 42,
                        ["this-is-a-useful-feature-for-reformatting-",
                         "posts-from-tim-peters'ly"])

    def test_hyphenated_numbers(self):
        # Hyphenated numbers (eg. dates) must not be broken like words.
        releases = ("Python 1.0.0 was released on 1994-01-26. "
                    "Python 1.0.1 was\n"
                    "released on 1994-02-15.")

        self.check_wrap(releases, 35,
                        ['Python 1.0.0 was released on',
                         '1994-01-26. Python 1.0.1 was',
                         'released on 1994-02-15.'])
        self.check_wrap(releases, 40,
                        ['Python 1.0.0 was released on 1994-01-26.',
                         'Python 1.0.1 was released on 1994-02-15.'])

        shopping = "I do all my shopping at 7-11."
        for width in (25, 27):
            self.check_wrap(shopping, width,
                            ["I do all my shopping at", "7-11."])
        self.check_wrap(shopping, 29, ["I do all my shopping at 7-11."])

    def test_em_dash(self):
        # Text containing em-dashes.
        text = "Em-dashes should be written -- thus."
        self.check_wrap(text, 25,
                        ["Em-dashes should be", "written -- thus."])

        # Probe the boundaries of the properly written em-dash,
        # ie. " -- ".
        self.check_wrap(text, 29,
                        ["Em-dashes should be written", "-- thus."])
        dash_kept = ["Em-dashes should be written --", "thus."]
        for width in (30, 35):
            self.check_wrap(text, width, dash_kept)
        self.check_wrap(text, 36,
                        ["Em-dashes should be written -- thus."])

        # The improperly written em-dash is handled as well, since it
        # touches non-whitespace on both sides.
        text = "You can also do--this or even---this."
        by_widths = [
            ((15, 16), ["You can also do",
                        "--this or even",
                        "---this."]),
            ((17, 19), ["You can also do--",
                        "this or even---",
                        "this."]),
            ((29, 31), ["You can also do--this or even",
                        "---this."]),
            ((32, 35), ["You can also do--this or even---",
                        "this."]),
        ]
        for widths, wanted in by_widths:
            for width in widths:
                self.check_wrap(text, width, wanted)

        # Everything above follows from what _split() produces.
        split_cases = [
            ("Here's an -- em-dash and--here's another---and another!",
             ["Here's", " ", "an", " ", "--", " ", "em-", "dash", " ",
              "and", "--", "here's", " ", "another", "---",
              "and", " ", "another!"]),
            ("and then--bam!--he was gone",
             ["and", " ", "then", "--", "bam!", "--",
              "he", " ", "was", " ", "gone"]),
        ]
        for text, chunks in split_cases:
            self.check_split(text, chunks)

    def test_unix_options(self):
        # Unix-style command-line options must wrap correctly.
        # Both Optik (OptionParser) and Docutils rely on this behaviour!

        text = "You should use the -n option, or --dry-run in its long form."
        self.check_wrap(text, 20,
                        ["You should use the",
                         "-n option, or --dry-",
                         "run in its long",
                         "form."])
        self.check_wrap(text, 21,
                        ["You should use the -n",
                         "option, or --dry-run",
                         "in its long form."])

        by_widths = [
            ((32, 34, 35, 38), ["You should use the -n option, or",
                                "--dry-run in its long form."]),
            ((39, 41), ["You should use the -n option, or --dry-",
                        "run in its long form."]),
            ((42,), ["You should use the -n option, or --dry-run",
                     "in its long form."]),
        ]
        for widths, wanted in by_widths:
            for width in widths:
                self.check_wrap(text, width, wanted)

        # Again, all of the above can be deduced from _split().
        self.check_split("the -n option, or --dry-run or --dryrun",
                         ["the", " ", "-n", " ", "option,", " ", "or", " ",
                          "--dry-", "run", " ", "or", " ", "--dryrun"])

    def test_funky_hyphens(self):
        split_cases = [
            # Screwy edge cases cooked up by David Goodger.  All
            # reported in SF bug #596434.
            ("what the--hey!", ["what", " ", "the", "--", "hey!"]),
            ("what the--", ["what", " ", "the--"]),
            ("what the--.", ["what", " ", "the--."]),
            ("--text--.", ["--text--."]),

            # On first reading of bug #596434 these looked like the
            # reported problem, but they have always worked fine.  The
            # real problem is covered by test_funky_parens().
            ("--option", ["--option"]),
            ("--option-opt", ["--option-", "opt"]),
            ("foo --option-opt bar",
             ["foo", " ", "--option-", "opt", " ", "bar"]),
        ]
        for text, chunks in split_cases:
            self.check_split(text, chunks)

    def test_punct_hyphens(self):
        # SF #965425: hyphenated words in single quotes weren't handled
        # correctly.  In fact any punctuation around a hyphenated word
        # was mishandled, except a leading "--", which was special-cased
        # for Optik and Docutils.  So try several styles of punctuation
        # around a hyphenated word.
        # (Actually this is based on an Optik bug report, #813077).
        split_cases = [
            ("the 'wibble-wobble' widget",
             ['the', ' ', "'wibble-", "wobble'", ' ', 'widget']),
            ('the "wibble-wobble" widget',
             ['the', ' ', '"wibble-', 'wobble"', ' ', 'widget']),
            ("the (wibble-wobble) widget",
             ['the', ' ', "(wibble-", "wobble)", ' ', 'widget']),
            ("the ['wibble-wobble'] widget",
             ['the', ' ', "['wibble-", "wobble']", ' ', 'widget']),
        ]
        for text, chunks in split_cases:
            self.check_split(text, chunks)

    def test_funky_parens(self):
        split_cases = [
            # Second part of SF bug #596434: long option strings inside
            # parentheses.
            ("foo (--option) bar",
             ["foo", " ", "(--option)", " ", "bar"]),

            # Related: parens must also work in simpler contexts.
            ("foo (bar) baz",
             ["foo", " ", "(bar)", " ", "baz"]),
            ("blah (ding dong), wubba",
             ["blah", " ", "(ding", " ", "dong),",
              " ", "wubba"]),
        ]
        for text, chunks in split_cases:
            self.check_split(text, chunks)

    def test_drop_whitespace_false(self):
        # drop_whitespace=False must preserve whitespace.
        # SF patch #1581073
        text = " This is a sentence with much whitespace."
        wanted = [" This is a", " ", "sentence ",
                  "with ", "much white", "space."]
        self.check_wrap(text, 10, wanted, drop_whitespace=False)

    def test_drop_whitespace_false_whitespace_only(self):
        # drop_whitespace=False must preserve a whitespace-only string.
        self.check_wrap(" ", 6, [" "], drop_whitespace=False)

    def test_drop_whitespace_false_whitespace_only_with_indent(self):
        # A whitespace-only string gets indented when drop_whitespace
        # is False.
        self.check_wrap(" ", 6, [" "],
                        drop_whitespace=False, initial_indent=" ")

    def test_drop_whitespace_whitespace_only(self):
        # drop_whitespace applied to a whitespace-only string.
        self.check_wrap(" ", 6, [])

    def test_drop_whitespace_leading_whitespace(self):
        # drop_whitespace must not drop leading whitespace that is
        # followed by non-whitespace.
        # SF bug #622849 reported inconsistent handling of leading
        # whitespace, so exercise it a little.
        text = " This is a sentence with leading whitespace."
        self.check_wrap(text, 50,
                        [" This is a sentence with leading whitespace."])
        self.check_wrap(text, 30,
                        [" This is a sentence with", "leading whitespace."])

    def test_drop_whitespace_whitespace_line(self):
        # drop_whitespace skips a whole non-leading line that consists
        # only of whitespace.
        text = "abcd efgh"
        # The drop_whitespace=False result is included for comparison.
        self.check_wrap(text, 6, ["abcd", " ", "efgh"],
                        drop_whitespace=False)
        self.check_wrap(text, 6, ["abcd", "efgh"])

    def test_drop_whitespace_whitespace_only_with_indent(self):
        # initial_indent is not applied to a whitespace-only string: a
        # special case of whitespace being dropped before indenting.
        self.check_wrap(" ", 6, [], initial_indent="++")

    def test_drop_whitespace_whitespace_indent(self):
        # drop_whitespace must leave whitespace indents alone: another
        # special case of whitespace being dropped before indenting.
        self.check_wrap("abcd efgh", 6, [" abcd", " efgh"],
                        initial_indent=" ", subsequent_indent=" ")

    if test_support.have_unicode:
        def test_unicode(self):
            # A very simple check of wrapping Unicode strings; it only
            # makes sure textwrap doesn't crash on Unicode input and
            # hands Unicode back.
            greeting = u"Hello there, how are you today?"
            self.check_wrap(greeting, 50,
                            [u"Hello there, how are you today?"])
            self.check_wrap(greeting, 20,
                            [u"Hello there, how are", "you today?"])

            wrapped_lines = self.wrapper.wrap(greeting)
            self.assertIsInstance(wrapped_lines, list)
            self.assertIsInstance(wrapped_lines[0], unicode)

            filled = self.wrapper.fill(greeting)
            self.assertIsInstance(filled, unicode)

        def test_no_split_at_umlaut(self):
            phrase = u"Die Empf\xe4nger-Auswahl"
            wanted = [u"Die", u"Empf\xe4nger-", u"Auswahl"]
            self.check_wrap(phrase, 13, wanted)

        def test_umlaut_followed_by_dash(self):
            phrase = u"aa \xe4\xe4-\xe4\xe4"
            wanted = [u"aa \xe4\xe4-", u"\xe4\xe4"]
            self.check_wrap(phrase, 7, wanted)

    def test_split(self):
        # The standard _split() method must work as advertised in the
        # comments.

        text = "Hello there -- you goof-ball, use the -b option!"

        chunks = self.wrapper._split(text)
        wanted = ["Hello", " ", "there", " ", "--", " ", "you", " ",
                  "goof-", "ball,", " ", "use", " ", "the", " ", "-b",
                  " ", "option!"]
        self.check(chunks, wanted)

    def test_break_on_hyphens(self):
        # The break_on_hyphens attribute must be honoured.
        text = "yaba daba-doo"
        outcomes = [(True, ["yaba daba-", "doo"]),
                    (False, ["yaba", "daba-doo"])]
        for flag, wanted in outcomes:
            self.check_wrap(text, 10, wanted, break_on_hyphens=flag)

    def test_bad_width(self):
        # width <= 0 must be rejected.
        text = "Whatever, it doesn't matter."
        for bad_width in (0, -1):
            self.assertRaises(ValueError, wrap, text, bad_width)
437
438
class LongWordTestCase(BaseTestCase):
    '''Tests for text containing words longer than the wrap width.'''

    # NOTE(review): the SF bug 797650 expectation below pairs a 15-space
    # subsequent_indent with one-space lines, so whitespace inside the
    # literals may have been collapsed in this copy.  Confirm against
    # the upstream file.

    def setUp(self):
        # A default-width wrapper plus two lines of text, the first of
        # which contains one very long quoted word.
        self.wrapper = TextWrapper()
        self.text = ('Did you say "supercalifragilisticexpialidocious?"\n'
                     'How do you spell that odd word, anyways?\n')

    def test_break_long(self):
        # Long words and plenty of punctuation.

        thirty_wide = ['Did you say "supercalifragilis',
                       'ticexpialidocious?" How do',
                       'you spell that odd word,',
                       'anyways?']
        self.check_wrap(self.text, 30, thirty_wide)

        fifty_wide = ['Did you say "supercalifragilisticexpialidocious?"',
                      'How do you spell that odd word, anyways?']
        self.check_wrap(self.text, 50, fifty_wide)

        # SF bug 797650.  Prevent an infinite loop by making sure that
        # at least one character gets split off on every pass.
        dashes_then_word = '-'*10+'hello'
        one_char_lines = ['----------',
                          ' h',
                          ' e',
                          ' l',
                          ' l',
                          ' o']
        self.check_wrap(dashes_then_word, 10, one_char_lines,
                        subsequent_indent=' '*15)

        # bug 1146.  A long word must not be wrongly wrapped when the
        # preceding word is exactly one character shorter than the width.
        twelve_wide = ['Did you say ',
                       '"supercalifr',
                       'agilisticexp',
                       'ialidocious?',
                       '" How do',
                       'you spell',
                       'that odd',
                       'word,',
                       'anyways?']
        self.check_wrap(self.text, 12, twelve_wide)

    def test_nobreak_long(self):
        # Same text with break_long_words switched off.
        wanted = ['Did you say',
                  '"supercalifragilisticexpialidocious?"',
                  'How do you spell that odd',
                  'word, anyways?'
                  ]

        self.wrapper.break_long_words = 0
        self.wrapper.width = 30
        self.check(self.wrapper.wrap(self.text), wanted)

        # The standalone wrap() function must agree when given the same
        # settings as keyword arguments.
        self.check(wrap(self.text, width=30, break_long_words=0), wanted)
498
499
class IndentTestCases(BaseTestCase):
    '''Tests for the initial_indent and subsequent_indent options.'''

    # NOTE(review): the indent strings and expected values below show
    # single spaces in this copy; runs of spaces may have been collapsed.
    # Confirm against the upstream file.

    # runs before every test method
    def setUp(self):
        self.text = ('This paragraph will be filled, '
                     'first without any indentation,\n'
                     'and then with some (including a hanging indent).')

    def test_fill(self):
        # Plain fill(), no indentation at all.

        wanted = ('This paragraph will be filled, first\n'
                  'without any indentation, and then with\n'
                  'some (including a hanging indent).')

        self.check(fill(self.text, 40), wanted)

    def test_initial_indent(self):
        # The initial_indent parameter, through wrap() and then fill().

        wanted_lines = [" This paragraph will be filled,",
                        "first without any indentation, and then",
                        "with some (including a hanging indent)."]
        self.check(wrap(self.text, 40, initial_indent=" "), wanted_lines)

        wanted_text = "\n".join(wanted_lines)
        self.check(fill(self.text, 40, initial_indent=" "), wanted_text)

    def test_subsequent_indent(self):
        # The subsequent_indent parameter (a hanging indent).

        wanted = ('* This paragraph will be filled, first\n'
                  'without any indentation, and then\n'
                  'with some (including a hanging\n'
                  'indent).')

        filled = fill(self.text, 40,
                      initial_indent=" * ", subsequent_indent=" ")
        self.check(filled, wanted)
547
548
549	# Despite the similar names, DedentTestCase is not the inverse
550	# of IndentTestCases!
551	class DedentTestCase(unittest.TestCase):
552
553	def assertUnchanged(self, text):
554	"""assert that dedent() has no effect on 'text'"""
555	self.assertEqual(text, dedent(text))
556
557	def test_dedent_nomargin(self):
558	# No lines indented.
559	text = "Hello there.\nHow are you?\nOh good, I'm glad."
560	self.assertUnchanged(text)
561
562	# Similar, with a blank line.
563	text = "Hello there.\n\nBoo!"
564	self.assertUnchanged(text)
565
566	# Some lines indented, but overall margin is still zero.
567	text = "Hello there.\n This is indented."
568	self.assertUnchanged(text)
569
570	# Again, add a blank line.
571	text = "Hello there.\n\n Boo!\n"
572	self.assertUnchanged(text)
573
574	def test_dedent_even(self):
575	# All lines indented by two spaces.
576	text = " Hello there.\n How are ya?\n Oh good."
577	expect = "Hello there.\nHow are ya?\nOh good."
578	self.assertEqual(expect, dedent(text))
579
580	# Same, with blank lines.
581	text = " Hello there.\n\n How are ya?\n Oh good.\n"
582	expect = "Hello there.\n\nHow are ya?\nOh good.\n"
583	self.assertEqual(expect, dedent(text))
584
585	# Now indent one of the blank lines.
586	text = " Hello there.\n \n How are ya?\n Oh good.\n"
587	expect = "Hello there.\n\nHow are ya?\nOh good.\n"
588	self.assertEqual(expect, dedent(text))
589
590	def test_dedent_uneven(self):
591	# Lines indented unevenly.
592	text = '''\
593	def foo():
594	while 1:
595	return foo
596	'''
597	expect = '''\
598	def foo():
599	while 1:
600	return foo
601	'''
602	self.assertEqual(expect, dedent(text))
603
604	# Uneven indentation with a blank line.
605	text = " Foo\n Bar\n\n Baz\n"
606	expect = "Foo\n Bar\n\n Baz\n"
607	self.assertEqual(expect, dedent(text))
608
609	# Uneven indentation with a whitespace-only line.
610	text = " Foo\n Bar\n \n Baz\n"
611	expect = "Foo\n Bar\n\n Baz\n"
612	self.assertEqual(expect, dedent(text))
613
614	# dedent() should not mangle internal tabs
615	def test_dedent_preserve_internal_tabs(self):
616	text = " hello\tthere\n how are\tyou?"
617	expect = "hello\tthere\nhow are\tyou?"
618	self.assertEqual(expect, dedent(text))
619
620	# make sure that it preserves tabs when it's not making any
621	# changes at all
622	self.assertEqual(expect, dedent(expect))
623
624	# dedent() should not mangle tabs in the margin (i.e.
625	# tabs and spaces both count as margin, but are not
626	# considered equivalent)
627	def test_dedent_preserve_margin_tabs(self):
628	text = " hello there\n\thow are you?"
629	self.assertUnchanged(text)
630
631	# same effect even if we have 8 spaces
632	text = " hello there\n\thow are you?"
633	self.assertUnchanged(text)
634
635	# dedent() only removes whitespace that can be uniformly removed!
636	text = "\thello there\n\thow are you?"
637	expect = "hello there\nhow are you?"
638	self.assertEqual(expect, dedent(text))
639
640	text = " \thello there\n \thow are you?"
641	self.assertEqual(expect, dedent(text))
642
643	text = " \t hello there\n \t how are you?"
644	self.assertEqual(expect, dedent(text))
645
646	text = " \thello there\n \t how are you?"
647	expect = "hello there\n how are you?"
648	self.assertEqual(expect, dedent(text))
649
650
def test_main():
    '''Run every textwrap test case class through test_support.'''
    case_classes = (WrapTestCase,
                    LongWordTestCase,
                    IndentTestCases,
                    DedentTestCase)
    test_support.run_unittest(*case_classes)


# Run the whole suite when this file is executed as a script.
if __name__ == '__main__':
    test_main()

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: python/trunk/Lib/test/test_textwrap.py

Download in other formats: