source: python/trunk/Lib/test/test_textwrap.py

Last change on this file was 391, checked in by dmik, 11 years ago

python: Merge vendor 2.7.6 to trunk.

  • Property svn:eol-style set to native
File size: 25.0 KB
Line 
#
# Test suite for the textwrap module.
#
# Original tests written by Greg Ward <gward@python.net>.
# Converted to PyUnit by Peter Hansen <peter@engcorp.com>.
# Currently maintained by Greg Ward.
#
# $Id$
#
10
11import unittest
12from test import test_support
13
14from textwrap import TextWrapper, wrap, fill, dedent
15
16
class BaseTestCase(unittest.TestCase):
    '''Parent class with utility methods for textwrap tests.

    check_split() reads self.wrapper, which this class does not create;
    subclasses that call it must assign a TextWrapper to self.wrapper
    (normally in setUp()).
    '''

    def show(self, textin):
        '''Return a printable rendering of 'textin' for failure messages.

        A list is rendered as one " index: repr" entry per item, joined
        by newlines.  Any other value is rendered as its repr() followed
        by a newline, so building the message can never fail and hide
        the assertion that triggered it.
        '''
        if isinstance(textin, list):
            return '\n'.join([" %d: %r" % (index, item)
                              for index, item in enumerate(textin)])
        return " %s\n" % repr(textin)

    def check(self, result, expect):
        '''Assert that 'result' equals 'expect', showing both on failure.'''
        self.assertEqual(result, expect,
            'expected:\n%s\nbut got:\n%s' % (
                self.show(expect), self.show(result)))

    def check_wrap(self, text, width, expect, **kwargs):
        '''Assert that wrap(text, width, **kwargs) returns 'expect'.'''
        result = wrap(text, width, **kwargs)
        self.check(result, expect)

    def check_split(self, text, expect):
        '''Assert that self.wrapper._split(text) returns 'expect'.'''
        result = self.wrapper._split(text)
        self.assertEqual(result, expect,
                         "\nexpected %r\n"
                         "but got %r" % (expect, result))
46
class WrapTestCase(BaseTestCase):
    # Tests for wrap(), TextWrapper.wrap()/fill() and the private
    # TextWrapper._split() chunker.
    #
    # Runs of spaces inside the string literals below are significant:
    # many of these tests exist precisely to pin down how whitespace is
    # kept, widened or dropped.

    def setUp(self):
        # Shared wrapper used by check_split() and by the tests that call
        # self.wrapper directly.
        self.wrapper = TextWrapper(width=45)

    def test_simple(self):
        # Simple case: just words, spaces, and a bit of punctuation

        text = "Hello there, how are you this fine day? I'm glad to hear it!"

        self.check_wrap(text, 12,
                        ["Hello there,",
                         "how are you",
                         "this fine",
                         "day? I'm",
                         "glad to hear",
                         "it!"])
        self.check_wrap(text, 42,
                        ["Hello there, how are you this fine day?",
                         "I'm glad to hear it!"])
        self.check_wrap(text, 80, [text])

    def test_empty_string(self):
        # Check that wrapping the empty string returns an empty list.
        self.check_wrap("", 6, [])
        self.check_wrap("", 6, [], drop_whitespace=False)

    def test_empty_string_with_initial_indent(self):
        # Check that the empty string is not indented.
        self.check_wrap("", 6, [], initial_indent="++")
        self.check_wrap("", 6, [], initial_indent="++", drop_whitespace=False)

    def test_whitespace(self):
        # Whitespace munging and end-of-sentence detection

        text = """\
This is a paragraph that already has
line breaks. But some of its lines are much longer than the others,
so it needs to be wrapped.
Some lines are \ttabbed too.
What a mess!
"""

        # fix_sentence_endings puts two spaces after each sentence end,
        # and the tab after "are " expands to one more space, hence the
        # double space before "tabbed".
        expect = ["This is a paragraph that already has line",
                  "breaks.  But some of its lines are much",
                  "longer than the others, so it needs to be",
                  "wrapped.  Some lines are  tabbed too.  What a",
                  "mess!"]

        wrapper = TextWrapper(45, fix_sentence_endings=True)
        result = wrapper.wrap(text)
        self.check(result, expect)

        result = wrapper.fill(text)
        self.check(result, '\n'.join(expect))

    def test_fix_sentence_endings(self):
        wrapper = TextWrapper(60, fix_sentence_endings=True)

        # SF #847346: ensure that fix_sentence_endings=True does the
        # right thing even on input short enough that it doesn't need to
        # be wrapped.  The single space after "line." must be widened
        # to two.
        text = "A short line. Note the single space."
        expect = ["A short line.  Note the single space."]
        self.check(wrapper.wrap(text), expect)

        # Test some of the hairy end cases that _fix_sentence_endings()
        # is supposed to handle (the easy stuff is tested in
        # test_whitespace() above).
        text = "Well, Doctor? What do you think?"
        expect = ["Well, Doctor?  What do you think?"]
        self.check(wrapper.wrap(text), expect)

        text = "Well, Doctor?\nWhat do you think?"
        self.check(wrapper.wrap(text), expect)

        text = 'I say, chaps! Anyone for "tennis?"\nHmmph!'
        expect = ['I say, chaps!  Anyone for "tennis?"  Hmmph!']
        self.check(wrapper.wrap(text), expect)

        wrapper.width = 20
        expect = ['I say, chaps!', 'Anyone for "tennis?"', 'Hmmph!']
        self.check(wrapper.wrap(text), expect)

        text = 'And she said, "Go to hell!"\nCan you believe that?'
        expect = ['And she said, "Go to',
                  'hell!"  Can you',
                  'believe that?']
        self.check(wrapper.wrap(text), expect)

        wrapper.width = 60
        expect = ['And she said, "Go to hell!"  Can you believe that?']
        self.check(wrapper.wrap(text), expect)

        # A period inside a word is not a sentence ending.
        text = 'File stdio.h is nice.'
        expect = ['File stdio.h is nice.']
        self.check(wrapper.wrap(text), expect)

    def test_wrap_short(self):
        # Wrapping to make short lines longer

        text = "This is a\nshort paragraph."

        self.check_wrap(text, 20, ["This is a short",
                                   "paragraph."])
        self.check_wrap(text, 40, ["This is a short paragraph."])

    def test_wrap_short_1line(self):
        # Test endcases

        text = "This is a short line."

        self.check_wrap(text, 30, ["This is a short line."])
        self.check_wrap(text, 30, ["(1) This is a short line."],
                        initial_indent="(1) ")

    def test_hyphenated(self):
        # Test breaking hyphenated words

        text = ("this-is-a-useful-feature-for-"
                "reformatting-posts-from-tim-peters'ly")

        self.check_wrap(text, 40,
                        ["this-is-a-useful-feature-for-",
                         "reformatting-posts-from-tim-peters'ly"])
        self.check_wrap(text, 41,
                        ["this-is-a-useful-feature-for-",
                         "reformatting-posts-from-tim-peters'ly"])
        self.check_wrap(text, 42,
                        ["this-is-a-useful-feature-for-reformatting-",
                         "posts-from-tim-peters'ly"])

    def test_hyphenated_numbers(self):
        # Test that hyphenated numbers (eg. dates) are not broken like words.
        text = ("Python 1.0.0 was released on 1994-01-26. Python 1.0.1 was\n"
                "released on 1994-02-15.")

        self.check_wrap(text, 35, ['Python 1.0.0 was released on',
                                   '1994-01-26. Python 1.0.1 was',
                                   'released on 1994-02-15.'])
        self.check_wrap(text, 40, ['Python 1.0.0 was released on 1994-01-26.',
                                   'Python 1.0.1 was released on 1994-02-15.'])

        text = "I do all my shopping at 7-11."
        self.check_wrap(text, 25, ["I do all my shopping at",
                                   "7-11."])
        self.check_wrap(text, 27, ["I do all my shopping at",
                                   "7-11."])
        self.check_wrap(text, 29, ["I do all my shopping at 7-11."])

    def test_em_dash(self):
        # Test text with em-dashes
        text = "Em-dashes should be written -- thus."
        self.check_wrap(text, 25,
                        ["Em-dashes should be",
                         "written -- thus."])

        # Probe the boundaries of the properly written em-dash,
        # ie. " -- ".
        self.check_wrap(text, 29,
                        ["Em-dashes should be written",
                         "-- thus."])
        expect = ["Em-dashes should be written --",
                  "thus."]
        self.check_wrap(text, 30, expect)
        self.check_wrap(text, 35, expect)
        self.check_wrap(text, 36,
                        ["Em-dashes should be written -- thus."])

        # The improperly written em-dash is handled too, because
        # it's adjacent to non-whitespace on both sides.
        text = "You can also do--this or even---this."
        expect = ["You can also do",
                  "--this or even",
                  "---this."]
        self.check_wrap(text, 15, expect)
        self.check_wrap(text, 16, expect)
        expect = ["You can also do--",
                  "this or even---",
                  "this."]
        self.check_wrap(text, 17, expect)
        self.check_wrap(text, 19, expect)
        expect = ["You can also do--this or even",
                  "---this."]
        self.check_wrap(text, 29, expect)
        self.check_wrap(text, 31, expect)
        expect = ["You can also do--this or even---",
                  "this."]
        self.check_wrap(text, 32, expect)
        self.check_wrap(text, 35, expect)

        # All of the above behaviour could be deduced by probing the
        # _split() method.
        text = "Here's an -- em-dash and--here's another---and another!"
        expect = ["Here's", " ", "an", " ", "--", " ", "em-", "dash", " ",
                  "and", "--", "here's", " ", "another", "---",
                  "and", " ", "another!"]
        self.check_split(text, expect)

        text = "and then--bam!--he was gone"
        expect = ["and", " ", "then", "--", "bam!", "--",
                  "he", " ", "was", " ", "gone"]
        self.check_split(text, expect)

    def test_unix_options(self):
        # Test that Unix-style command-line options are wrapped correctly.
        # Both Optik (OptionParser) and Docutils rely on this behaviour!

        text = "You should use the -n option, or --dry-run in its long form."
        self.check_wrap(text, 20,
                        ["You should use the",
                         "-n option, or --dry-",
                         "run in its long",
                         "form."])
        self.check_wrap(text, 21,
                        ["You should use the -n",
                         "option, or --dry-run",
                         "in its long form."])
        expect = ["You should use the -n option, or",
                  "--dry-run in its long form."]
        self.check_wrap(text, 32, expect)
        self.check_wrap(text, 34, expect)
        self.check_wrap(text, 35, expect)
        self.check_wrap(text, 38, expect)
        expect = ["You should use the -n option, or --dry-",
                  "run in its long form."]
        self.check_wrap(text, 39, expect)
        self.check_wrap(text, 41, expect)
        expect = ["You should use the -n option, or --dry-run",
                  "in its long form."]
        self.check_wrap(text, 42, expect)

        # Again, all of the above can be deduced from _split().
        text = "the -n option, or --dry-run or --dryrun"
        expect = ["the", " ", "-n", " ", "option,", " ", "or", " ",
                  "--dry-", "run", " ", "or", " ", "--dryrun"]
        self.check_split(text, expect)

    def test_funky_hyphens(self):
        # Screwy edge cases cooked up by David Goodger.  All reported
        # in SF bug #596434.
        self.check_split("what the--hey!", ["what", " ", "the", "--", "hey!"])
        self.check_split("what the--", ["what", " ", "the--"])
        self.check_split("what the--.", ["what", " ", "the--."])
        self.check_split("--text--.", ["--text--."])

        # When I first read bug #596434, this is what I thought David
        # was talking about.  I was wrong; these have always worked
        # fine.  The real problem is tested in test_funky_parens()
        # below...
        self.check_split("--option", ["--option"])
        self.check_split("--option-opt", ["--option-", "opt"])
        self.check_split("foo --option-opt bar",
                         ["foo", " ", "--option-", "opt", " ", "bar"])

    def test_punct_hyphens(self):
        # Oh bother, SF #965425 found another problem with hyphens --
        # hyphenated words in single quotes weren't handled correctly.
        # In fact, the bug is that *any* punctuation around a hyphenated
        # word was handled incorrectly, except for a leading "--", which
        # was special-cased for Optik and Docutils.  So test a variety
        # of styles of punctuation around a hyphenated word.
        # (Actually this is based on an Optik bug report, #813077).
        self.check_split("the 'wibble-wobble' widget",
                         ['the', ' ', "'wibble-", "wobble'", ' ', 'widget'])
        self.check_split('the "wibble-wobble" widget',
                         ['the', ' ', '"wibble-', 'wobble"', ' ', 'widget'])
        self.check_split("the (wibble-wobble) widget",
                         ['the', ' ', "(wibble-", "wobble)", ' ', 'widget'])
        self.check_split("the ['wibble-wobble'] widget",
                         ['the', ' ', "['wibble-", "wobble']", ' ', 'widget'])

    def test_funky_parens(self):
        # Second part of SF bug #596434: long option strings inside
        # parentheses.
        self.check_split("foo (--option) bar",
                         ["foo", " ", "(--option)", " ", "bar"])

        # Related stuff -- make sure parens work in simpler contexts.
        self.check_split("foo (bar) baz",
                         ["foo", " ", "(bar)", " ", "baz"])
        self.check_split("blah (ding dong), wubba",
                         ["blah", " ", "(ding", " ", "dong),",
                          " ", "wubba"])

    def test_drop_whitespace_false(self):
        # Check that drop_whitespace=False preserves whitespace.
        # SF patch #1581073
        # The runs after "a" and "with" are too wide to share a 10-column
        # line with the following word, so each run ends up at the start
        # or end of an output line instead of being discarded.
        text = " This is a    sentence with     much whitespace."
        self.check_wrap(text, 10,
                        [" This is a", "    ", "sentence ",
                         "with     ", "much white", "space."],
                        drop_whitespace=False)

    def test_drop_whitespace_false_whitespace_only(self):
        # Check that drop_whitespace=False preserves a whitespace-only string.
        self.check_wrap(" ", 6, [" "], drop_whitespace=False)

    def test_drop_whitespace_false_whitespace_only_with_indent(self):
        # Check that a whitespace-only string gets indented (when
        # drop_whitespace is False): the result is the two-space indent
        # followed by the three-space text.
        self.check_wrap("   ", 6, ["     "], drop_whitespace=False,
                        initial_indent="  ")

    def test_drop_whitespace_whitespace_only(self):
        # Check drop_whitespace on a whitespace-only string.
        self.check_wrap(" ", 6, [])

    def test_drop_whitespace_leading_whitespace(self):
        # Check that drop_whitespace does not drop leading whitespace (if
        # followed by non-whitespace).
        # SF bug #622849 reported inconsistent handling of leading
        # whitespace; let's test that a bit, shall we?
        text = " This is a sentence with leading whitespace."
        self.check_wrap(text, 50,
                        [" This is a sentence with leading whitespace."])
        self.check_wrap(text, 30,
                        [" This is a sentence with", "leading whitespace."])

    def test_drop_whitespace_whitespace_line(self):
        # Check that drop_whitespace skips the whole line if a non-leading
        # line consists only of whitespace.  The gap must be too wide to
        # fit after "abcd" within 6 columns, so it starts its own line.
        text = "abcd    efgh"
        # Include the result for drop_whitespace=False for comparison.
        self.check_wrap(text, 6, ["abcd", "    ", "efgh"],
                        drop_whitespace=False)
        self.check_wrap(text, 6, ["abcd", "efgh"])

    def test_drop_whitespace_whitespace_only_with_indent(self):
        # Check that initial_indent is not applied to a whitespace-only
        # string.  This checks a special case of the fact that dropping
        # whitespace occurs before indenting.
        self.check_wrap(" ", 6, [], initial_indent="++")

    def test_drop_whitespace_whitespace_indent(self):
        # Check that drop_whitespace does not drop whitespace indents.
        # This checks a special case of the fact that dropping whitespace
        # occurs before indenting.
        self.check_wrap("abcd efgh", 6, [" abcd", " efgh"],
                        initial_indent=" ", subsequent_indent=" ")

    # The Unicode tests are only defined on builds with Unicode support.
    if test_support.have_unicode:
        def test_unicode(self):
            # *Very* simple test of wrapping Unicode strings.  I'm sure
            # there's more to it than this, but let's at least make
            # sure textwrap doesn't crash on Unicode input!
            text = u"Hello there, how are you today?"
            self.check_wrap(text, 50, [u"Hello there, how are you today?"])
            self.check_wrap(text, 20, [u"Hello there, how are", "you today?"])
            olines = self.wrapper.wrap(text)
            self.assertIsInstance(olines, list)
            self.assertIsInstance(olines[0], unicode)
            otext = self.wrapper.fill(text)
            self.assertIsInstance(otext, unicode)

        def test_no_split_at_umlaut(self):
            text = u"Die Empf\xe4nger-Auswahl"
            self.check_wrap(text, 13, [u"Die", u"Empf\xe4nger-", u"Auswahl"])

        def test_umlaut_followed_by_dash(self):
            text = u"aa \xe4\xe4-\xe4\xe4"
            self.check_wrap(text, 7, [u"aa \xe4\xe4-", u"\xe4\xe4"])

    def test_split(self):
        # Ensure that the standard _split() method works as advertised
        # in the comments

        text = "Hello there -- you goof-ball, use the -b option!"

        result = self.wrapper._split(text)
        self.check(result,
             ["Hello", " ", "there", " ", "--", " ", "you", " ", "goof-",
              "ball,", " ", "use", " ", "the", " ", "-b", " ", "option!"])

    def test_break_on_hyphens(self):
        # Ensure that the break_on_hyphens attributes work
        text = "yaba daba-doo"
        self.check_wrap(text, 10, ["yaba daba-", "doo"],
                        break_on_hyphens=True)
        self.check_wrap(text, 10, ["yaba", "daba-doo"],
                        break_on_hyphens=False)

    def test_bad_width(self):
        # Ensure that width <= 0 is caught.
        text = "Whatever, it doesn't matter."
        self.assertRaises(ValueError, wrap, text, 0)
        self.assertRaises(ValueError, wrap, text, -1)
438
class LongWordTestCase(BaseTestCase):
    # Tests for words longer than the wrap width, with break_long_words
    # both enabled (the TextWrapper default) and disabled.

    def setUp(self):
        self.wrapper = TextWrapper()
        self.text = '''\
Did you say "supercalifragilisticexpialidocious?"
How *do* you spell that odd word, anyways?
'''

    def test_break_long(self):
        # Wrap text with long words and lots of punctuation

        self.check_wrap(self.text, 30,
                        ['Did you say "supercalifragilis',
                         'ticexpialidocious?" How *do*',
                         'you spell that odd word,',
                         'anyways?'])
        self.check_wrap(self.text, 50,
                        ['Did you say "supercalifragilisticexpialidocious?"',
                         'How *do* you spell that odd word, anyways?'])

        # SF bug 797650.  Prevent an infinite loop by making sure that at
        # least one character gets split off on every pass.  The indent
        # is wider than the wrap width, so every continuation line is the
        # full indent followed by exactly one character.
        indent = ' ' * 15
        self.check_wrap('-'*10+'hello', 10,
                        ['----------',
                         indent + 'h',
                         indent + 'e',
                         indent + 'l',
                         indent + 'l',
                         indent + 'o'],
                        subsequent_indent=indent)

        # bug 1146.  Prevent a long word to be wrongly wrapped when the
        # preceding word is exactly one character shorter than the width
        # (note the trailing space kept on the first line).
        self.check_wrap(self.text, 12,
                        ['Did you say ',
                         '"supercalifr',
                         'agilisticexp',
                         'ialidocious?',
                         '" How *do*',
                         'you spell',
                         'that odd',
                         'word,',
                         'anyways?'])

    def test_nobreak_long(self):
        # Test with break_long_words disabled
        self.wrapper.break_long_words = 0
        self.wrapper.width = 30
        expect = ['Did you say',
                  '"supercalifragilisticexpialidocious?"',
                  'How *do* you spell that odd',
                  'word, anyways?'
                  ]
        result = self.wrapper.wrap(self.text)
        self.check(result, expect)

        # Same thing with kwargs passed to standalone wrap() function.
        result = wrap(self.text, width=30, break_long_words=0)
        self.check(result, expect)
499
class IndentTestCases(BaseTestCase):
    # Tests for the initial_indent and subsequent_indent options of
    # wrap() and fill().  The width of each indent literal matters: it
    # counts against the 40-column limit and so decides where lines break.

    # called before each test method
    def setUp(self):
        self.text = '''\
This paragraph will be filled, first without any indentation,
and then with some (including a hanging indent).'''

    def test_fill(self):
        # Test the fill() method

        expect = '''\
This paragraph will be filled, first
without any indentation, and then with
some (including a hanging indent).'''

        result = fill(self.text, 40)
        self.check(result, expect)

    def test_initial_indent(self):
        # Test initial_indent parameter.  The five-column indent pushes
        # "first" past column 40 and onto the second line.

        expect = ["     This paragraph will be filled,",
                  "first without any indentation, and then",
                  "with some (including a hanging indent)."]
        result = wrap(self.text, 40, initial_indent="     ")
        self.check(result, expect)

        expect = "\n".join(expect)
        result = fill(self.text, 40, initial_indent="     ")
        self.check(result, expect)

    def test_subsequent_indent(self):
        # Test subsequent_indent parameter.  Both indents are four
        # columns wide, giving a bulleted paragraph with a hanging indent.

        expect = '''\
  * This paragraph will be filled, first
    without any indentation, and then
    with some (including a hanging
    indent).'''

        result = fill(self.text, 40,
                      initial_indent="  * ", subsequent_indent="    ")
        self.check(result, expect)
548
# Despite the similar names, DedentTestCase is *not* the inverse
# of IndentTestCases!
class DedentTestCase(unittest.TestCase):
    # Tests for textwrap.dedent().  The leading whitespace in every
    # fixture is significant, so multi-line fixtures are spelled with
    # explicit "\n" escapes; that keeps them independent of how this
    # file itself is indented.

    def assertUnchanged(self, text):
        """assert that dedent() has no effect on 'text'"""
        self.assertEqual(text, dedent(text))

    def test_dedent_nomargin(self):
        # No lines indented.
        text = "Hello there.\nHow are you?\nOh good, I'm glad."
        self.assertUnchanged(text)

        # Similar, with a blank line.
        text = "Hello there.\n\nBoo!"
        self.assertUnchanged(text)

        # Some lines indented, but overall margin is still zero.
        text = "Hello there.\n This is indented."
        self.assertUnchanged(text)

        # Again, add a blank line.
        text = "Hello there.\n\n Boo!\n"
        self.assertUnchanged(text)

    def test_dedent_even(self):
        # All lines indented by the same amount.
        text = " Hello there.\n How are ya?\n Oh good."
        expect = "Hello there.\nHow are ya?\nOh good."
        self.assertEqual(expect, dedent(text))

        # Same, with blank lines.
        text = " Hello there.\n\n How are ya?\n Oh good.\n"
        expect = "Hello there.\n\nHow are ya?\nOh good.\n"
        self.assertEqual(expect, dedent(text))

        # Now indent one of the blank lines.
        text = " Hello there.\n \n How are ya?\n Oh good.\n"
        expect = "Hello there.\n\nHow are ya?\nOh good.\n"
        self.assertEqual(expect, dedent(text))

    def test_dedent_uneven(self):
        # Lines indented unevenly: only the common 8-column margin is
        # removed, the relative indentation survives, and the trailing
        # whitespace-only line is emptied.
        text = ("        def foo():\n"
                "            while 1:\n"
                "                return foo\n"
                "        ")
        expect = ("def foo():\n"
                  "    while 1:\n"
                  "        return foo\n")
        self.assertEqual(expect, dedent(text))

        # Uneven indentation with a blank line.
        text = "  Foo\n    Bar\n\n   Baz\n"
        expect = "Foo\n  Bar\n\n Baz\n"
        self.assertEqual(expect, dedent(text))

        # Uneven indentation with a whitespace-only line.
        text = "  Foo\n    Bar\n \n   Baz\n"
        expect = "Foo\n  Bar\n\n Baz\n"
        self.assertEqual(expect, dedent(text))

    # dedent() should not mangle internal tabs
    def test_dedent_preserve_internal_tabs(self):
        text = " hello\tthere\n how are\tyou?"
        expect = "hello\tthere\nhow are\tyou?"
        self.assertEqual(expect, dedent(text))

        # make sure that it preserves tabs when it's not making any
        # changes at all
        self.assertEqual(expect, dedent(expect))

    # dedent() should not mangle tabs in the margin (i.e.
    # tabs and spaces both count as margin, but are *not*
    # considered equivalent)
    def test_dedent_preserve_margin_tabs(self):
        text = " hello there\n\thow are you?"
        self.assertUnchanged(text)

        # same effect even if we have 8 spaces
        text = "        hello there\n\thow are you?"
        self.assertUnchanged(text)

        # dedent() only removes whitespace that can be uniformly removed!
        text = "\thello there\n\thow are you?"
        expect = "hello there\nhow are you?"
        self.assertEqual(expect, dedent(text))

        text = " \thello there\n \thow are you?"
        self.assertEqual(expect, dedent(text))

        text = " \t hello there\n \t how are you?"
        self.assertEqual(expect, dedent(text))

        text = " \thello there\n \t how are you?"
        expect = "hello there\n how are you?"
        self.assertEqual(expect, dedent(text))
650
def test_main():
    # Entry point: hand every TestCase class defined in this module to
    # test_support.run_unittest().
    case_classes = (
        WrapTestCase,
        LongWordTestCase,
        IndentTestCases,
        DedentTestCase,
    )
    test_support.run_unittest(*case_classes)


# Allow running this file directly as a script.
if __name__ == '__main__':
    test_main()
Note: See TracBrowser for help on using the repository browser.