1 | # Pod::PlainText -- Convert POD data to formatted ASCII text.
|
---|
2 | # $Id: Text.pm,v 2.1 1999/09/20 11:53:33 eagle Exp $
|
---|
3 | #
|
---|
4 | # Copyright 1999-2000 by Russ Allbery <rra@stanford.edu>
|
---|
5 | #
|
---|
6 | # This program is free software; you can redistribute it and/or modify it
|
---|
7 | # under the same terms as Perl itself.
|
---|
8 | #
|
---|
9 | # This module is intended to be a replacement for Pod::Text, and attempts to
|
---|
10 | # match its output except for some specific circumstances where other
|
---|
11 | # decisions seemed to produce better output. It uses Pod::Parser and is
|
---|
12 | # designed to be very easy to subclass.
|
---|
13 |
|
---|
14 | ############################################################################
|
---|
15 | # Modules and declarations
|
---|
16 | ############################################################################
|
---|
17 |
|
---|
18 | package Pod::PlainText;
|
---|
19 |
|
---|
20 | require 5.005;
|
---|
21 |
|
---|
22 | use Carp qw(carp croak);
|
---|
23 | use Pod::Select ();
|
---|
24 |
|
---|
25 | use strict;
|
---|
26 | use vars qw(@ISA %ESCAPES $VERSION);
|
---|
27 |
|
---|
28 | # We inherit from Pod::Select instead of Pod::Parser so that we can be used
|
---|
29 | # by Pod::Usage.
|
---|
30 | @ISA = qw(Pod::Select);
|
---|
31 |
|
---|
32 | $VERSION = '2.02';
|
---|
33 |
|
---|
34 |
|
---|
35 | ############################################################################
|
---|
36 | # Table of supported E<> escapes
|
---|
37 | ############################################################################
|
---|
38 |
|
---|
# This table is taken near verbatim from Pod::PlainText in Pod::Parser,
# which got it near verbatim from the original Pod::Text.  It is therefore
# credited to Tom Christiansen, and I'm glad I didn't have to write it.  :)
#
# Keys are the names accepted inside E<>; values are the single character
# each name expands to.  The accented characters are given as ISO 8859-1
# (Latin-1) code points.  interior_sequence() looks escapes up here.
%ESCAPES = (
    'amp'       =>    '&',      # ampersand
    'lt'        =>    '<',      # left chevron, less-than
    'gt'        =>    '>',      # right chevron, greater-than
    'quot'      =>    '"',      # double quote

    "Aacute"    =>    "\xC1",   # capital A, acute accent
    "aacute"    =>    "\xE1",   # small a, acute accent
    "Acirc"     =>    "\xC2",   # capital A, circumflex accent
    "acirc"     =>    "\xE2",   # small a, circumflex accent
    "AElig"     =>    "\xC6",   # capital AE diphthong (ligature)
    "aelig"     =>    "\xE6",   # small ae diphthong (ligature)
    "Agrave"    =>    "\xC0",   # capital A, grave accent
    "agrave"    =>    "\xE0",   # small a, grave accent
    "Aring"     =>    "\xC5",   # capital A, ring
    "aring"     =>    "\xE5",   # small a, ring
    "Atilde"    =>    "\xC3",   # capital A, tilde
    "atilde"    =>    "\xE3",   # small a, tilde
    "Auml"      =>    "\xC4",   # capital A, dieresis or umlaut mark
    "auml"      =>    "\xE4",   # small a, dieresis or umlaut mark
    "Ccedil"    =>    "\xC7",   # capital C, cedilla
    "ccedil"    =>    "\xE7",   # small c, cedilla
    "Eacute"    =>    "\xC9",   # capital E, acute accent
    "eacute"    =>    "\xE9",   # small e, acute accent
    "Ecirc"     =>    "\xCA",   # capital E, circumflex accent
    "ecirc"     =>    "\xEA",   # small e, circumflex accent
    "Egrave"    =>    "\xC8",   # capital E, grave accent
    "egrave"    =>    "\xE8",   # small e, grave accent
    "ETH"       =>    "\xD0",   # capital Eth, Icelandic
    "eth"       =>    "\xF0",   # small eth, Icelandic
    "Euml"      =>    "\xCB",   # capital E, dieresis or umlaut mark
    "euml"      =>    "\xEB",   # small e, dieresis or umlaut mark
    "Iacute"    =>    "\xCD",   # capital I, acute accent
    "iacute"    =>    "\xED",   # small i, acute accent
    "Icirc"     =>    "\xCE",   # capital I, circumflex accent
    "icirc"     =>    "\xEE",   # small i, circumflex accent
    "Igrave"    =>    "\xCC",   # capital I, grave accent
    "igrave"    =>    "\xEC",   # small i, grave accent
    "Iuml"      =>    "\xCF",   # capital I, dieresis or umlaut mark
    "iuml"      =>    "\xEF",   # small i, dieresis or umlaut mark
    "Ntilde"    =>    "\xD1",   # capital N, tilde
    "ntilde"    =>    "\xF1",   # small n, tilde
    "Oacute"    =>    "\xD3",   # capital O, acute accent
    "oacute"    =>    "\xF3",   # small o, acute accent
    "Ocirc"     =>    "\xD4",   # capital O, circumflex accent
    "ocirc"     =>    "\xF4",   # small o, circumflex accent
    "Ograve"    =>    "\xD2",   # capital O, grave accent
    "ograve"    =>    "\xF2",   # small o, grave accent
    "Oslash"    =>    "\xD8",   # capital O, slash
    "oslash"    =>    "\xF8",   # small o, slash
    "Otilde"    =>    "\xD5",   # capital O, tilde
    "otilde"    =>    "\xF5",   # small o, tilde
    "Ouml"      =>    "\xD6",   # capital O, dieresis or umlaut mark
    "ouml"      =>    "\xF6",   # small o, dieresis or umlaut mark
    "szlig"     =>    "\xDF",   # small sharp s, German (sz ligature)
    "THORN"     =>    "\xDE",   # capital THORN, Icelandic
    "thorn"     =>    "\xFE",   # small thorn, Icelandic
    "Uacute"    =>    "\xDA",   # capital U, acute accent
    "uacute"    =>    "\xFA",   # small u, acute accent
    "Ucirc"     =>    "\xDB",   # capital U, circumflex accent
    "ucirc"     =>    "\xFB",   # small u, circumflex accent
    "Ugrave"    =>    "\xD9",   # capital U, grave accent
    "ugrave"    =>    "\xF9",   # small u, grave accent
    "Uuml"      =>    "\xDC",   # capital U, dieresis or umlaut mark
    "uuml"      =>    "\xFC",   # small u, dieresis or umlaut mark
    "Yacute"    =>    "\xDD",   # capital Y, acute accent
    "yacute"    =>    "\xFD",   # small y, acute accent
    "yuml"      =>    "\xFF",   # small y, dieresis or umlaut mark

    "lchevron"  =>    "\xAB",   # left chevron (double less than)
    "rchevron"  =>    "\xBB",   # right chevron (double greater than)
);
114 |
|
---|
115 |
|
---|
116 | ############################################################################
|
---|
117 | # Initialization
|
---|
118 | ############################################################################
|
---|
119 |
|
---|
# Set up a freshly constructed parser object.  Any formatting option the
# caller did not supply gets its default, the indentation stack and current
# left margin are reset, and then the parent class initializer is run (which
# must not be skipped).
sub initialize {
    my ($self) = @_;

    # Defaults for the user-visible formatting options.
    my %default = (
        alt      => 0,
        indent   => 4,
        loose    => 0,
        sentence => 0,
        width    => 76,
    );
    for my $option (keys %default) {
        next if defined $self->{$option};
        $self->{$option} = $default{$option};
    }

    $self->{INDENTS} = [];                  # Stack of indentations.
    $self->{MARGIN}  = $self->{indent};     # Current left margin in spaces.

    return $self->SUPER::initialize;
}
135 |
|
---|
136 |
|
---|
137 | ############################################################################
|
---|
138 | # Core overrides
|
---|
139 | ############################################################################
|
---|
140 |
|
---|
# Called for each command paragraph with the command name, the paragraph
# text, the line number, and a Pod::Paragraph object.  The work is handed to
# the method named cmd_<command>, which receives the remaining arguments
# unchanged.  =pod is ignored, and while an excluded =begin block is open
# everything except =end is ignored.  A pending =item tag is flushed before
# the command is dispatched.
sub command {
    my $self = shift;
    my $name = shift;

    return if $name eq 'pod';
    return if $self->{EXCLUDE} and $name ne 'end';

    # An =item that never got a paragraph still has to be printed.
    $self->item ("\n") if defined $self->{ITEM};

    my $handler = "cmd_$name";
    return $self->$handler (@_);
}
154 |
|
---|
# Called for a verbatim paragraph with the paragraph text, the line number,
# and a Pod::Paragraph object.  The text is printed as-is apart from being
# shifted right by the current margin.  Nothing is printed inside an
# excluded block or for a paragraph that is entirely whitespace.
sub verbatim {
    my $self = shift;
    return if $self->{EXCLUDE};

    # Flush a pending =item tag before its verbatim body.
    $self->item if defined $self->{ITEM};

    my $text = shift;
    return if $text =~ /^\s*$/;

    my $pad = ' ' x $self->{MARGIN};
    $text =~ s/^(\s*\S+)/$pad$1/gm;
    $self->output ($text);
}
167 |
|
---|
# Called for a regular text block.  Gets the paragraph, the line number, and
# a Pod::Paragraph object.  Performs interpolation and outputs the result:
# through item() when an =item tag is pending, otherwise reformatted for the
# current margin.  Inside an excluded block nothing is printed; inside a
# "=begin text" block the paragraph is printed untouched.
sub textblock {
    my $self = shift;
    return if $$self{EXCLUDE};
    $self->output ($_[0]), return if $$self{VERBATIM};
    local $_ = shift;
    my $line = shift;

    # Perform a little magic to collapse multiple L<> references.  This is
    # here mostly for backwards-compatibility.  We'll just rewrite the whole
    # thing into actual text at this part, bypassing the whole internal
    # sequence parsing thing.  The result reads "the a and b entries ..."
    # for two links and "the a, b, and c entries ..." for three or more,
    # with exactly one space after each separator.
    s{
        (
          L<                    # A link of the form L</something>.
              /
              (
                  [:\w]+        # The item has to be a simple word...
                  (\(\))?       # ...or simple function.
              )
          >
          (
              ,?\s+(and\s+)?    # Allow lots of them, conjuncted.
              L<
                  /
                  (
                      [:\w]+
                      (\(\))?
                  )
              >
          )+
        )
    } {
        local $_ = $1;
        s%L</([^>]+)>%$1%g;
        my @items = split /(?:,?\s+(?:and\s+)?)/;
        my $last = pop @items;
        my $separator = @items > 1 ? ', ' : ' ';
        'the ' . join ($separator, @items, "and $last")
            . ' entries elsewhere in this document';
    }gex;

    # Now actually interpolate and output the paragraph.
    $_ = $self->interpolate ($_, $line);
    s/\s+$/\n/;
    if (defined $$self{ITEM}) {
        $self->item ($_ . "\n");
    } else {
        $self->output ($self->reformat ($_ . "\n"));
    }
}
225 |
|
---|
# Called for an interior sequence.  Gets the command, argument, and a
# Pod::InteriorSequence object and is expected to return the resulting text.
# Calls seq_b, seq_c, seq_f, seq_i, and seq_l to handle those types of
# sequences, and handles S<>, E<>, X<>, and Z<> directly.  An unrecognized
# sequence produces a warning and its content is passed through unformatted.
sub interior_sequence {
    my $self = shift;
    my $command = shift;
    local $_ = shift;
    return '' if ($command eq 'X' || $command eq 'Z');

    # Expand escapes into the actual character now, carping if invalid.
    if ($command eq 'E') {
        return $ESCAPES{$_} if defined $ESCAPES{$_};
        carp "Unknown escape: E<$_>";
        return "E<$_>";
    }

    # For all the other sequences, empty content produces no output.
    return if $_ eq '';

    # For S<>, compress all internal whitespace and then map spaces to \01.
    # When we output the text, we'll map this back.
    if ($command eq 'S') {
        s/\s{2,}/ /g;
        tr/ /\01/;
        return $_;
    }

    # Anything else needs to get dispatched to another method.
    my %handler = (
        B => 'seq_b',
        C => 'seq_c',
        F => 'seq_f',
        I => 'seq_i',
        L => 'seq_l',
    );
    if (my $method = $handler{$command}) {
        return $self->$method ($_);
    }

    # Return the content explicitly; falling off the end would hand back
    # carp's return value and splice it into the output.
    carp "Unknown sequence: $command<$_>";
    return $_;
}
262 |
|
---|
# Called for each paragraph that's actually part of the POD.  Used as the
# hook for untabifying the input: every run of tabs is replaced by enough
# spaces to reach the next multiple-of-eight column, one run per pass, until
# no tabs remain.  Returns the expanded text.
sub preprocess_paragraph {
    my ($self, $text) = @_;

    # Each pass expands the first tab run on a line; earlier expansions
    # change the column of later ones, hence the repeat-until-stable loop.
    while ($text =~ s/^(.*?)(\t+)/$1 . ' ' x (length ($2) * 8 - length ($1) % 8)/me) {
        next;
    }
    return $text;
}
271 |
|
---|
272 |
|
---|
273 | ############################################################################
|
---|
274 | # Command paragraphs
|
---|
275 | ############################################################################
|
---|
276 |
|
---|
277 | # All command paragraphs take the paragraph and the line number.
|
---|
278 |
|
---|
# First level heading.  Takes the heading paragraph and the line number.
# The interpolated heading is printed flush left; in alt mode it is wrapped
# in "====" markers with blank lines around it, and otherwise the loose
# option decides whether a blank line follows it.
sub cmd_head1 {
    my ($self, $heading, $line) = @_;
    $heading =~ s/\s+$//;
    my $title = $self->interpolate ($heading, $line);

    return $self->output ("\n==== $title ====\n\n") if $self->{alt};

    my $gap = $self->{loose} ? "\n\n" : "\n";
    return $self->output ($title . $gap);
}
292 |
|
---|
# Second level heading.  Takes the heading paragraph and the line number.
# The interpolated heading is printed indented by half the base indent and
# followed by a blank line; in alt mode it is wrapped in "==" markers
# instead.
sub cmd_head2 {
    my ($self, $heading, $line) = @_;
    $heading =~ s/\s+$//;
    my $title = $self->interpolate ($heading, $line);

    my $text = $self->{alt}
        ? "\n== $title ==\n\n"
        : ' ' x ($self->{indent} / 2) . $title . "\n\n";
    return $self->output ($text);
}
305 |
|
---|
# Start a list.  Takes the =over paragraph, which should hold the number of
# columns to indent by.  The current margin is pushed onto the indentation
# stack and then increased by that amount.  If the paragraph is not a plain
# (optionally signed) integer, the default indent is used instead.  Trailing
# whitespace after the number is optional, so "4" and "4\n\n" both work.
sub cmd_over {
    my $self = shift;
    local $_ = shift;
    my $amount = (defined ($_) && /^([-+]?\d+)\s*$/) ? $1 : $$self{indent};
    push (@{ $$self{INDENTS} }, $$self{MARGIN});
    $$self{MARGIN} += ($amount + 0);
}
314 |
|
---|
# End a list.  Restores the left margin saved by the matching =over.  If
# the indentation stack is empty, warns about the unmatched =back and falls
# back to the base indent.
sub cmd_back {
    my ($self) = @_;
    $self->{MARGIN} = pop @{ $self->{INDENTS} };
    if (!defined $self->{MARGIN}) {
        carp "Unmatched =back";
        $self->{MARGIN} = $self->{indent};
    }
}
324 |
|
---|
# An individual list item.  Takes the =item paragraph.  A tag still pending
# from a previous =item is flushed first; the new tag is then interpolated
# and stored in ITEM until its paragraph (or the next command) arrives.
sub cmd_item {
    my $self = shift;
    $self->item if defined $self->{ITEM};

    my $tag = shift;
    $tag =~ s/\s+$//;
    $self->{ITEM} = $self->interpolate ($tag);
}
333 |
|
---|
# Begin a block for a particular translator.  The first word of the
# paragraph names the format: "text" sets VERBATIM, which triggers special
# handling in textblock(), and anything else sets EXCLUDE so the block is
# skipped.  A paragraph with no format word changes nothing.
sub cmd_begin {
    my ($self, $paragraph) = @_;
    return unless $paragraph =~ /^(\S+)/;
    my $flag = ($1 eq 'text') ? 'VERBATIM' : 'EXCLUDE';
    $self->{$flag} = 1;
}
346 |
|
---|
# End a block for a particular translator.  Clears both the EXCLUDE and the
# VERBATIM flag; we assume that all =begin/=end pairs are properly closed,
# so no attempt is made to match the format name.
sub cmd_end {
    my ($self) = @_;
    for my $flag (qw(EXCLUDE VERBATIM)) {
        $self->{$flag} = 0;
    }
}
354 |
|
---|
# One paragraph for a particular translator.  Takes the paragraph and the
# line number.  Ignored unless the paragraph starts with the word "text";
# in that case the format word is stripped and the rest is handled as a
# verbatim paragraph.
sub cmd_for {
    my ($self, $text, $line) = @_;
    return unless $text =~ s/^text\b[ \t]*\n?//;
    return $self->verbatim ($text, $line);
}
364 |
|
---|
365 |
|
---|
366 | ############################################################################
|
---|
367 | # Interior sequences
|
---|
368 | ############################################################################
|
---|
369 |
|
---|
# The simple formatting sequences.  Each takes the already-interpolated text
# and returns it decorated for plain-text output; they are separate methods
# mostly so that subclasses can override them.  B<> and F<> are only marked
# up in alt mode, C<> is always quoted, and I<> is always starred.
sub seq_b {
    my ($self, $text) = @_;
    return $self->{alt} ? '``' . $text . "''" : $text;
}

sub seq_c {
    my ($self, $text) = @_;
    return $self->{alt} ? '``' . $text . "''" : '`' . $text . "'";
}

sub seq_f {
    my ($self, $text) = @_;
    return $self->{alt} ? '"' . $text . '"' : $text;
}

sub seq_i {
    my ($self, $text) = @_;
    return "*$text*";
}
376 |
|
---|
# The complicated one.  Handle links.  Plain text cannot contain real links,
# so all this does is decide what descriptive text to print for the content
# of an L<> sequence and return it.
sub seq_l {
    my ($self, $link) = @_;

    # Smash whitespace in case we were split across multiple lines.
    $link =~ s/\s+/ /g;

    # Explicit text (L<text|target>) wins over everything else.
    return $1 if $link =~ /^([^|]+)\|/;

    # Leading and trailing whitespace is not significant.
    $link =~ s/^\s+//;
    $link =~ s/\s+$//;

    # URLs are printed as they are.
    return $link if $link =~ /^(?:https?|ftp|news):/;

    # Default to using the whole content of the link entry as a section
    # name.  Note that L<manpage/> forces a manpage interpretation, as does
    # something looking like L<manpage(section)>.  The latter is an
    # enhancement over the original Pod::Text.
    my $manpage = '';
    my $section = $link;
    if ($link =~ /^"\s*(.*?)\s*"$/) {
        $section = '"' . $1 . '"';
    } elsif ($link =~ m/^[-:.\w]+(?:\(\S+\))?$/) {
        $manpage = $link;
        $section = '';
    } elsif ($link =~ m%/%) {
        ($manpage, $section) = split (/\s*\/\s*/, $link, 2);
    }

    # Now build the actual output text.  No section: just name the manpage
    # (or nothing at all if there is no manpage either).
    if (!length ($section)) {
        return length ($manpage) ? "the $manpage manpage" : '';
    }

    # A section that starts like an identifier or function is an "entry".
    if ($section =~ /^[:\w]+(?:\(\))?/) {
        my $where = length ($manpage)
            ? " in the $manpage manpage"
            : " elsewhere in this document";
        return 'the ' . $section . ' entry' . $where;
    }

    # Anything else is a quoted section title.
    $section =~ s/^\"\s*//;
    $section =~ s/\s*\"$//;
    my $text = 'the section on "' . $section . '"';
    $text .= " in the $manpage manpage" if length ($manpage);
    return $text;
}
426 |
|
---|
427 |
|
---|
428 | ############################################################################
|
---|
429 | # List handling
|
---|
430 | ############################################################################
|
---|
431 |
|
---|
# This method is called whenever an =item command is complete (in other
# words, we've seen its associated paragraph or know for certain that it
# doesn't have one).  It gets the paragraph associated with the item as an
# argument and consumes the tag stored in ITEM.  If the paragraph is empty
# or only whitespace, or the tag does not fit in the space between the
# enclosing indent and the current margin, the tag is printed on a line of
# its own (followed by the paragraph, if it has any text).  Otherwise the
# tag is written into the margin of the paragraph's first line.
sub item {
    my ($self, $text) = @_;

    my $tag = $self->{ITEM};
    if (!defined $tag) {
        carp "item called without tag";
        return;
    }
    undef $self->{ITEM};

    # The tag is set at the margin that was in effect before the =over.
    my $indent = $self->{INDENTS}[-1];
    $indent = $self->{indent} unless defined $indent;
    my $space = ' ' x $indent;
    $space =~ s/^ /:/ if $self->{alt};

    my $own_line = !$text
        || $text =~ /^\s+$/
        || $self->{MARGIN} - $indent < length ($tag) + 1;

    if ($own_line) {
        # Format the tag at the outer margin, then restore the margin for
        # the paragraph itself.
        my $saved = $self->{MARGIN};
        $self->{MARGIN} = $indent;
        my $heading = $self->reformat ($tag);
        $heading =~ s/\n*$/\n/;
        $self->output ($heading);
        $self->{MARGIN} = $saved;
        $self->output ($self->reformat ($text)) if $text =~ /\S/;
    } else {
        # Overwrite the leading blanks of the formatted paragraph with the
        # tag (and the alt-mode colon marker, if any).
        my $body = $self->reformat ($text);
        $body =~ s/^ /:/ if $self->{alt} && $indent > 0;
        my $blank = ' ' x length ($tag);
        $body =~ s/^($space)$blank/$1$tag/ or warn "Bizarre space in item";
        $self->output ($body);
    }
}
468 |
|
---|
469 |
|
---|
470 | ############################################################################
|
---|
471 | # Output formatting
|
---|
472 | ############################################################################
|
---|
473 |
|
---|
# Wrap a line, indenting by the current left margin.  We can't use
# Text::Wrap because it plays games with tabs.  We can't use formline, even
# though we'd really like to, because it screws up non-printing characters.
# So we have to do the wrapping ourselves.
#
# Takes the text to wrap and returns it broken into lines of at most
# (width - MARGIN) characters, each prefixed with MARGIN spaces, and ending
# in exactly one blank line.  Lines are broken at whitespace when possible
# and in the middle of a word otherwise.  If the margin leaves no room at
# all (width - MARGIN is not positive), the text is only indented, not
# wrapped.
sub wrap {
    my $self = shift;
    local $_ = shift;
    my $output = '';
    my $spaces = ' ' x $$self{MARGIN};
    my $width = $$self{width} - $$self{MARGIN};

    # The $width > 0 guard matters: with a zero width the hard-break pattern
    # below matches the empty string, consumes nothing, and would loop
    # forever.
    while ($width > 0 && length ($_) > $width) {
        if (s/^([^\n]{0,$width})\s+// || s/^([^\n]{$width})//) {
            $output .= $spaces . $1 . "\n";
        } else {
            last;
        }
    }
    $output .= $spaces . $_;
    $output =~ s/\s+$/\n\n/;
    $output;
}
495 |
|
---|
# Reformat a paragraph of text for the current margin.  Takes the text to
# reformat and returns the formatted text, wrapped by wrap().
sub reformat {
    my $self = shift;
    local $_ = shift;

    # If we're trying to preserve two spaces after sentences, do some
    # munging to support that.  Otherwise, smash all repeated whitespace.
    if ($$self{sentence}) {
        s/ +$//mg;              # Drop trailing blanks on every line.
        s/\.\n/. \n/g;          # A period at a line end gets one space...
        s/\n/ /g;               # ...and the newline supplies the second.
        s/   +/  /g;            # Cap runs of blanks at two, never fewer.
    } else {
        s/\s+/ /g;
    }
    $self->wrap ($_);
}
514 |
|
---|
# Output text to the output device.  The \01 placeholders that S<> put in
# place of spaces are turned back into real spaces first; this is done in
# place on the caller's argument, which is then printed to the handle
# returned by output_handle().
sub output {
    my $self = $_[0];
    $_[1] =~ tr/\01/ /;
    my $handle = $self->output_handle;
    return print {$handle} $_[1];
}
517 |
|
---|
518 |
|
---|
519 | ############################################################################
|
---|
520 | # Backwards compatibility
|
---|
521 | ############################################################################
|
---|
522 |
|
---|
# The old Pod::Text module did everything in a pod2text() function.  This
# tries to provide the same interface for legacy applications.
#
# Arguments: optional leading flags (-a for the alternate format, -<number>
# for the output width), then the input file, then optionally an output
# file handle.  With an output handle the input is opened here and parsed
# with parse_from_filehandle(); without one, the arguments are passed
# straight to parse_from_file().  Croaks if the input cannot be opened.
# The caller's arguments are never modified.
sub pod2text {
    my @args;

    # This is really ugly; I hate doing option parsing in the middle of a
    # module.  But the old Pod::Text module supported passing flags to its
    # entry function, so handle -a and -<number>.  The defined() check keeps
    # a call with no arguments from warning.
    while (defined $_[0] && $_[0] =~ /^-/) {
        my $flag = shift;
        if    ($flag eq '-a')       { push (@args, alt => 1) }
        elsif ($flag =~ /^-(\d+)$/) { push (@args, width => $1) }
        else {
            unshift (@_, $flag);
            last;
        }
    }

    # Now that we know what arguments we're using, create the parser.
    my $parser = Pod::PlainText->new (@args);

    # If two arguments were given, the second argument is going to be a file
    # handle.  That means we want to call parse_from_filehandle(), which
    # means we need to turn the first argument into a file handle.  Work on
    # a copy: the elements of @_ alias the caller's values, so assigning
    # the handle to $_[0] would overwrite the caller's variable, or die
    # outright if the file name was passed as a literal.
    my ($input, @rest) = @_;
    if (defined $rest[0]) {
        local *IN;

        # NOTE(review): deliberately a two-argument "magic" open so that
        # legacy callers can pass things like "<&STDIN".  That also means
        # the name is interpreted by open(); do not pass untrusted input.
        unless (open (IN, $input)) {
            croak ("Can't open $input for reading: $!\n");
        }
        return $parser->parse_from_filehandle (\*IN, @rest);
    } else {
        return $parser->parse_from_file (@_);
    }
}
560 |
|
---|
561 |
|
---|
562 | ############################################################################
|
---|
563 | # Module return value and documentation
|
---|
564 | ############################################################################
|
---|
565 |
|
---|
566 | 1;
|
---|
567 | __END__
|
---|
568 |
|
---|
569 | =head1 NAME
|
---|
570 |
|
---|
571 | Pod::PlainText - Convert POD data to formatted ASCII text
|
---|
572 |
|
---|
573 | =head1 SYNOPSIS
|
---|
574 |
|
---|
575 | use Pod::PlainText;
|
---|
576 | my $parser = Pod::PlainText->new (sentence => 0, width => 78);
|
---|
577 |
|
---|
578 | # Read POD from STDIN and write to STDOUT.
|
---|
579 | $parser->parse_from_filehandle;
|
---|
580 |
|
---|
581 | # Read POD from file.pod and write to file.txt.
|
---|
582 | $parser->parse_from_file ('file.pod', 'file.txt');
|
---|
583 |
|
---|
584 | =head1 DESCRIPTION
|
---|
585 |
|
---|
586 | Pod::PlainText is a module that can convert documentation in the POD format (the
|
---|
587 | preferred language for documenting Perl) into formatted ASCII. It uses no
|
---|
588 | special formatting controls or codes whatsoever, and its output is therefore
|
---|
589 | suitable for nearly any device.
|
---|
590 |
|
---|
591 | As a derived class from Pod::Parser, Pod::PlainText supports the same methods and
|
---|
592 | interfaces. See L<Pod::Parser> for all the details; briefly, one creates a
|
---|
593 | new parser with C<Pod::PlainText-E<gt>new()> and then calls either
|
---|
594 | parse_from_filehandle() or parse_from_file().
|
---|
595 |
|
---|
596 | new() can take options, in the form of key/value pairs, that control the
|
---|
597 | behavior of the parser. The currently recognized options are:
|
---|
598 |
|
---|
599 | =over 4
|
---|
600 |
|
---|
601 | =item alt
|
---|
602 |
|
---|
603 | If set to a true value, selects an alternate output format that, among other
|
---|
604 | things, uses a different heading style and marks C<=item> entries with a
|
---|
605 | colon in the left margin. Defaults to false.
|
---|
606 |
|
---|
607 | =item indent
|
---|
608 |
|
---|
609 | The number of spaces to indent regular text, and the default indentation for
|
---|
610 | C<=over> blocks. Defaults to 4.
|
---|
611 |
|
---|
612 | =item loose
|
---|
613 |
|
---|
614 | If set to a true value, a blank line is printed after a C<=head1> heading.
|
---|
615 | If set to false (the default), no blank line is printed after C<=head1>,
|
---|
616 | although one is still printed after C<=head2>. This is the default because
|
---|
617 | it's the expected formatting for manual pages; if you're formatting
|
---|
618 | arbitrary text documents, setting this to true may result in more pleasing
|
---|
619 | output.
|
---|
620 |
|
---|
621 | =item sentence
|
---|
622 |
|
---|
623 | If set to a true value, Pod::PlainText will assume that each sentence ends in two
|
---|
624 | spaces, and will try to preserve that spacing. If set to false, all
|
---|
625 | consecutive whitespace in non-verbatim paragraphs is compressed into a
|
---|
626 | single space. Defaults to false.
|
---|
627 |
|
---|
628 | =item width
|
---|
629 |
|
---|
630 | The column at which to wrap text on the right-hand side. Defaults to 76.
|
---|
631 |
|
---|
632 | =back
|
---|
633 |
|
---|
634 | The standard Pod::Parser method parse_from_filehandle() takes up to two
|
---|
635 | arguments, the first being the file handle to read POD from and the second
|
---|
636 | being the file handle to write the formatted output to. The first defaults
|
---|
637 | to STDIN if not given, and the second defaults to STDOUT. The method
|
---|
638 | parse_from_file() is almost identical, except that its two arguments are the
|
---|
639 | input and output disk files instead. See L<Pod::Parser> for the specific
|
---|
640 | details.
|
---|
641 |
|
---|
642 | =head1 DIAGNOSTICS
|
---|
643 |
|
---|
644 | =over 4
|
---|
645 |
|
---|
646 | =item Bizarre space in item
|
---|
647 |
|
---|
648 | (W) Something has gone wrong in internal C<=item> processing. This message
|
---|
649 | indicates a bug in Pod::PlainText; you should never see it.
|
---|
650 |
|
---|
651 | =item Can't open %s for reading: %s
|
---|
652 |
|
---|
653 | (F) Pod::PlainText was invoked via the compatibility mode pod2text() interface
|
---|
654 | and the input file it was given could not be opened.
|
---|
655 |
|
---|
656 | =item Unknown escape: %s
|
---|
657 |
|
---|
658 | (W) The POD source contained an C<EE<lt>E<gt>> escape that Pod::PlainText didn't
|
---|
659 | know about.
|
---|
660 |
|
---|
661 | =item Unknown sequence: %s
|
---|
662 |
|
---|
663 | (W) The POD source contained a non-standard internal sequence (something of
|
---|
664 | the form C<XE<lt>E<gt>>) that Pod::PlainText didn't know about.
|
---|
665 |
|
---|
666 | =item Unmatched =back
|
---|
667 |
|
---|
668 | (W) Pod::PlainText encountered a C<=back> command that didn't correspond to an
|
---|
669 | C<=over> command.
|
---|
670 |
|
---|
671 | =back
|
---|
672 |
|
---|
673 | =head1 RESTRICTIONS
|
---|
674 |
|
---|
675 | Embedded Ctrl-As (octal 001) in the input will be mapped to spaces on
|
---|
676 | output, due to an internal implementation detail.
|
---|
677 |
|
---|
678 | =head1 NOTES
|
---|
679 |
|
---|
680 | This is a replacement for an earlier Pod::Text module written by Tom
|
---|
681 | Christiansen. It has a revamped interface, since it now uses Pod::Parser,
|
---|
682 | but an interface roughly compatible with the old Pod::Text::pod2text()
|
---|
683 | function is still available. Please change to the new calling convention,
|
---|
684 | though.
|
---|
685 |
|
---|
686 | The original Pod::Text contained code to do formatting via termcap
|
---|
687 | sequences, although it wasn't turned on by default and it was problematic to
|
---|
688 | get it to work at all. This rewrite doesn't even try to do that, but a
|
---|
689 | subclass of it does. Look for L<Pod::Text::Termcap|Pod::Text::Termcap>.
|
---|
690 |
|
---|
691 | =head1 SEE ALSO
|
---|
692 |
|
---|
693 | L<Pod::Parser|Pod::Parser>, L<Pod::Text::Termcap|Pod::Text::Termcap>,
|
---|
694 | pod2text(1)
|
---|
695 |
|
---|
696 | =head1 AUTHOR
|
---|
697 |
|
---|
698 | Please report bugs using L<http://rt.cpan.org>.
|
---|
699 |
|
---|
700 | Russ Allbery E<lt>rra@stanford.eduE<gt>, based I<very> heavily on the
|
---|
701 | original Pod::Text by Tom Christiansen E<lt>tchrist@mox.perl.comE<gt> and
|
---|
702 | its conversion to Pod::Parser by Brad Appleton
|
---|
703 | E<lt>bradapp@enteract.comE<gt>.
|
---|
704 |
|
---|
705 | =cut
|
---|