| 1 | /* `L' command implementation for GNU sed, based on GNU fmt 1.22.
|
|---|
| 2 | Copyright (C) 1994, 1995, 1996, 2002, 2003 Free Software Foundation, Inc.
|
|---|
| 3 |
|
|---|
| 4 | This program is free software; you can redistribute it and/or modify
|
|---|
| 5 | it under the terms of the GNU General Public License as published by
|
|---|
| 6 | the Free Software Foundation; either version 2, or (at your option)
|
|---|
| 7 | any later version.
|
|---|
| 8 |
|
|---|
| 9 | This program is distributed in the hope that it will be useful,
|
|---|
| 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|---|
| 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|---|
| 12 | GNU General Public License for more details.
|
|---|
| 13 |
|
|---|
| 14 | You should have received a copy of the GNU General Public License
|
|---|
| 15 | along with this program; if not, write to the Free Software Foundation,
|
|---|
| 16 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
|
|---|
| 17 |
|
|---|
| 18 | /* GNU fmt was written by Ross Paterson <rap@doc.ic.ac.uk>. */
|
|---|
| 19 |
|
|---|
| 20 | #include "sed.h"
|
|---|
| 21 |
|
|---|
| 22 | #include <stdio.h>
|
|---|
| 23 | #include <ctype.h>
|
|---|
| 24 | #include <sys/types.h>
|
|---|
| 25 |
|
|---|
| 26 | #if HAVE_LIMITS_H
|
|---|
| 27 | # include <limits.h>
|
|---|
| 28 | #endif
|
|---|
| 29 |
|
|---|
| 30 | #ifndef UINT_MAX
|
|---|
| 31 | # define UINT_MAX ((unsigned int) ~(unsigned int) 0)
|
|---|
| 32 | #endif
|
|---|
| 33 |
|
|---|
| 34 | #ifndef INT_MAX
|
|---|
| 35 | # define INT_MAX ((int) (UINT_MAX >> 1))
|
|---|
| 36 | #endif
|
|---|
| 37 |
|
|---|
| 38 | /* The following parameters represent the program's idea of what is
|
|---|
| 39 | "best". Adjust to taste, subject to the caveats given. */
|
|---|
| 40 |
|
|---|
| 41 | /* Prefer lines to be LEEWAY % shorter than the maximum width, giving
|
|---|
| 42 | room for optimization. */
|
|---|
| 43 | #define LEEWAY 7
|
|---|
| 44 |
|
|---|
| 45 | /* Costs and bonuses are expressed as the equivalent departure from the
|
|---|
| 46 | optimal line length, multiplied by 10. e.g. assigning something a
|
|---|
| 47 | cost of 50 means that it is as bad as a line 5 characters too short
|
|---|
| 48 | or too long. The definition of SHORT_COST(n) should not be changed.
|
|---|
| 49 | However, EQUIV(n) may need tuning. */
|
|---|
| 50 |
|
|---|
| 51 | typedef long COST;
|
|---|
| 52 |
|
|---|
| 53 | #define MAXCOST (~(((unsigned long) 1) << (8 * sizeof (COST) -1)))
|
|---|
| 54 |
|
|---|
| 55 | #define SQR(n) ((n) * (n))
|
|---|
| 56 | #define EQUIV(n) SQR ((COST) (n))
|
|---|
| 57 |
|
|---|
| 58 | /* Cost of a filled line n chars longer or shorter than best_width. */
|
|---|
| 59 | #define SHORT_COST(n) EQUIV ((n) * 10)
|
|---|
| 60 |
|
|---|
| 61 | /* Cost of the difference between adjacent filled lines. */
|
|---|
| 62 | #define RAGGED_COST(n) (SHORT_COST (n) / 2)
|
|---|
| 63 |
|
|---|
| 64 | /* Basic cost per line. */
|
|---|
| 65 | #define LINE_COST EQUIV (70)
|
|---|
| 66 |
|
|---|
| 67 | /* Cost of breaking a line after the first word of a sentence, where
|
|---|
| 68 | the length of the word is N. */
|
|---|
| 69 | #define WIDOW_COST(n) (EQUIV (200) / ((n) + 2))
|
|---|
| 70 |
|
|---|
| 71 | /* Cost of breaking a line before the last word of a sentence, where
|
|---|
| 72 | the length of the word is N. */
|
|---|
| 73 | #define ORPHAN_COST(n) (EQUIV (150) / ((n) + 2))
|
|---|
| 74 |
|
|---|
| 75 | /* Bonus for breaking a line at the end of a sentence. */
|
|---|
| 76 | #define SENTENCE_BONUS EQUIV (50)
|
|---|
| 77 |
|
|---|
| 78 | /* Cost of breaking a line after a period not marking end of a sentence.
|
|---|
| 79 | With the definition of sentence we are using (borrowed from emacs, see
|
|---|
| 80 | get_line()) such a break would then look like a sentence break. Hence
|
|---|
| 81 | we assign a very high cost -- it should be avoided unless things are
|
|---|
| 82 | really bad. */
|
|---|
| 83 | #define NOBREAK_COST EQUIV (600)
|
|---|
| 84 |
|
|---|
| 85 | /* Bonus for breaking a line before open parenthesis. */
|
|---|
| 86 | #define PAREN_BONUS EQUIV (40)
|
|---|
| 87 |
|
|---|
| 88 | /* Bonus for breaking a line after other punctuation. */
|
|---|
| 89 | #define PUNCT_BONUS EQUIV(40)
|
|---|
| 90 |
|
|---|
| 91 | /* Credit for breaking a long paragraph one line later. */
|
|---|
| 92 | #define LINE_CREDIT EQUIV(3)
|
|---|
| 93 |
|
|---|
| 94 | /* Size of paragraph buffer in words. Longer paragraphs are handled
|
|---|
| 95 | neatly (cf. flush_paragraph()), so there's little to gain by making
|
|---|
| 96 | these larger. */
|
|---|
| 97 | #define MAXWORDS 1000
|
|---|
| 98 |
|
|---|
| 99 | #define GETC() (parabuf == end_of_parabuf ? EOF : *parabuf++)
|
|---|
| 100 |
|
|---|
| 101 | /* Extra ctype(3)-style macros. */
|
|---|
| 102 |
|
|---|
| 103 | #define isopen(c) (strchr ("([`'\"", (c)) != NULL)
|
|---|
| 104 | #define isclose(c) (strchr (")]'\"", (c)) != NULL)
|
|---|
| 105 | #define isperiod(c) (strchr (".?!", (c)) != NULL)
|
|---|
| 106 |
|
|---|
| 107 | /* Size of a tab stop, for expansion on input and re-introduction on
|
|---|
| 108 | output. */
|
|---|
| 109 | #define TABWIDTH 8
|
|---|
| 110 |
|
|---|
| 111 | /* Word descriptor structure. */
|
|---|
| 112 |
|
|---|
| 113 | typedef struct Word WORD;
|
|---|
| 114 |
|
|---|
| 115 | struct Word
|
|---|
| 116 | {
|
|---|
| 117 |
|
|---|
| 118 | /* Static attributes determined during input. */
|
|---|
| 119 |
|
|---|
| 120 | const char *text; /* the text of the word */
|
|---|
| 121 | short length; /* length of this word */
|
|---|
| 122 | short space; /* the size of the following space */
|
|---|
| 123 | unsigned paren:1; /* starts with open paren */
|
|---|
| 124 | unsigned period:1; /* ends in [.?!])* */
|
|---|
| 125 | unsigned punct:1; /* ends in punctuation */
|
|---|
| 126 | unsigned final:1; /* end of sentence */
|
|---|
| 127 |
|
|---|
| 128 | /* The remaining fields are computed during the optimization. */
|
|---|
| 129 |
|
|---|
| 130 | short line_length; /* length of the best line starting here */
|
|---|
| 131 | COST best_cost; /* cost of best paragraph starting here */
|
|---|
| 132 | WORD *next_break; /* break which achieves best_cost */
|
|---|
| 133 | };
|
|---|
| 134 |
|
|---|
| 135 | /* Forward declarations. */
|
|---|
| 136 |
|
|---|
| 137 | static bool get_paragraph P_ ((void));
|
|---|
| 138 | static int get_line P_ ((int c));
|
|---|
| 139 | static int get_space P_ ((int c));
|
|---|
| 140 | static int copy_rest P_ ((int c));
|
|---|
| 141 | static bool same_para P_ ((int c));
|
|---|
| 142 | static void flush_paragraph P_ ((void));
|
|---|
| 143 | static void fmt_paragraph P_ ((void));
|
|---|
| 144 | static void check_punctuation P_ ((WORD *w));
|
|---|
| 145 | static COST base_cost P_ ((WORD *this));
|
|---|
| 146 | static COST line_cost P_ ((WORD *next, int len));
|
|---|
| 147 | static void put_paragraph P_ ((WORD *finish));
|
|---|
| 148 | static void put_line P_ ((WORD *w, int indent));
|
|---|
| 149 | static void put_word P_ ((WORD *w));
|
|---|
| 150 | static void put_space P_ ((int space));
|
|---|
| 151 |
|
|---|
| 152 | /* Option values. */
|
|---|
| 153 |
|
|---|
| 154 | /* User-supplied maximum line width (default WIDTH). The only output
|
|---|
| 155 | lines
|
|---|
| 156 | longer than this will each comprise a single word. */
|
|---|
| 157 | static int max_width;
|
|---|
| 158 |
|
|---|
| 159 | /* Space for the paragraph text. */
|
|---|
| 160 | static const char *parabuf;
|
|---|
| 161 |
|
|---|
| 162 | /* End of space for the paragraph text. */
|
|---|
| 163 | static const char *end_of_parabuf;
|
|---|
| 164 |
|
|---|
| 165 | /* The file on which we output */
|
|---|
| 166 | static FILE *outfile;
|
|---|
| 167 |
|
|---|
| 168 | /* Values derived from the option values. */
|
|---|
| 169 |
|
|---|
| 170 | /* The preferred width of text lines, set to LEEWAY % less than max_width. */
|
|---|
| 171 | static int best_width;
|
|---|
| 172 |
|
|---|
| 173 | /* Dynamic variables. */
|
|---|
| 174 |
|
|---|
| 175 | /* Start column of the character most recently read from the input file. */
|
|---|
| 176 | static int in_column;
|
|---|
| 177 |
|
|---|
| 178 | /* Start column of the next character to be written to stdout. */
|
|---|
| 179 | static int out_column;
|
|---|
| 180 |
|
|---|
| 181 | /* The words of a paragraph -- longer paragraphs are handled neatly
|
|---|
| 182 | (cf. flush_paragraph()). */
|
|---|
| 183 | static WORD words[MAXWORDS];
|
|---|
| 184 |
|
|---|
| 185 | /* A pointer into the above word array, indicating the first position
|
|---|
| 186 | after the last complete word. Sometimes it will point at an incomplete
|
|---|
| 187 | word. */
|
|---|
| 188 | static WORD *word_limit;
|
|---|
| 189 |
|
|---|
| 190 | /* Indentation of the first line of the current paragraph. */
|
|---|
| 191 | static int first_indent;
|
|---|
| 192 |
|
|---|
| 193 | /* Indentation of other lines of the current paragraph */
|
|---|
| 194 | static int other_indent;
|
|---|
| 195 |
|
|---|
| 196 | /* The last character read from the input file. */
|
|---|
| 197 | static int next_char;
|
|---|
| 198 |
|
|---|
| 199 | /* If nonzero, the length of the last line output in the current
|
|---|
| 200 | paragraph, used to charge for raggedness at the split point for long
|
|---|
| 201 | paragraphs chosen by fmt_paragraph(). */
|
|---|
| 202 | static int last_line_length;
|
|---|
| 203 |
|
|---|
| 204 | /* read file F and send formatted output to stdout. */
|
|---|
| 205 |
|
|---|
| 206 | void
|
|---|
| 207 | fmt (const char *line, const char *line_end, int max_length, FILE *output_file)
|
|---|
| 208 | {
|
|---|
| 209 | parabuf = line;
|
|---|
| 210 | end_of_parabuf = line_end;
|
|---|
| 211 | outfile = output_file;
|
|---|
| 212 |
|
|---|
| 213 | max_width = max_length;
|
|---|
| 214 | best_width = max_width * (201 - 2 * LEEWAY) / 200;
|
|---|
| 215 |
|
|---|
| 216 | in_column = 0;
|
|---|
| 217 | other_indent = 0;
|
|---|
| 218 | next_char = GETC();
|
|---|
| 219 | while (get_paragraph ())
|
|---|
| 220 | {
|
|---|
| 221 | fmt_paragraph ();
|
|---|
| 222 | put_paragraph (word_limit);
|
|---|
| 223 | }
|
|---|
| 224 | }
|
|---|
| 225 |
|
|---|
| 226 | /* Read a paragraph from input file F. A paragraph consists of a
|
|---|
| 227 | maximal number of non-blank (excluding any prefix) lines
|
|---|
| 228 | with the same indent.
|
|---|
| 229 |
|
|---|
| 230 | Return false if end-of-file was encountered before the start of a
|
|---|
| 231 | paragraph, else true. */
|
|---|
| 232 |
|
|---|
| 233 | static bool
|
|---|
| 234 | get_paragraph ()
|
|---|
| 235 | {
|
|---|
| 236 | register int c;
|
|---|
| 237 |
|
|---|
| 238 | last_line_length = 0;
|
|---|
| 239 | c = next_char;
|
|---|
| 240 |
|
|---|
| 241 | /* Scan (and copy) blank lines, and lines not introduced by the prefix. */
|
|---|
| 242 |
|
|---|
| 243 | while (c == '\n' || c == EOF)
|
|---|
| 244 | {
|
|---|
| 245 | c = copy_rest (c);
|
|---|
| 246 | if (c == EOF)
|
|---|
| 247 | {
|
|---|
| 248 | next_char = EOF;
|
|---|
| 249 | return false;
|
|---|
| 250 | }
|
|---|
| 251 | putc ('\n', outfile);
|
|---|
| 252 | c = GETC();
|
|---|
| 253 | }
|
|---|
| 254 |
|
|---|
| 255 | /* Got a suitable first line for a paragraph. */
|
|---|
| 256 |
|
|---|
| 257 | first_indent = in_column;
|
|---|
| 258 | word_limit = words;
|
|---|
| 259 | c = get_line (c);
|
|---|
| 260 |
|
|---|
| 261 | /* Read rest of paragraph. */
|
|---|
| 262 |
|
|---|
| 263 | other_indent = in_column;
|
|---|
| 264 | while (same_para (c) && in_column == other_indent)
|
|---|
| 265 | c = get_line (c);
|
|---|
| 266 |
|
|---|
| 267 | (word_limit - 1)->period = (word_limit - 1)->final = true;
|
|---|
| 268 | next_char = c;
|
|---|
| 269 | return true;
|
|---|
| 270 | }
|
|---|
| 271 |
|
|---|
| 272 | /* Copy to the output a blank line. In the latter, C is \n or EOF.
|
|---|
| 273 | Return the character (\n or EOF) ending the line. */
|
|---|
| 274 |
|
|---|
| 275 | static int
|
|---|
| 276 | copy_rest (register int c)
|
|---|
| 277 | {
|
|---|
| 278 | out_column = 0;
|
|---|
| 279 | while (c != '\n' && c != EOF)
|
|---|
| 280 | {
|
|---|
| 281 | putc (c, outfile);
|
|---|
| 282 | c = GETC();
|
|---|
| 283 | }
|
|---|
| 284 | return c;
|
|---|
| 285 | }
|
|---|
| 286 |
|
|---|
| 287 | /* Return true if a line whose first non-blank character after the
|
|---|
| 288 | prefix (if any) is C could belong to the current paragraph,
|
|---|
| 289 | otherwise false. */
|
|---|
| 290 |
|
|---|
| 291 | static bool
|
|---|
| 292 | same_para (register int c)
|
|---|
| 293 | {
|
|---|
| 294 | return (c != '\n' && c != EOF);
|
|---|
| 295 | }
|
|---|
| 296 |
|
|---|
| 297 | /* Read a line from the input data given first non-blank character C
|
|---|
| 298 | after the prefix, and the following indent, and break it into words.
|
|---|
| 299 | A word is a maximal non-empty string of non-white characters. A word
|
|---|
| 300 | ending in [.?!]["')\]]* and followed by end-of-line or at least two
|
|---|
| 301 | spaces ends a sentence, as in emacs.
|
|---|
| 302 |
|
|---|
| 303 | Return the first non-blank character of the next line. */
|
|---|
| 304 |
|
|---|
| 305 | static int
|
|---|
| 306 | get_line (register int c)
|
|---|
| 307 | {
|
|---|
| 308 | int start;
|
|---|
| 309 | register WORD *end_of_word;
|
|---|
| 310 |
|
|---|
| 311 | end_of_word = &words[MAXWORDS - 2];
|
|---|
| 312 |
|
|---|
| 313 | do
|
|---|
| 314 | { /* for each word in a line */
|
|---|
| 315 |
|
|---|
| 316 | /* Scan word. */
|
|---|
| 317 |
|
|---|
| 318 | word_limit->text = parabuf - 1;
|
|---|
| 319 | do
|
|---|
| 320 | c = GETC();
|
|---|
| 321 | while (c != EOF && !ISSPACE (c));
|
|---|
| 322 | word_limit->length = parabuf - word_limit->text - (c != EOF);
|
|---|
| 323 | in_column += word_limit->length;
|
|---|
| 324 |
|
|---|
| 325 | check_punctuation (word_limit);
|
|---|
| 326 |
|
|---|
| 327 | /* Scan inter-word space. */
|
|---|
| 328 |
|
|---|
| 329 | start = in_column;
|
|---|
| 330 | c = get_space (c);
|
|---|
| 331 | word_limit->space = in_column - start;
|
|---|
| 332 | word_limit->final = (c == EOF
|
|---|
| 333 | || (word_limit->period
|
|---|
| 334 | && (c == '\n' || word_limit->space > 1)));
|
|---|
| 335 | if (c == '\n' || c == EOF)
|
|---|
| 336 | word_limit->space = word_limit->final ? 2 : 1;
|
|---|
| 337 | if (word_limit == end_of_word)
|
|---|
| 338 | flush_paragraph ();
|
|---|
| 339 | word_limit++;
|
|---|
| 340 | if (c == EOF)
|
|---|
| 341 | {
|
|---|
| 342 | in_column = first_indent;
|
|---|
| 343 | return EOF;
|
|---|
| 344 | }
|
|---|
| 345 | }
|
|---|
| 346 | while (c != '\n');
|
|---|
| 347 |
|
|---|
| 348 | in_column = 0;
|
|---|
| 349 | c = GETC();
|
|---|
| 350 | return get_space (c);
|
|---|
| 351 | }
|
|---|
| 352 |
|
|---|
| 353 | /* Read blank characters from the input data, starting with C, and keeping
|
|---|
| 354 | in_column up-to-date. Return first non-blank character. */
|
|---|
| 355 |
|
|---|
| 356 | static int
|
|---|
| 357 | get_space (register int c)
|
|---|
| 358 | {
|
|---|
| 359 | for (;;)
|
|---|
| 360 | {
|
|---|
| 361 | if (c == ' ')
|
|---|
| 362 | in_column++;
|
|---|
| 363 | else if (c == '\t')
|
|---|
| 364 | in_column = (in_column / TABWIDTH + 1) * TABWIDTH;
|
|---|
| 365 | else
|
|---|
| 366 | return c;
|
|---|
| 367 | c = GETC();
|
|---|
| 368 | }
|
|---|
| 369 | }
|
|---|
| 370 |
|
|---|
| 371 | /* Set extra fields in word W describing any attached punctuation. */
|
|---|
| 372 |
|
|---|
| 373 | static void
|
|---|
| 374 | check_punctuation (register WORD *w)
|
|---|
| 375 | {
|
|---|
| 376 | register const char *start, *finish;
|
|---|
| 377 |
|
|---|
| 378 | start = w->text;
|
|---|
| 379 | finish = start + (w->length - 1);
|
|---|
| 380 | w->paren = isopen (*start);
|
|---|
| 381 | w->punct = ISPUNCT (*finish);
|
|---|
| 382 | while (isclose (*finish) && finish > start)
|
|---|
| 383 | finish--;
|
|---|
| 384 | w->period = isperiod (*finish);
|
|---|
| 385 | }
|
|---|
| 386 |
|
|---|
| 387 | /* Flush part of the paragraph to make room. This function is called on
|
|---|
| 388 | hitting the limit on the number of words or characters. */
|
|---|
| 389 |
|
|---|
| 390 | static void
|
|---|
| 391 | flush_paragraph (void)
|
|---|
| 392 | {
|
|---|
| 393 | WORD *split_point;
|
|---|
| 394 | register WORD *w;
|
|---|
| 395 | COST best_break;
|
|---|
| 396 |
|
|---|
| 397 | /* - format what you have so far as a paragraph,
|
|---|
| 398 | - find a low-cost line break near the end,
|
|---|
| 399 | - output to there,
|
|---|
| 400 | - make that the start of the paragraph. */
|
|---|
| 401 |
|
|---|
| 402 | fmt_paragraph ();
|
|---|
| 403 |
|
|---|
| 404 | /* Choose a good split point. */
|
|---|
| 405 |
|
|---|
| 406 | split_point = word_limit;
|
|---|
| 407 | best_break = MAXCOST;
|
|---|
| 408 | for (w = words->next_break; w != word_limit; w = w->next_break)
|
|---|
| 409 | {
|
|---|
| 410 | if (w->best_cost - w->next_break->best_cost < best_break)
|
|---|
| 411 | {
|
|---|
| 412 | split_point = w;
|
|---|
| 413 | best_break = w->best_cost - w->next_break->best_cost;
|
|---|
| 414 | }
|
|---|
| 415 | if (best_break <= MAXCOST - LINE_CREDIT)
|
|---|
| 416 | best_break += LINE_CREDIT;
|
|---|
| 417 | }
|
|---|
| 418 | put_paragraph (split_point);
|
|---|
| 419 |
|
|---|
| 420 | /* Copy words from split_point down to word -- we use memmove because
|
|---|
| 421 | the source and target may overlap. */
|
|---|
| 422 |
|
|---|
| 423 | memmove ((char *) words, (char *) split_point,
|
|---|
| 424 | (word_limit - split_point + 1) * sizeof (WORD));
|
|---|
| 425 | word_limit -= split_point - words;
|
|---|
| 426 | }
|
|---|
| 427 |
|
|---|
| 428 | /* Compute the optimal formatting for the whole paragraph by computing
|
|---|
| 429 | and remembering the optimal formatting for each suffix from the empty
|
|---|
| 430 | one to the whole paragraph. */
|
|---|
| 431 |
|
|---|
| 432 | static void
|
|---|
| 433 | fmt_paragraph (void)
|
|---|
| 434 | {
|
|---|
| 435 | register WORD *start, *w;
|
|---|
| 436 | register int len;
|
|---|
| 437 | register COST wcost, best;
|
|---|
| 438 | int saved_length;
|
|---|
| 439 |
|
|---|
| 440 | word_limit->best_cost = 0;
|
|---|
| 441 | saved_length = word_limit->length;
|
|---|
| 442 | word_limit->length = max_width; /* sentinel */
|
|---|
| 443 |
|
|---|
| 444 | for (start = word_limit - 1; start >= words; start--)
|
|---|
| 445 | {
|
|---|
| 446 | best = MAXCOST;
|
|---|
| 447 | len = start == words ? first_indent : other_indent;
|
|---|
| 448 |
|
|---|
| 449 | /* At least one word, however long, in the line. */
|
|---|
| 450 |
|
|---|
| 451 | w = start;
|
|---|
| 452 | len += w->length;
|
|---|
| 453 | do
|
|---|
| 454 | {
|
|---|
| 455 | w++;
|
|---|
| 456 |
|
|---|
| 457 | /* Consider breaking before w. */
|
|---|
| 458 |
|
|---|
| 459 | wcost = line_cost (w, len) + w->best_cost;
|
|---|
| 460 | if (start == words && last_line_length > 0)
|
|---|
| 461 | wcost += RAGGED_COST (len - last_line_length);
|
|---|
| 462 | if (wcost < best)
|
|---|
| 463 | {
|
|---|
| 464 | best = wcost;
|
|---|
| 465 | start->next_break = w;
|
|---|
| 466 | start->line_length = len;
|
|---|
| 467 | }
|
|---|
| 468 | len += (w - 1)->space + w->length; /* w > start >= words */
|
|---|
| 469 | }
|
|---|
| 470 | while (len < max_width);
|
|---|
| 471 | start->best_cost = best + base_cost (start);
|
|---|
| 472 | }
|
|---|
| 473 |
|
|---|
| 474 | word_limit->length = saved_length;
|
|---|
| 475 | }
|
|---|
| 476 |
|
|---|
| 477 | /* Return the constant component of the cost of breaking before the
|
|---|
| 478 | word THIS. */
|
|---|
| 479 |
|
|---|
| 480 | static COST
|
|---|
| 481 | base_cost (register WORD *this)
|
|---|
| 482 | {
|
|---|
| 483 | register COST cost;
|
|---|
| 484 |
|
|---|
| 485 | cost = LINE_COST;
|
|---|
| 486 |
|
|---|
| 487 | if (this > words)
|
|---|
| 488 | {
|
|---|
| 489 | if ((this - 1)->period)
|
|---|
| 490 | {
|
|---|
| 491 | if ((this - 1)->final)
|
|---|
| 492 | cost -= SENTENCE_BONUS;
|
|---|
| 493 | else
|
|---|
| 494 | cost += NOBREAK_COST;
|
|---|
| 495 | }
|
|---|
| 496 | else if ((this - 1)->punct)
|
|---|
| 497 | cost -= PUNCT_BONUS;
|
|---|
| 498 | else if (this > words + 1 && (this - 2)->final)
|
|---|
| 499 | cost += WIDOW_COST ((this - 1)->length);
|
|---|
| 500 | }
|
|---|
| 501 |
|
|---|
| 502 | if (this->paren)
|
|---|
| 503 | cost -= PAREN_BONUS;
|
|---|
| 504 | else if (this->final)
|
|---|
| 505 | cost += ORPHAN_COST (this->length);
|
|---|
| 506 |
|
|---|
| 507 | return cost;
|
|---|
| 508 | }
|
|---|
| 509 |
|
|---|
| 510 | /* Return the component of the cost of breaking before word NEXT that
|
|---|
| 511 | depends on LEN, the length of the line beginning there. */
|
|---|
| 512 |
|
|---|
| 513 | static COST
|
|---|
| 514 | line_cost (register WORD *next, register int len)
|
|---|
| 515 | {
|
|---|
| 516 | register int n;
|
|---|
| 517 | register COST cost;
|
|---|
| 518 |
|
|---|
| 519 | if (next == word_limit)
|
|---|
| 520 | return 0;
|
|---|
| 521 | n = best_width - len;
|
|---|
| 522 | cost = SHORT_COST (n);
|
|---|
| 523 | if (next->next_break != word_limit)
|
|---|
| 524 | {
|
|---|
| 525 | n = len - next->line_length;
|
|---|
| 526 | cost += RAGGED_COST (n);
|
|---|
| 527 | }
|
|---|
| 528 | return cost;
|
|---|
| 529 | }
|
|---|
| 530 |
|
|---|
| 531 | /* Output to stdout a paragraph from word up to (but not including)
|
|---|
| 532 | FINISH, which must be in the next_break chain from word. */
|
|---|
| 533 |
|
|---|
| 534 | static void
|
|---|
| 535 | put_paragraph (register WORD *finish)
|
|---|
| 536 | {
|
|---|
| 537 | register WORD *w;
|
|---|
| 538 |
|
|---|
| 539 | put_line (words, first_indent);
|
|---|
| 540 | for (w = words->next_break; w != finish; w = w->next_break)
|
|---|
| 541 | put_line (w, other_indent);
|
|---|
| 542 | }
|
|---|
| 543 |
|
|---|
| 544 | /* Output to stdout the line beginning with word W, beginning in column
|
|---|
| 545 | INDENT, including the prefix (if any). */
|
|---|
| 546 |
|
|---|
| 547 | static void
|
|---|
| 548 | put_line (register WORD *w, int indent)
|
|---|
| 549 | {
|
|---|
| 550 | register WORD *endline;
|
|---|
| 551 | out_column = 0;
|
|---|
| 552 | put_space (indent);
|
|---|
| 553 |
|
|---|
| 554 | endline = w->next_break - 1;
|
|---|
| 555 | for (; w != endline; w++)
|
|---|
| 556 | {
|
|---|
| 557 | put_word (w);
|
|---|
| 558 | put_space (w->space);
|
|---|
| 559 | }
|
|---|
| 560 | put_word (w);
|
|---|
| 561 | last_line_length = out_column;
|
|---|
| 562 | putc ('\n', outfile);
|
|---|
| 563 | }
|
|---|
| 564 |
|
|---|
| 565 | /* Output to stdout the word W. */
|
|---|
| 566 |
|
|---|
| 567 | static void
|
|---|
| 568 | put_word (register WORD *w)
|
|---|
| 569 | {
|
|---|
| 570 | register const char *s;
|
|---|
| 571 | register int n;
|
|---|
| 572 |
|
|---|
| 573 | s = w->text;
|
|---|
| 574 | for (n = w->length; n != 0; n--)
|
|---|
| 575 | putc (*s++, outfile);
|
|---|
| 576 | out_column += w->length;
|
|---|
| 577 | }
|
|---|
| 578 |
|
|---|
| 579 | /* Output to stdout SPACE spaces, or equivalent tabs. */
|
|---|
| 580 |
|
|---|
| 581 | static void
|
|---|
| 582 | put_space (int space)
|
|---|
| 583 | {
|
|---|
| 584 | out_column += space;
|
|---|
| 585 | while (space--)
|
|---|
| 586 | putc (' ', outfile);
|
|---|
| 587 | }
|
|---|