source: trunk/emx/src/emxdoc/lb.c@ 2446

Last change on this file since 2446 was 18, checked in by bird, 22 years ago

Initial revision

  • Property cvs2svn:cvs-rev set to 1.1
  • Property svn:eol-style set to native
  • Property svn:executable set to *
File size: 16.8 KB
Line 
1/* lb.c -- Line breaking
2 Copyright (c) 1993-1996 Eberhard Mattes
3
4This file is part of emxdoc.
5
6emxdoc is free software; you can redistribute it and/or modify it
7under the terms of the GNU General Public License as published by
8the Free Software Foundation; either version 2, or (at your option)
9any later version.
10
11emxdoc is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with emxdoc; see the file COPYING. If not, write to
18the Free Software Foundation, 59 Temple Place - Suite 330,
19Boston, MA 02111-1307, USA. */
20
21
22#include <stdio.h>
23#include <stdlib.h>
24#include <string.h>
25#include "lb.h"
26
27#define FALSE 0
28#define TRUE 1
29
30#define NODE_WORD 1
31#define NODE_GLUE 2
32#define NODE_PENALTY 3
33#define NODE_DISCR 4
34#define NODE_PRE 5
35#define NODE_POST 6
36#define NODE_NEWLINE 7
37
38#define HASH_SIZE 997
39
40#define HYPHEN_PENALTY 18
41
42struct node
43{
44 struct node *next;
45 char *word, *pre, *post;
46 const void *info;
47 int value, value_pre, value_post;
48 char type;
49};
50
51struct brkp
52{
53 struct node *node;
54 int chain;
55 int cost;
56 char special;
57};
58
59struct hword
60{
61 struct hword *next;
62 char *str;
63};
64
65struct lbh
66{
67 struct hword *hash_table[HASH_SIZE];
68};
69
70struct lb
71{
72 int lmargin;
73 int rmargin;
74 int lmargin_1;
75 int rmargin_1;
76 int brkp_count;
77 const struct lbh *hyphenation;
78 const struct node *cur_node;
79 struct node *node_list;
80 struct node **node_add;
81 struct brkp *brkps;
82 int *distance;
83};
84
85
86static const struct hword *lbh_find (const struct lbh *p, const char *s);
87
88
89int lb_init (struct lb **pp, int lmargin, int rmargin)
90{
91 struct lb *p;
92
93 *pp = NULL;
94 if (lmargin < 0 || lmargin >= rmargin)
95 return LB_INVAL;
96 p = malloc (sizeof (struct lb));
97 if (p == NULL) return LB_NOMEM;
98 p->lmargin = p->lmargin_1 = lmargin;
99 p->rmargin = p->rmargin_1 = rmargin;
100 p->brkp_count = 0;
101 p->cur_node = NULL;
102 p->node_list = NULL;
103 p->node_add = &p->node_list;
104 p->brkps = NULL;
105 p->distance = NULL;
106 p->hyphenation = NULL;
107 *pp = p;
108 return 0;
109}
110
111
112static void lb_free_node (struct node *n1)
113{
114 if (n1->word != NULL) free (n1->word);
115 if (n1->pre != NULL) free (n1->pre);
116 if (n1->post != NULL) free (n1->post);
117 free (n1);
118}
119
120
121int lb_exit (struct lb **pp)
122{
123 struct lb *p;
124 struct node *n1, *n2;
125
126 p = *pp; *pp = NULL;
127 if (p != NULL)
128 {
129 for (n1 = p->node_list; n1 != NULL; n1 = n2)
130 {
131 n2 = n1->next;
132 lb_free_node (n1);
133 }
134 if (p->brkps != NULL) free (p->brkps);
135 if (p->distance != NULL) free (p->distance);
136 free (p);
137 }
138 return 0;
139}
140
141
142static struct node * lb_new (struct lb *p, char type, int value,
143 const char *word, const void *info)
144{
145 struct node *n1;
146
147 n1 = malloc (sizeof (struct node));
148 if (n1 == NULL) return NULL;
149 if (word == NULL)
150 n1->word = NULL;
151 else
152 {
153 n1->word = strdup (word);
154 if (n1->word == NULL)
155 {
156 free (n1);
157 return NULL;
158 }
159 }
160 n1->next = NULL;
161 n1->type = type;
162 n1->value = value;
163 n1->value_pre = 0;
164 n1->value_post = 0;
165 n1->pre = NULL;
166 n1->post = NULL;
167 n1->info = info;
168 return n1;
169}
170
171
172static int lb_add (struct lb *p, char type, int value, const char *word,
173 int value_pre, char *pre, int value_post, char *post,
174 const void *info)
175{
176 struct node *n1;
177
178 n1 = lb_new (p, type, value, word, info);
179 if (n1 == NULL) return LB_NOMEM;
180 n1->pre = pre;
181 n1->post = post;
182 n1->value_pre = value_pre;
183 n1->value_post = value_post;
184 *(p->node_add) = n1;
185 p->node_add = &n1->next;
186 return 0;
187}
188
189
190int lb_penalty (struct lb *p, int penalty)
191{
192 return lb_add (p, NODE_PENALTY, penalty, NULL, 0, NULL, 0, NULL, NULL);
193}
194
195
196int lb_word (struct lb *p, int width, const char *word, const void *info)
197{
198 return lb_add (p, NODE_WORD, width, word, 0, NULL, 0, NULL, info);
199}
200
201
202int lb_discr (struct lb *p, int width_word, const char *word,
203 int width_pre, const char *pre, int width_post, const char *post,
204 const void *info)
205{
206 char *p1, *p2;
207 int rc;
208
209 if (pre == NULL)
210 p1 = NULL;
211 else
212 {
213 p1 = strdup (pre);
214 if (p1 == NULL)
215 return LB_NOMEM;
216 }
217 if (post == NULL)
218 p2 = NULL;
219 else
220 {
221 p2 = strdup (post);
222 if (p2 == NULL)
223 {
224 if (p1 != NULL) free (p1);
225 return LB_NOMEM;
226 }
227 }
228 rc = lb_add (p, NODE_DISCR, width_word, word, width_pre, p1, width_post, p2,
229 info);
230 if (rc != 0)
231 {
232 if (p1 != NULL) free (p1);
233 if (p2 != NULL) free (p2);
234 }
235 return rc;
236}
237
238
239int lb_hyphen (struct lb *p, int width, const void *info)
240{
241 return lb_discr (p, 0, "", width, "-", 0, "", info);
242}
243
244
245int lb_glue (struct lb *p, int width, const void *info)
246{
247 return lb_add (p, NODE_GLUE, width, NULL, 0, NULL, 0, NULL, info);
248}
249
250
251int lb_first_rmargin (struct lb *p, int margin)
252{
253 if (margin < 1)
254 return LB_INVAL;
255 p->rmargin_1 = margin;
256 return 0;
257}
258
259
260int lb_first_lmargin (struct lb *p, int margin)
261{
262 if (margin < 0)
263 return LB_INVAL;
264 p->lmargin_1 = margin;
265 return 0;
266}
267
268
269int lb_use_hyphenation (struct lb *p, const struct lbh *h)
270{
271 p->hyphenation = h;
272 return 0;
273}
274
275
276static int lb_hyphenation (struct lb *p)
277{
278 struct node *n1, *n2, *n3;
279 const struct hword *hw;
280 const char *start, *hyph;
281 char *pre;
282 int len;
283
284 for (n1 = p->node_list; n1 != NULL; n1 = n1->next)
285 if (n1->type == NODE_WORD && strchr (n1->word, LB_HYPHEN) == NULL
286 && (hw = lbh_find (p->hyphenation, n1->word)) != NULL)
287 {
288 start = hw->str;
289 free (n1->word); n1->word = NULL;
290 while ((hyph = strchr (start, LB_HYPHEN)) != NULL)
291 {
292 if (hyph != start && hyph[-1] == '-')
293 pre = NULL;
294 else
295 {
296 pre = strdup ("-");
297 if (pre == NULL) return LB_NOMEM;
298 }
299
300 n2 = lb_new (p, NODE_DISCR, 0, NULL, n1->info);
301 if (n2 == NULL) return LB_NOMEM;
302 n2->pre = pre;
303 n2->value_pre = (pre != NULL ? (int)strlen (pre) : 0);
304 n2->next = n1->next; n1->next = n2;
305
306 n3 = lb_new (p, NODE_WORD, 0, NULL, n1->info);
307 if (n3 == NULL) return LB_NOMEM;
308 n3->next = n2->next; n2->next = n3;
309
310 len = hyph - start;
311 n1->value = len;
312 n1->word = malloc ((size_t)(len + 1));
313 if (n1->word == NULL) return LB_NOMEM;
314 memcpy (n1->word, start, (size_t)len);
315 n1->word[len] = 0;
316
317 n1 = n3;
318 start = hyph + 1;
319 }
320 n1->value = (int)strlen (start);
321 n1->word = strdup (start);
322 if (n1->word == NULL) return LB_NOMEM;
323 }
324 return 0;
325}
326
327
328static void lb_add_brkp (struct lb *p, struct node *n1, int store)
329{
330 if (store)
331 p->brkps[p->brkp_count].node = n1;
332 ++p->brkp_count;
333}
334
335
336static int lb_find_brkps (struct lb *p, int store)
337{
338 struct node *n1;
339
340 p->brkp_count = 0;
341 n1 = p->node_list;
342 lb_add_brkp (p, n1, store);
343 while (n1 != NULL)
344 switch (n1->type)
345 {
346 case NODE_WORD:
347 n1 = n1->next;
348 break;
349 case NODE_PENALTY:
350 if (n1->value != LB_INFINITY)
351 lb_add_brkp (p, n1, store);
352 while (n1 != NULL && n1->type != NODE_WORD)
353 n1 = n1->next;
354 break;
355 case NODE_GLUE:
356 lb_add_brkp (p, n1, store);
357 while (n1 != NULL && n1->type == NODE_GLUE)
358 n1 = n1->next;
359 break;
360 case NODE_DISCR:
361 lb_add_brkp (p, n1, store);
362 n1 = n1->next;
363 break;
364 default:
365 return LB_INTERN;
366 }
367 return 0;
368}
369
370
371static int cost_add (int c1, int c2)
372{
373 if (c1 >= LB_INFINITY || c2 >= LB_INFINITY || c1 + c2 >= LB_INFINITY)
374 return LB_INFINITY;
375 else
376 return c1 + c2;
377}
378
379
380#define DISTANCE(P,B1,B2) ((P)->distance[(B1) * (P)->brkp_count + (B2)])
381
382static int lb_compute_distance (struct lb *p)
383{
384 int i, j, c, w, w0, hc;
385 struct node *n0, *n1, *n2;
386
387 p->distance = malloc ((size_t)(p->brkp_count * p->brkp_count
388 * sizeof (int)));
389 if (p->distance == NULL) return LB_NOMEM;
390
391 for (i = 0; i < p->brkp_count; ++i)
392 {
393 DISTANCE (p, i, i) = 0;
394 for (j = i + 1; j < p->brkp_count; ++j)
395 {
396 n0 = p->brkps[i].node;
397 n2 = p->brkps[j].node;
398 w = 0; hc = 0;
399 /* Discard glue at the beginning of a line unless preceded
400 by a penalty. */
401 n1 = n0;
402 while (n1 != NULL && n1->type == NODE_GLUE)
403 n1 = n1->next;
404 for (; n1 != NULL; n1 = n1->next)
405 {
406 switch (n1->type)
407 {
408 case NODE_WORD:
409 w += n1->value;
410 break;
411 case NODE_GLUE:
412 w += n1->value; /* Rigid glue */
413 break;
414 case NODE_PENALTY:
415 break;
416 case NODE_DISCR:
417 if (n1 == n0)
418 w += n1->value_post;
419 else if (n1 == n2)
420 {
421 w += n1->value_pre;
422 hc = HYPHEN_PENALTY;
423 }
424 else
425 w += n1->value;
426 break;
427 default:
428 return LB_INTERN;
429 }
430 if (n1 == n2)
431 break;
432 }
433
434 if (i == 0)
435 w0 = p->rmargin_1 - p->lmargin_1;
436 else
437 w0 = p->rmargin - p->lmargin;
438
439 if (w > w0)
440 c = LB_INFINITY;
441 else if (j == p->brkp_count - 1)
442 {
443 /* The length of the last line of a paragraph does not
444 matter. Note that n2 is a penalty node with a value
445 of 0. */
446 c = 0;
447 }
448 else if (w0 - w > LB_SQRT_INFINITY)
449 c = LB_INFINITY;
450 else
451 c = (w0 - w) * (w0 - w);
452 if (n2->type == NODE_PENALTY)
453 c = cost_add (c, n2->value);
454 c = cost_add (c, hc);
455
456 DISTANCE (p, i, j) = c;
457 DISTANCE (p, j, i) = c;
458 }
459 }
460
461#if 0
462 fprintf (stderr, "@@DISTANCE:\n");
463 for (i = 0; i < p->brkp_count; ++i)
464 {
465 for (j = 0; j < p->brkp_count; ++j)
466 fprintf (stderr, "%6d", DISTANCE (p, i, j));
467 fprintf (stderr, "\n");
468 }
469 fprintf (stderr, "@@END\n");
470#endif
471 return 0;
472}
473
474
475static int lb_dijkstra (struct lb *p)
476{
477 int i, j, n, best, bcost, c;
478
479 n = p->brkp_count;
480
481 for (i = 0; i < n; ++i)
482 {
483 p->brkps[i].cost = DISTANCE (p, 0, i);
484 p->brkps[i].chain = 0;
485 p->brkps[i].special = FALSE;
486 }
487 p->brkps[0].special = TRUE;
488
489 for (i = 1; i < n; ++i)
490 {
491 best = 0; bcost = LB_INFINITY + 1;
492 for (j = 0; j < n; ++j)
493 if (!p->brkps[j].special && p->brkps[j].cost < bcost)
494 {
495 best = j; bcost = p->brkps[j].cost;
496 }
497 if (best == n - 1)
498 break;
499 p->brkps[best].special = TRUE;
500 for (j = best + 1; j < n; ++j)
501 if (!p->brkps[j].special)
502 {
503 c = bcost + DISTANCE (p, best, j);
504 if (c < p->brkps[j].cost)
505 {
506 p->brkps[j].cost = c;
507 p->brkps[j].chain = best;
508 }
509 }
510 }
511 return 0;
512}
513
514
515static int lb_reverse (struct lb *p)
516{
517 int i, j, k;
518
519 i = p->brkp_count - 1;
520 j = -1;
521 while (i != 0)
522 {
523 if (p->brkps[i].chain >= i)
524 return LB_INTERN;
525 k = p->brkps[i].chain; p->brkps[i].chain = j; j = i; i = k;
526 }
527 p->brkps[0].chain = j;
528
529#if 0
530 fprintf (stderr, "[");
531 for (i = 0; i != -1; i = p->brkps[i].chain)
532 fprintf (stderr, " %d", i);
533 fprintf (stderr, "]\n");
534#endif
535
536 return 0;
537}
538
539
540static int lb_break (struct lb *p)
541{
542 int i;
543 struct node **nn, *n2, *n3, *nb;
544
545 i = p->brkps[0].chain;
546 nb = (i >= p->brkp_count - 1 ? NULL : p->brkps[i].node);
547 for (nn = &p->node_list; *nn != NULL; nn = &(*nn)->next)
548 {
549 if ((*nn) == nb)
550 {
551 switch (nb->type)
552 {
553 case NODE_DISCR:
554 n2 = lb_new (p, NODE_NEWLINE, 0, NULL, NULL);
555 n2->next = (*nn)->next; (*nn) = n2;
556
557 if (nb->pre != NULL && nb->pre[0] != 0)
558 {
559 n2 = lb_new (p, NODE_PRE, nb->value_pre, nb->pre, NULL);
560 n2->next = (*nn); (*nn) = n2;
561 nn = &n2->next;
562 }
563
564 if (nb->post != NULL && nb->post[0] != 0)
565 {
566 n2 = lb_new (p, NODE_POST, nb->value_post, nb->post, NULL);
567 n2->next = (*nn)->next; (*nn)->next = n2;
568 nn = &n2->next;
569 }
570
571 lb_free_node (nb);
572 break;
573
574 case NODE_GLUE:
575 (*nn)->type = NODE_NEWLINE;
576 for (n2 = (*nn)->next; n2 != NULL && n2->type == NODE_GLUE;
577 n2 = n3)
578 {
579 n3 = n2->next;
580 lb_free_node (n2);
581 }
582 (*nn)->next = n2;
583 break;
584
585 case NODE_PENALTY:
586 n2 = lb_new (p, NODE_NEWLINE, 0, NULL, NULL);
587 n2->next = (*nn); (*nn) = n2;
588 break;
589
590 default:
591 return LB_INTERN;
592 }
593 i = p->brkps[i].chain;
594 nb = (i >= p->brkp_count - 1 ? NULL : p->brkps[i].node);
595 }
596 }
597 return 0;
598}
599
600
601int lb_format (struct lb *p)
602{
603 int rc;
604
605 rc = lb_penalty (p, 0);
606 if (rc != 0) return rc;
607
608 if (p->hyphenation != NULL)
609 {
610 rc = lb_hyphenation (p);
611 if (rc != 0) return rc;
612 }
613
614 rc = lb_find_brkps (p, FALSE);
615 if (rc != 0) return rc;
616 p->brkps = malloc ((size_t)(p->brkp_count * sizeof (struct brkp)));
617 if (p->brkps == NULL) return LB_NOMEM;
618 rc = lb_find_brkps (p, TRUE);
619 if (rc != 0) return rc;
620
621 rc = lb_compute_distance (p);
622 if (rc != 0) return rc;
623
624 rc = lb_dijkstra (p);
625 if (rc != 0) return rc;
626
627 rc = lb_reverse (p);
628 if (rc != 0) return rc;
629
630 rc = lb_break (p);
631 if (rc != 0) return rc;
632
633 p->cur_node = p->node_list;
634 return 0;
635}
636
637
638int lb_next (struct lb *p, struct lb_node *dst)
639{
640 const struct node *n1;
641
642redo:
643 n1 = p->cur_node;
644 if (n1 == NULL)
645 {
646 dst->type = LBN_END;
647 dst->value = 0;
648 dst->word = NULL;
649 dst->info = NULL;
650 return 0;
651 }
652 p->cur_node = p->cur_node->next;
653
654 switch (n1->type)
655 {
656 case NODE_WORD:
657 case NODE_DISCR:
658 if (n1->word == NULL || n1->word[0] == 0)
659 goto redo;
660 dst->type = LBN_WORD;
661 break;
662 case NODE_PRE:
663 dst->type = LBN_PRE;
664 break;
665 case NODE_POST:
666 dst->type = LBN_POST;
667 break;
668 case NODE_GLUE:
669 dst->type = LBN_GLUE;
670 break;
671 case NODE_PENALTY:
672 goto redo;
673 case NODE_NEWLINE:
674 dst->type = LBN_NEWLINE;
675 break;
676 default:
677 return LB_INTERN;
678 }
679 dst->value = n1->value;
680 dst->word = n1->word;
681 dst->info = n1->info;
682 return 0;
683}
684
685
686int lbh_init (struct lbh **pp)
687{
688 struct lbh *p;
689 int i;
690
691 *pp = NULL;
692 p = malloc (sizeof (struct lbh));
693 if (p == NULL) return LB_NOMEM;
694 for (i = 0; i < HASH_SIZE; ++i)
695 p->hash_table[i] = NULL;
696 *pp = p;
697 return 0;
698}
699
700
701int lbh_exit (struct lbh **pp)
702{
703 struct lbh *p;
704 struct hword *w1, *w2;
705 int i;
706
707 p = *pp; *pp = NULL;
708 if (p != NULL)
709 {
710 for (i = 0; i < HASH_SIZE; ++i)
711 for (w1 = p->hash_table[i]; w1 != NULL; w1 = w2)
712 {
713 w2 = w1->next;
714 free (w1->str);
715 free (w1);
716 }
717 free (p);
718 }
719 return 0;
720}
721
722
723static unsigned lbh_hash (const char *s)
724{
725 const unsigned char *u;
726 unsigned h;
727
728 h = 0;
729 for (u = s; *u != 0; ++u)
730 if (*u != LB_HYPHEN)
731 h = (h << 2) ^ *u;
732 return h % HASH_SIZE;
733}
734
735
736static int lbh_equal (const char *s1, const char *s2)
737{
738 while (*s1 != 0 || *s2 != 0)
739 {
740 if (*s1 == LB_HYPHEN)
741 ++s1;
742 else if (*s2 == LB_HYPHEN)
743 ++s2;
744 else if (*s1 == *s2)
745 ++s1, ++s2;
746 else
747 return FALSE;
748 }
749 return TRUE;
750}
751
752
753int lbh_word (struct lbh *p, const char *s)
754{
755 unsigned h;
756 struct hword *w;
757
758 h = lbh_hash (s);
759 for (w = p->hash_table[h]; w != NULL; w = w->next)
760 if (lbh_equal (w->str, s))
761 return (strcmp (w->str, s) == 0 ? 0 : LB_INVAL);
762 w = malloc (sizeof (struct hword));
763 if (w == NULL) return LB_NOMEM;
764 w->str = strdup (s);
765 if (w->str == NULL)
766 {
767 free (w);
768 return LB_NOMEM;
769 }
770 w->next = p->hash_table[h];
771 p->hash_table[h] = w;
772 return 0;
773}
774
775
776static const struct hword *lbh_find (const struct lbh *p, const char *s)
777{
778 unsigned h;
779 struct hword *w;
780
781 h = lbh_hash (s);
782 for (w = p->hash_table[h]; w != NULL; w = w->next)
783 if (lbh_equal (w->str, s))
784 return w;
785 return NULL;
786}
Note: See TracBrowser for help on using the repository browser.