source: trunk/emx/src/emxdoc/input.c@ 2653

Last change on this file since 2653 was 18, checked in by bird, 23 years ago

Initial revision

  • Property cvs2svn:cvs-rev set to 1.1
  • Property svn:eol-style set to native
  • Property svn:executable set to *
File size: 16.5 KB
Line 
1/* input.c -- Read and tokenize the input file
2 Copyright (c) 1993-1999 Eberhard Mattes
3
4This file is part of emxdoc.
5
6emxdoc is free software; you can redistribute it and/or modify it
7under the terms of the GNU General Public License as published by
8the Free Software Foundation; either version 2, or (at your option)
9any later version.
10
11emxdoc is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with emxdoc; see the file COPYING. If not, write to
18the Free Software Foundation, 59 Temple Place - Suite 330,
19Boston, MA 02111-1307, USA. */
20
21
22#include <stdio.h>
23#include <stdlib.h>
24#include <string.h>
25#include <ctype.h>
26#include "emxdoc.h"
27#include "cond.h"
28
/* Number of entries in cond_stack, i.e. the deepest nesting of
   conditionals that read_line() accepts before reporting a
   "Conditional stack overflow". */
29#define COND_STACK_SIZE 8
30
/* One open conditional, as tracked by read_line(). */
31struct cond
32{
33 int start_line; /* Value of line_no when the opening "if" directive was read */
34 int true; /* Non-zero while lines of the current branch are kept; inverted by "else" */
35 int else_seen; /* Non-zero once an "else" has been read for this conditional */
36};
37
/* Byte-to-byte translation table built by build_recode_table() and
   consumed by recode(). */
38struct recode_table
39{
40 enum enc inp; /* Encoding the table translates from */
41 enum enc out; /* Encoding the table translates to */
42 uchar tab[256]; /* Indexed by input byte; a 0 entry means "no translation available" */
43};
44
/* Stack of currently open conditionals.  cond_sp is the index of the
   innermost open conditional; open_input() sets it to -1, which means
   that no conditional is open. */
45static struct cond cond_stack[COND_STACK_SIZE];
46static int cond_sp;
47
/* For each encoding, the list of character codes (all >= 0x80 here)
   that the encoding is known to contain.  The strings are parallel:
   build_recode_table() pairs the code at position N of one string with
   the code at position N of another and aborts if the strings differ
   in length.  find_encoding() uses the strings as membership sets. */
48static const uchar *char_table[ENCODINGS] =
49{
50 /* ENC_CP850 */
51 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
52 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\xa0"
53 "\xa1\xa2\xa3\xa4\xa5\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb5\xb6\xb7"
54 "\xb8\xbd\xbe\xc6\xc7\xcf\xd0\xd1\xd2\xd3\xd4\xd6\xd7\xd8\xdd\xde"
55 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xf1\xf3"
56 "\xf4\xf5\xf6\xfa\xfb\xfc\xfd",
57
58 /* ENC_ISO8859_1 */
59 "\xc7\xfc\xe9\xe2\xe4\xe0\xe5\xe7\xea\xeb\xe8\xef\xee\xec\xc4\xc5"
60 "\xc9\xe6\xc6\xf4\xf6\xf2\xfb\xf9\xff\xd6\xdc\xf8\xa3\xd8\xd7\xe1"
61 "\xed\xf3\xfa\xf1\xd1\xbf\xae\xac\xbd\xbc\xa1\xab\xbb\xc1\xc2\xc0"
62 "\xa9\xa2\xa5\xe3\xc3\xa4\xf0\xd0\xca\xcb\xc8\xcd\xce\xcf\xa6\xcc"
63 "\xd3\xdf\xd4\xd2\xf5\xd5\xb5\xfe\xde\xda\xdb\xd9\xfd\xdd\xb1\xbe"
64 "\xb6\xa7\xf7\xb7\xb9\xb3\xb2"
65};
66
/* Empty table.  NOTE(review): nothing in the visible part of this file
   refers to it; it looks like a leftover -- confirm before removing. */
67static const uchar chars_iso8859_1[] = "";
68
69static const struct recode_table *build_recode_table (enum enc inp,
70 enum enc out)
71{
72 struct recode_table *p;
73 const uchar *pi, *po;
74 int i;
75
76 if (inp < 0 || inp >= ENCODINGS || out < 0 || out >= ENCODINGS)
77 abort ();
78 p = xmalloc (sizeof (*p));
79 p->inp = inp;
80 p->out = out;
81 memset (p->tab, 0, 256);
82 for (i = 0; i < 128; ++i)
83 p->tab[i] = (uchar)i;
84
85 pi = char_table[inp];
86 po = char_table[out];
87 for (i = 0; pi[i] != 0 && po[i] != 0; ++i)
88 if (pi[i] != ' ' && po[i] != ' ')
89 {
90 if (p->tab[pi[i]] != 0)
91 abort ();
92 p->tab[pi[i]] = po[i];
93 }
94 if (pi[i] != 0 || po[i] != 0)
95 abort ();
96 p->tab[escape] = escape;
97 return p;
98}
99
100static int recode (uchar *s, enum enc inp, enum enc out)
101{
102 static const struct recode_table *rt;
103 const uchar *t;
104
105 if (rt == NULL)
106 rt = build_recode_table (inp, out);
107 else if (inp != rt->inp || out != rt->out)
108 fatal ("recode(): encoding changed");
109 t = rt->tab;
110 while (*s != 0)
111 {
112 if (t[*s] == 0)
113 return *s;
114 *s = t[*s];
115 ++s;
116 }
117 return 0;
118}
119
120static enum enc find_encoding (const uchar *s)
121{
122 char pos[ENCODINGS];
123 char neg[ENCODINGS];
124 uchar c;
125 int i, j;
126
127 memset (pos, 0, ENCODINGS);
128 memset (neg, 0, ENCODINGS);
129 while (*s != 0)
130 {
131 c = *s++;
132 if (c >= 0x80 && c != escape)
133 for (i = 0; i < ENCODINGS; ++i)
134 {
135 if (strchr (char_table[i], c) == NULL)
136 neg[i] = 1;
137 else
138 pos[i] = 1;
139 }
140 }
141
142 i = 0;
143 while (i < ENCODINGS && !pos[i] && !neg[i])
144 ++i;
145 if (i >= ENCODINGS)
146 return ENC_DEFAULT; /* No character codes >= 0x80 */
147
148 j = -1;
149 for (i = 0; i < ENCODINGS; ++i)
150 if (pos[i] && !neg[i])
151 {
152 if (j != -1)
153 return ENC_AMBIGUOUS;
154 j = i;
155 }
156 if (j == -1)
157 return ENC_UNSUPPORTED;
158 return (enum enc)j;
159}
160
161static void choose_encoding (const uchar *s)
162{
163 enum enc e = find_encoding (s);
164 if (e != ENC_DEFAULT)
165 {
166 if (e == ENC_AMBIGUOUS)
167 fatal ("Input encoding ambiguous, please use -i");
168 if (e == ENC_UNSUPPORTED)
169 fatal ("Input encoding unsupported, please use -i");
170 input_encoding = e;
171 if (output_encoding == ENC_DEFAULT)
172 output_encoding = e;
173 }
174}
175
/* Character classes used when matching tags:
   ISARG  -- C is '{' or '['.
   ISARGW -- C is whitespace or satisfies ISARG.
   ISENDW -- C is whitespace or the terminating null character.
   ISARGW and ISENDW hand C to isspace(). */
176#define ISARG(C) ((C) == '{' || (C) == '[')
177#define ISARGW(C) (isspace (C) || ISARG (C))
178#define ISENDW(C) (isspace (C) || (C) == 0)
179
/* Advance pointer P by AT characters, then skip at most one (SKIP1W)
   or all (SKIPW) whitespace characters that follow.  P is evaluated
   more than once, so it must not have side effects. */
180#define SKIP1W(P,AT) do { (P) += (AT); if (isspace (*(P))) ++(P); } while (0)
181#define SKIPW(P,AT) do { (P) += (AT); while (isspace (*(P))) ++(P); } while (0)
182
183void read_line (void)
184{
185 uchar *p;
186 int c1;
187
188redo:
189 ++line_no;
190 if (fgets (input, sizeof (input), input_file) == NULL)
191 {
192 if (ferror (input_file))
193 {
194 perror (input_fname);
195 exit (1);
196 }
197 end_of_file = TRUE;
198 if (cond_sp >= 0)
199 fatal ("%s:%d: Unterminated %cif",
200 input_fname, cond_stack[cond_sp].start_line, escape);
201 return;
202 }
203 p = strchr (input, '\n');
204 if (p == NULL)
205 fatal ("%s:%d: Line too long", input_fname, line_no);
206 *p = 0;
207 if (input_encoding == ENC_DEFAULT && output_encoding != ENC_DEFAULT)
208 choose_encoding (input);
209 if (input_encoding != output_encoding && input_encoding != ENC_DEFAULT)
210 {
211 int c = recode (input, input_encoding, output_encoding);
212 if (c != 0)
213 fatal ("%s:%d: unsupported character 0x%.2x",
214 input_fname, line_no, c);
215 }
216 if (input[0] == escape)
217 {
218 p = input + 1;
219 if (strncmp (p, "c", 1) == 0 && ISENDW (p[1]))
220 goto redo;
221 if (strncmp (p, "if", 2) == 0 && isspace (p[2]))
222 {
223 SKIPW (p, 2);
224 c1 = condition (p);
225 if (cond_sp + 1 >= COND_STACK_SIZE)
226 fatal ("%s:%d: Conditional stack overflow", input_fname, line_no);
227 ++cond_sp;
228 cond_stack[cond_sp].true = c1;
229 cond_stack[cond_sp].start_line = line_no;
230 cond_stack[cond_sp].else_seen = FALSE;
231 goto redo;
232 }
233 else if (strcmp (p, "else") == 0)
234 {
235 if (cond_sp < 0)
236 fatal ("%s:%d: %celse without %cif",
237 input_fname, line_no, escape, escape);
238 if (cond_stack[cond_sp].else_seen)
239 fatal ("%s:%d: Multiple %celse for %cif in line %d",
240 input_fname, line_no, escape, escape,
241 cond_stack[cond_sp].start_line);
242 cond_stack[cond_sp].else_seen = TRUE;
243 cond_stack[cond_sp].true = !cond_stack[cond_sp].true;
244 goto redo;
245 }
246 else if (strcmp (p, "endif") == 0)
247 {
248 if (cond_sp < 0)
249 fatal ("%s:%d: %cendif without %cif",
250 input_fname, line_no, escape, escape);
251 --cond_sp;
252 goto redo;
253 }
254 else if (p[0] == 'h' && p[1] >= '1' && p[1] <= '0' + SECTION_LEVELS)
255 {
256 /* Support h1 inside if */
257 if (cond_sp >= 0 && !cond_stack[cond_sp].true)
258 ++ref_no;
259 }
260 }
261
262 if (cond_sp >= 0 && !cond_stack[cond_sp].true)
263 goto redo;
264
265 p = input;
266 while (isspace (*p))
267 ++p;
268 if (*p == 0)
269 input[0] = 0;
270}
271
272
273void open_input (const char *name)
274{
275
276 line_no = 0; end_of_file = FALSE; cond_sp = -1;
277 input_fname = name;
278 input_file = fopen (input_fname, "rt");
279 if (input_file == NULL)
280 {
281 perror (input_fname);
282 exit (1);
283 }
284}
285
286
287static void invalid_tag (void)
288{
289 fatal ("%s:%d: Invalid tag", input_fname, line_no);
290}
291
292
293int parse_tag (const uchar **ptr)
294{
295 const uchar *p;
296
297 p = *ptr;
298 if (*p != escape)
299 return FALSE;
300 ++p; /* Skip escape character */
301 tg_flags = 0;
302 switch (*p)
303 {
304 case '.':
305 tg_tag = TAG_FULLSTOP;
306 ++p;
307 break;
308
309 case 'b':
310 if (strncmp (p, "bf", 2) == 0 && ISARGW (p[2]))
311 {
312 tg_tag = TAG_STYLE;
313 tg_style = STYLE_BOLD;
314 p += 2;
315 }
316 else if (strncmp (p, "break", 5) == 0 && ISENDW (p[5]))
317 {
318 tg_tag = TAG_BREAK;
319 SKIP1W (p, 5); /* Skip one space */
320 }
321 else if (strcmp (p, "bugs") == 0)
322 tg_tag = TAG_BUGS;
323 else
324 invalid_tag ();
325 break;
326
327 case 'c':
328 if (strncmp (p, "compat", 6) == 0 && isspace (p[6]))
329 {
330 tg_tag = TAG_COMPAT;
331 p += 6;
332 }
333 else
334 invalid_tag ();
335 break;
336
337 case 'd':
338 if (strcmp (p, "description") == 0)
339 tg_tag = TAG_DESCRIPTION;
340 else
341 invalid_tag ();
342 break;
343
344 case 'e':
345 if (strncmp (p, "em", 2) == 0 && ISARGW (p[2]))
346 {
347 tg_tag = TAG_STYLE;
348 tg_style = STYLE_EMPHASIZE;
349 p += 2;
350 }
351 else if (strcmp (p, "enddescription") == 0)
352 tg_tag = TAG_ENDDESCRIPTION;
353 else if (strcmp (p, "endenumerate") == 0)
354 tg_tag = TAG_ENDENUMERATE;
355 else if (strcmp (p, "endexample") == 0)
356 tg_tag = TAG_ENDEXAMPLE;
357 else if (strcmp (p, "endheaders") == 0)
358 tg_tag = TAG_ENDHEADERS;
359 else if (strcmp (p, "endhtml") == 0)
360 tg_tag = TAG_ENDHTML;
361 else if (strcmp (p, "endindent") == 0)
362 tg_tag = TAG_ENDINDENT;
363 else if (strcmp (p, "enditemize") == 0)
364 tg_tag = TAG_ENDITEMIZE;
365 else if (strcmp (p, "endlist") == 0)
366 tg_tag = TAG_ENDLIST;
367 else if (strcmp (p, "endprototype") == 0)
368 tg_tag = TAG_ENDPROTOTYPE;
369 else if (strcmp (p, "endsamplecode") == 0)
370 tg_tag = TAG_ENDSAMPLECODE;
371 else if (strcmp (p, "endverbatim") == 0)
372 tg_tag = TAG_ENDVERBATIM;
373 else if (strcmp (p, "endtypewriter") == 0)
374 tg_tag = TAG_ENDTYPEWRITER;
375 else if (strcmp (p, "endipf") == 0)
376 tg_tag = TAG_ENDIPF;
377 else if (strcmp (p, "endlatex") == 0)
378 tg_tag = TAG_ENDLATEX;
379 else if (strcmp (p, "endtext") == 0)
380 tg_tag = TAG_ENDTEXT;
381 else if (strcmp (p, "endtable") == 0)
382 tg_tag = TAG_ENDTABLE;
383 else if (strcmp (p, "enumerate") == 0)
384 tg_tag = TAG_ENUMERATE;
385 else if (strcmp (p, "errors") == 0)
386 tg_tag = TAG_ERRORS;
387 else if (strcmp (p, "example") == 0)
388 tg_tag = TAG_EXAMPLE;
389 else
390 invalid_tag ();
391 break;
392
393 case 'f':
394 if (strncmp (p, "format", 6) == 0 && isspace (p[6]))
395 {
396 tg_tag = TAG_FORMAT;
397 SKIPW (p, 6);
398 }
399 else if (strncmp (p, "function", 8) == 0 && isspace (p[8]))
400 {
401 tg_tag = TAG_FUNCTION;
402 SKIPW (p, 8);
403 }
404 else
405 invalid_tag ();
406 break;
407
408 case 'h':
409 if (p[1] >= '1' && p[1] <= '0' + SECTION_LEVELS)
410 {
411 tg_tag = TAG_HEADING;
412 tg_level = p[1] - '0';
413 tg_underline = (tg_level == 1 ? '=' : '-');
414 p += 2;
415 while (*p != 0 && !isspace (*p))
416 switch (*p++)
417 {
418 case 'h':
419 tg_flags |= HF_HIDE;
420 break;
421 case 'u':
422 tg_flags |= HF_UNNUMBERED;
423 break;
424 default:
425 invalid_tag ();
426 }
427 SKIPW (p, 0);
428 }
429 else if (p[1] == '-' || p[1] == '=')
430 {
431 tg_tag = TAG_HEADING;
432 tg_level = 0;
433 tg_underline = p[1];
434 SKIPW (p, 2);
435 }
436 else if (strncmp (p, "hpt", 3) == 0 && ISARGW (p[3]))
437 {
438 tg_tag = TAG_HPT;
439 p += 3;
440 }
441 else if (strcmp (p, "headers") == 0)
442 tg_tag = TAG_HEADERS;
443 else if (strcmp (p, "hints") == 0)
444 tg_tag = TAG_HINTS;
445 else if (strcmp (p, "html") == 0)
446 tg_tag = TAG_HTML;
447 else if (strncmp (p, "htmlfragment", 12) == 0 && isspace (p[12]))
448 {
449 tg_tag = TAG_HTMLFRAGMENT;
450 SKIPW (p, 12);
451 }
452 else if (strcmp (p, "htmlminitoc") == 0)
453 tg_tag = TAG_HTMLMINITOC;
454 else
455 invalid_tag ();
456 break;
457
458 case 'i':
459 if (p[1] >= '1' && p[1] <= '2')
460 {
461 tg_tag = TAG_INDEX;
462 tg_level = p[1] - '0';
463 SKIPW (p, 2);
464 }
465 else if (strcmp (p, "implementation") == 0)
466 tg_tag = TAG_IMPLEMENTATION;
467 else if (strcmp (p, "ipf") == 0)
468 tg_tag = TAG_IPF;
469 else if (strcmp (p, "ipfminitoc") == 0)
470 tg_tag = TAG_IPFMINITOC;
471 else if (strncmp (p, "item", 4) == 0 && ISENDW (p[4]))
472 {
473 tg_tag = TAG_ITEM;
474 SKIPW (p, 4);
475 }
476 else if (strncmp (p, "index", 5) == 0 && isspace (p[5]))
477 {
478 tg_tag = TAG_INDEX;
479 tg_level = 0;
480 SKIPW (p, 5);
481 }
482 else if (strcmp (p, "indent") == 0)
483 tg_tag = TAG_INDENT;
484 else if (strcmp (p, "itemize") == 0)
485 tg_tag = TAG_ITEMIZE;
486 else
487 invalid_tag ();
488 break;
489
490 case 'k':
491 if (strncmp (p, "keyword ", 8) == 0)
492 {
493 tg_tag = TAG_KEYWORD;
494 SKIPW (p, 8);
495 }
496 else
497 invalid_tag ();
498 break;
499
500 case 'l':
501 if (strncmp (p, "label ", 6) == 0)
502 {
503 tg_tag = TAG_LABEL;
504 SKIPW (p, 6);
505 }
506 else if (strncmp (p, "language", 8) == 0 && isspace (p[8]))
507 {
508 tg_tag = TAG_LANGUAGE;
509 SKIPW (p, 8);
510 }
511 else if (strcmp (p, "latex") == 0)
512 tg_tag = TAG_LATEX;
513 else if (strcmp (p, "list") == 0)
514 tg_tag = TAG_LIST;
515 else
516 invalid_tag ();
517 break;
518
519 case 'm':
520 if (strcmp (p, "minitoc") == 0)
521 tg_tag = TAG_MINITOC;
522 else
523 invalid_tag ();
524 break;
525 case 'p':
526 if (strcmp (p, "prototype") == 0)
527 tg_tag = TAG_PROTOTYPE;
528 else if (strncmp (p, "param", 5) == 0 && isspace (p[5]))
529 {
530 tg_tag = TAG_PARAM;
531 SKIPW (p, 5);
532 }
533 else if (strncmp (p, "pa", 2) == 0 && ISARGW (p[2]))
534 {
535 tg_tag = TAG_STYLE;
536 tg_style = STYLE_PARAM;
537 p += 2;
538 }
539 else
540 invalid_tag ();
541 break;
542
543 case 'r':
544 if (strncmp (p, "ref", 3) == 0 && ISARGW (p[3]))
545 {
546 tg_tag = TAG_REF;
547 p += 3;
548 }
549 else if (strncmp (p, "replace", 7) == 0 && isspace (p[7]))
550 {
551 tg_tag = TAG_REPLACE;
552 SKIPW (p, 7);
553 }
554 else if (strcmp (p, "restrictions") == 0)
555 tg_tag = TAG_RESTRICTIONS;
556 else if (strcmp (p, "returnvalue") == 0)
557 tg_tag = TAG_RETURNVALUE;
558 else
559 invalid_tag ();
560 break;
561
562 case 's':
563 if (strncmp (p, "seealso", 7) == 0 && isspace (p[7]))
564 {
565 tg_tag = TAG_SEEALSO;
566 p += 7;
567 }
568 else if (strcmp (p, "samplecode") == 0)
569 tg_tag = TAG_SAMPLECODE;
570 else if (strncmp (p, "samplefile", 10) == 0 && isspace (p[10]))
571 {
572 tg_tag = TAG_SAMPLEFILE;
573 SKIPW (p, 10);
574 }
575 else if (strncmp (p, "set", 3) == 0 && isspace (p[3]))
576 {
577 tg_tag = TAG_SET;
578 SKIPW (p, 3);
579 }
580 else if (strncmp (p, "sl", 2) == 0 && ISARGW (p[2]))
581 {
582 tg_tag = TAG_STYLE;
583 tg_style = STYLE_SLANTED;
584 p += 2;
585 }
586 else if (strncmp (p, "special", 7) == 0 && isspace (p[7]))
587 {
588 tg_tag = TAG_SPECIAL;
589 SKIPW (p, 7);
590 }
591 else if (strncmp (p, "sy", 2) == 0 && ISARGW (p[2]))
592 {
593 tg_tag = TAG_STYLE;
594 tg_style = STYLE_SYNTAX;
595 p += 2;
596 }
597 else if (strncmp (p, "syntax", 6) == 0 && isspace (p[6]))
598 {
599 tg_tag = TAG_SYNTAX;
600 SKIPW (p, 6);
601 }
602 else
603 invalid_tag ();
604 break;
605
606 case 't':
607 if (strncmp (p, "tt", 2) == 0 && ISARGW (p[2]))
608 {
609 tg_tag = TAG_STYLE;
610 tg_style = STYLE_TTY;
611 p += 2;
612 }
613 else if (strcmp (p, "toc") == 0)
614 tg_tag = TAG_TOC;
615 else if (strcmp (p, "text") == 0)
616 tg_tag = TAG_TEXT;
617 else if (strncmp (p, "table ", 6) == 0)
618 {
619 tg_tag = TAG_TABLE;
620 SKIPW (p, 6);
621 }
622 else if (strncmp (p, "title ", 6) == 0)
623 {
624 tg_tag = TAG_TITLE;
625 SKIPW (p, 6);
626 }
627 else if (strcmp (p, "typewriter") == 0)
628 tg_tag = TAG_TYPEWRITER;
629 else
630 invalid_tag ();
631 break;
632
633 case 'u':
634 if (strncmp (p, "ul", 2) == 0 && ISARGW (p[2]))
635 {
636 tg_tag = TAG_STYLE;
637 tg_style = STYLE_UNDERLINE;
638 p += 2;
639 }
640 else
641 invalid_tag ();
642 break;
643
644 case 'v':
645 if (strcmp (p, "verbatim") == 0)
646 tg_tag = TAG_VERBATIM;
647 else
648 invalid_tag ();
649 break;
650
651 default:
652 invalid_tag ();
653 }
654 *ptr = p;
655 return TRUE;
656}
Note: See TracBrowser for help on using the repository browser.