source: trunk/src/oldsed/sed/compile.c

Last change on this file was 1301, checked in by bird, 18 years ago

Added options for sending the output to a file without having to make use of redirection (-o, --output, --output-text, --output-binary).

File size: 41.3 KB
Line 
1/* GNU SED, a batch stream editor.
2 Copyright (C) 1989,90,91,92,93,94,95,98,99,2002,2003,2004,2005,2006
3 Free Software Foundation, Inc.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18
19/* compile.c: translate sed source into internal form */
20
21#include "sed.h"
22#include "strverscmp.h"
23#include <stdio.h>
24#include <ctype.h>
25
26#ifdef HAVE_STRINGS_H
27# include <strings.h>
28# ifdef HAVE_MEMORY_H
29# include <memory.h>
30# endif
31#else
32# include <string.h>
33#endif /* HAVE_STRINGS_H */
34
35#ifdef HAVE_STDLIB_H
36# include <stdlib.h>
37#endif
38#ifndef EXIT_FAILURE
39# define EXIT_FAILURE 1
40#endif
41
42#ifdef HAVE_SYS_TYPES_H
43# include <sys/types.h>
44#endif
45
46#include <obstack.h>
47
48
49
/* NOTE(review): YMAP_LENGTH is not referenced in this part of the file;
   presumably it sizes a per-byte translation map -- confirm at its use.
   XXX shouldn't this be (UCHAR_MAX+1)? */
#define YMAP_LENGTH 256
/* Number of `struct sed_cmd' slots by which next_cmd_entry() grows a
   command vector once it is full. */
#define VECTOR_ALLOC_INCREMENT 40

/* Named bracket and brace characters, so that text editors with only
   dumb bracket-matching are not confused by bare literals in the code. */
#define OPEN_BRACKET '['
#define CLOSE_BRACKET ']'
#define OPEN_BRACE '{'
#define CLOSE_BRACE '}'
58
/* Describes where the script text currently being compiled comes from:
   either an in-memory string (`base'/`cur'/`end') or a stdio stream
   (`file').  inchar() and savchar() consult `cur' first and only fall
   back to `file' when `cur' is NULL. */
struct prog_info {
  /* When we're reading a script command from a string, `prog.base'
     points to the first character in the string, `prog.cur' points
     to the current character in the string, and `prog.end' points
     to the end of the string.  This allows us to compile script
     strings that contain nulls. */
  const unsigned char *base;
  const unsigned char *cur;
  const unsigned char *end;

  /* This is the current script file.  If it is NULL, we are reading
     from a string stored at `prog.cur' instead.  If both `prog.file'
     and `prog.cur' are NULL, we're in trouble! */
  FILE *file;
};
74
/* Information used to give out useful and informative error messages.
   bad_prog() reports `name' and `line' when `name' is non-NULL, and
   otherwise reports `string_expr_count' together with the character
   offset into the current script string. */
struct error_info {
  /* This is the name of the current script file. */
  const char *name;

  /* This is the number of the current script line that we're compiling.
     inchar() increments it on every newline read and savchar()
     decrements it again when a newline is pushed back. */
  countT line;

  /* This is the index of the "-e" expressions on the command line. */
  countT string_expr_count;
};
86
87
/* Label structure used to resolve GOTO's, labels, and block beginnings.
   Nodes are created by setup_label() and form singly linked stacks
   (the file-scope lists `jumps', `labels' and `blocks'). */
struct sed_label {
  countT v_index;		/* index of vector element being referenced */
  char *name;			/* NUL-terminated name of the label;
				   NULL for entries pushed for `{' */
  struct error_info err_info;	/* track where `{}' blocks start; only
				   filled in when setup_label() is given
				   a non-NULL err_info */
  struct sed_label *next;	/* linked list (stack) */
};
95
/* Associates an output record with the address of the stream variable
   that backs it.  get_openfile() copies `*pfp' into `outf.fp' when the
   entry's name matches the requested file name. */
struct special_files {
  struct output outf;
  FILE **pfp;
};

/* The std* streams sometimes are not constants and so cannot appear in
   a static initializer; these variables are assigned inside
   get_openfile() and referenced indirectly from the table below. */
FILE *my_stdin, *my_stdout, *my_stderr;

/* Table of file names that get_openfile() treats specially when
   posixicity is POSIXLY_EXTENDED.  The entry whose name is NULL
   terminates the table. */
struct special_files special_files[] = {
  { { "/dev/stdin", false, NULL, NULL }, &my_stdin },
  { { "/dev/stdout", false, NULL, NULL }, &my_stdout },
  { { "/dev/stderr", false, NULL, NULL }, &my_stderr },
  { { NULL, false, NULL, NULL }, NULL }
};
108
109
110
/* Where we are in the processing of the input.  `prog' is what
   inchar()/savchar() read from; `cur_input' is what bad_prog() uses to
   locate an error. */
static struct prog_info prog;
static struct error_info cur_input;

/* Information about labels and jumps-to-labels.  This is used to do
   the required backpatching after we have compiled all the scripts.
   compile_program() pushes onto `labels' for `:' and onto `jumps' for
   `b', `t' and `T'. */
static struct sed_label *jumps = NULL;
static struct sed_label *labels = NULL;

/* We wish to detect #n magic only in the first input argument;
   this flag tracks when we have consumed the first file of input. */
static bool first_script = true;

/* Allow for scripts like "sed -e 'i\' -e foo": read_text() keeps the
   partially read text in `pending_text' and remembers in `old_text_buf'
   which command's text buffer it belongs to, so that a later call with
   a NULL buffer can finish it. */
static struct buffer *pending_text = NULL;
static struct text_buf *old_text_buf = NULL;

/* Information about block start positions.  This is used to backpatch
   block end positions: an entry is pushed for every `{' and released
   when the matching `}' is compiled. */
static struct sed_label *blocks = NULL;

/* Use an obstack for compilation.  It is initialized by
   compile_program() at the time it allocates a fresh command vector. */
static struct obstack obs;
134
/* Various error messages we may want to print.

   All messages live in ONE character array, separated by explicit
   "\0" terminators (the last one relies on the implicit terminator of
   the string literal).  Individual messages are addressed through the
   offset macros that follow this table (BAD_BANG, BAD_COMMA, ...),
   each of which adds up the sizeof of every preceding message.  The
   text and the ORDER of the entries here must therefore stay exactly
   in step with those macros: edit both together. */
static const char errors[] =
  "multiple `!'s\0"
  "unexpected `,'\0"
  "invalid usage of +N or ~N as first address\0"
  "unmatched `{'\0"
  "unexpected `}'\0"
  "extra characters after command\0"
  "expected \\ after `a', `c' or `i'\0"
  "`}' doesn't want any addresses\0"
  ": doesn't want any addresses\0"
  "comments don't accept any addresses\0"
  "missing command\0"
  "command only uses one address\0"
  "unterminated address regex\0"
  "unterminated `s' command\0"
  "unterminated `y' command\0"
  "unknown option to `s'\0"
  "multiple `p' options to `s' command\0"
  "multiple `g' options to `s' command\0"
  "multiple number options to `s' command\0"
  "number option to `s' command may not be zero\0"
  "strings for `y' command are different lengths\0"
  "delimiter character is not a single-byte character\0"
  "expected newer version of sed\0"
  "invalid usage of line address 0\0"
  "unknown command: `%c'";
162
/* Pointers to the individual messages packed into errors[].

   Each macro is the previous macro plus the sizeof of the previous
   message's string literal (sizeof counts the terminating NUL, which
   matches the "\0" separator in the table).  The literal given to each
   sizeof must therefore be byte-for-byte the same as the corresponding
   errors[] entry, and the chain must follow the table's order.
   The literals are wrapped in N_(); NOTE(review): presumably this only
   marks them for message extraction -- confirm against its definition.
   END_ERRORS is the address just past the last message. */
#define BAD_BANG (errors)
#define BAD_COMMA (BAD_BANG + sizeof(N_("multiple `!'s")))
#define BAD_STEP (BAD_COMMA + sizeof(N_("unexpected `,'")))
#define EXCESS_OPEN_BRACE (BAD_STEP + sizeof(N_("invalid usage of +N or ~N as first address")))
#define EXCESS_CLOSE_BRACE (EXCESS_OPEN_BRACE + sizeof(N_("unmatched `{'")))
#define EXCESS_JUNK (EXCESS_CLOSE_BRACE + sizeof(N_("unexpected `}'")))
#define EXPECTED_SLASH (EXCESS_JUNK + sizeof(N_("extra characters after command")))
#define NO_CLOSE_BRACE_ADDR (EXPECTED_SLASH + sizeof(N_("expected \\ after `a', `c' or `i'")))
#define NO_COLON_ADDR (NO_CLOSE_BRACE_ADDR + sizeof(N_("`}' doesn't want any addresses")))
#define NO_SHARP_ADDR (NO_COLON_ADDR + sizeof(N_(": doesn't want any addresses")))
#define NO_COMMAND (NO_SHARP_ADDR + sizeof(N_("comments don't accept any addresses")))
#define ONE_ADDR (NO_COMMAND + sizeof(N_("missing command")))
#define UNTERM_ADDR_RE (ONE_ADDR + sizeof(N_("command only uses one address")))
#define UNTERM_S_CMD (UNTERM_ADDR_RE + sizeof(N_("unterminated address regex")))
#define UNTERM_Y_CMD (UNTERM_S_CMD + sizeof(N_("unterminated `s' command")))
#define UNKNOWN_S_OPT (UNTERM_Y_CMD + sizeof(N_("unterminated `y' command")))
#define EXCESS_P_OPT (UNKNOWN_S_OPT + sizeof(N_("unknown option to `s'")))
#define EXCESS_G_OPT (EXCESS_P_OPT + sizeof(N_("multiple `p' options to `s' command")))
#define EXCESS_N_OPT (EXCESS_G_OPT + sizeof(N_("multiple `g' options to `s' command")))
#define ZERO_N_OPT (EXCESS_N_OPT + sizeof(N_("multiple number options to `s' command")))
#define Y_CMD_LEN (ZERO_N_OPT + sizeof(N_("number option to `s' command may not be zero")))
#define BAD_DELIM (Y_CMD_LEN + sizeof(N_("strings for `y' command are different lengths")))
#define ANCIENT_VERSION (BAD_DELIM + sizeof(N_("delimiter character is not a single-byte character")))
#define INVALID_LINE_0 (ANCIENT_VERSION + sizeof(N_("expected newer version of sed")))
#define UNKNOWN_CMD (INVALID_LINE_0 + sizeof(N_("invalid usage of line address 0")))
#define END_ERRORS (UNKNOWN_CMD + sizeof(N_("unknown command: `%c'")))
189
/* Heads of the lists of files opened through get_openfile(), which
   reuses an existing entry when the same name is requested again.
   `file_read' is passed for the `R' command (mode "r"); `file_write'
   is passed for `w', `W' and the `w' flag of `s' (mode "w"). */
static struct output *file_read = NULL;
static struct output *file_write = NULL;
192
193
194
195/* Complain about an unknown command and exit. */
196void
197bad_command(ch)
198 char ch;
199{
200 const char *msg = _(UNKNOWN_CMD);
201 char *unknown_cmd = xmalloc(strlen(msg));
202 sprintf(unknown_cmd, msg, ch);
203 bad_prog(unknown_cmd);
204}
205
206/* Complain about a programming error and exit. */
207void
208bad_prog(why)
209 const char *why;
210{
211 if (cur_input.name)
212 fprintf(stderr, _("%s: file %s line %lu: %s\n"),
213 myname, cur_input.name, CAST(unsigned long)cur_input.line, why);
214 else
215 fprintf(stderr, _("%s: -e expression #%lu, char %lu: %s\n"),
216 myname,
217 CAST(unsigned long)cur_input.string_expr_count,
218 CAST(unsigned long)(prog.cur-prog.base),
219 why);
220 exit(EXIT_FAILURE);
221}
222
223
224
225/* Read the next character from the program. Return EOF if there isn't
226 anything to read. Keep cur_input.line up to date, so error messages
227 can be meaningful. */
228static int inchar P_((void));
229static int
230inchar()
231{
232 int ch = EOF;
233
234 if (prog.cur)
235 {
236 if (prog.cur < prog.end)
237 ch = *prog.cur++;
238 }
239 else if (prog.file)
240 {
241 if (!feof(prog.file))
242 ch = getc(prog.file);
243 }
244 if (ch == '\n')
245 ++cur_input.line;
246 return ch;
247}
248
249/* unget `ch' so the next call to inchar will return it. */
250static void savchar P_((int ch));
251static void
252savchar(ch)
253 int ch;
254{
255 if (ch == EOF)
256 return;
257 if (ch == '\n' && cur_input.line > 0)
258 --cur_input.line;
259 if (prog.cur)
260 {
261 if (prog.cur <= prog.base || *--prog.cur != ch)
262 panic("Called savchar() with unexpected pushback (%x)",
263 CAST(unsigned char)ch);
264 }
265 else
266 ungetc(ch, prog.file);
267}
268
269/* Read the next non-blank character from the program. */
270static int in_nonblank P_((void));
271static int
272in_nonblank()
273{
274 int ch;
275 do
276 ch = inchar();
277 while (ISBLANK(ch));
278 return ch;
279}
280
281/* Read an integer value from the program. */
282static countT in_integer P_((int ch));
283static countT
284in_integer(ch)
285 int ch;
286{
287 countT num = 0;
288
289 while (ISDIGIT(ch))
290 {
291 num = num * 10 + ch - '0';
292 ch = inchar();
293 }
294 savchar(ch);
295 return num;
296}
297
298static int add_then_next P_((struct buffer *b, int ch));
299static int
300add_then_next(b, ch)
301 struct buffer *b;
302 int ch;
303{
304 add1_buffer(b, ch);
305 return inchar();
306}
307
308static char * convert_number P_((char *, char *, const char *, int, int, int));
/* Convert a run of digits in [BUF, BUFEND) to a number in the given
   BASE (digits 0-9 and, case-insensitively, a-f are recognised),
   consuming at most MAXDIGITS characters and stopping at the first
   character that is not a valid digit for BASE.

   The value, narrowed to a char, is stored in *RESULT; when not even
   one digit could be consumed, DEFAULT_CHAR is stored instead.
   Returns a pointer to the first unconsumed character. */
static char *
convert_number(result, buf, bufend, base, maxdigits, default_char)
  char *result;
  char *buf;
  const char *bufend;
  int base;
  int maxdigits;
  int default_char;
{
  char *cursor = buf;
  int value = 0;

  while (cursor < bufend && maxdigits > 0)
    {
      int c = *cursor;
      int digit;

      if ('0' <= c && c <= '9')
	digit = c - '0';
      else if ('a' <= c && c <= 'f')
	digit = c - 'a' + 10;
      else if ('A' <= c && c <= 'F')
	digit = c - 'A' + 10;
      else
	break;			/* not a digit in any supported base */

      if (digit >= base)
	break;			/* a digit, but too large for BASE */

      value = value * base + digit;
      ++cursor;
      --maxdigits;
    }

  *result = (cursor == buf) ? default_char : value;
  return cursor;
}
353
354
355
356/* Read in a filename for a `r', `w', or `s///w' command. */
357static struct buffer *read_filename P_((void));
358static struct buffer *
359read_filename()
360{
361 struct buffer *b;
362 int ch;
363
364 b = init_buffer();
365 ch = in_nonblank();
366 while (ch != EOF && ch != '\n')
367 {
368#if 0 /*XXX ZZZ 1998-09-12 kpp: added, then had second thoughts*/
369 if (posixicity == POSIXLY_EXTENDED)
370 if (ch == ';' || ch == '#')
371 {
372 savchar(ch);
373 break;
374 }
375#endif
376 ch = add_then_next(b, ch);
377 }
378 add1_buffer(b, '\0');
379 return b;
380}
381
382static struct output *get_openfile P_((struct output **file_ptrs, char *mode, bool fail));
383static struct output *
384get_openfile(file_ptrs, mode, fail)
385 struct output **file_ptrs;
386 char *mode;
387 bool fail;
388{
389 struct buffer *b;
390 char *file_name;
391 struct output *p;
392 int is_stderr;
393
394 b = read_filename();
395 file_name = get_buffer(b);
396 for (p=*file_ptrs; p; p=p->link)
397 if (strcmp(p->name, file_name) == 0)
398 break;
399
400 if (posixicity == POSIXLY_EXTENDED)
401 {
402 /* Check whether it is a special file (stdin, stdout or stderr) */
403 struct special_files *special = special_files;
404
405 /* std* sometimes are not constants, so they
406 cannot be used in the initializer for special_files */
407#ifndef CONFIG_WITHOUT_O_OPT
408 my_stdin = stdin; my_stdout = sed_stdout; my_stderr = stderr;
409#else
410 my_stdin = stdin; my_stdout = stdout; my_stderr = stderr;
411#endif
412 for (special = special_files; special->outf.name; special++)
413 if (strcmp(special->outf.name, file_name) == 0)
414 {
415 special->outf.fp = *special->pfp;
416 free_buffer (b);
417 return &special->outf;
418 }
419 }
420
421 if (!p)
422 {
423 p = OB_MALLOC(&obs, 1, struct output);
424 p->name = ck_strdup(file_name);
425 p->fp = ck_fopen(p->name, mode, fail);
426 p->missing_newline = false;
427 p->link = *file_ptrs;
428 *file_ptrs = p;
429 }
430 free_buffer(b);
431 return p;
432}
433
434
435
436static struct sed_cmd *next_cmd_entry P_((struct vector **vectorp));
437static struct sed_cmd *
438next_cmd_entry(vectorp)
439 struct vector **vectorp;
440{
441 struct sed_cmd *cmd;
442 struct vector *v;
443
444 v = *vectorp;
445 if (v->v_length == v->v_allocated)
446 {
447 v->v_allocated += VECTOR_ALLOC_INCREMENT;
448 v->v = REALLOC(v->v, v->v_allocated, struct sed_cmd);
449 }
450
451 cmd = v->v + v->v_length;
452 cmd->a1 = NULL;
453 cmd->a2 = NULL;
454 cmd->range_state = RANGE_INACTIVE;
455 cmd->addr_bang = false;
456 cmd->cmd = '\0'; /* something invalid, to catch bugs early */
457
458 *vectorp = v;
459 return cmd;
460}
461
462static int snarf_char_class P_((struct buffer *b, mbstate_t *cur_stat));
463static int
464snarf_char_class(b, cur_stat)
465 struct buffer *b;
466 mbstate_t *cur_stat;
467{
468 int ch;
469 int state = 0;
470 int delim;
471 bool pending_mb = 0;
472
473 ch = inchar();
474 if (ch == '^')
475 ch = add_then_next(b, ch);
476 if (ch == CLOSE_BRACKET)
477 ch = add_then_next(b, ch);
478
479 /* States are:
480 0 outside a collation element, character class or collation class
481 1 after the bracket
482 2 after the opening ./:/=
483 3 after the closing ./:/= */
484
485 for (;; ch = add_then_next (b, ch))
486 {
487 pending_mb = BRLEN (ch, cur_stat) != 1;
488
489 switch (ch)
490 {
491 case EOF:
492 case '\n':
493 return ch;
494
495 case '.':
496 case ':':
497 case '=':
498 if (pending_mb)
499 continue;
500
501 if (state == 1)
502 {
503 delim = ch;
504 state++;
505 }
506 else if (ch == delim && state == 2)
507 state++;
508 else
509 break;
510
511 continue;
512
513 case OPEN_BRACKET:
514 if (pending_mb)
515 continue;
516
517 state++;
518 continue;
519
520 case CLOSE_BRACKET:
521 if (pending_mb)
522 continue;
523
524 if (state == 0 || state == 1)
525 return ch;
526 else if (state == 3)
527 state = 0;
528
529 break;
530
531 default:
532 break;
533 }
534
535 /* Getting a character different from .=: whilst in state 1
536 goes back to state 0, getting a character different from ]
537 whilst in state 3 goes back to state 2. */
538 state &= ~1;
539 }
540}
541
542static struct buffer *match_slash P_((int slash, bool regex));
543static struct buffer *
544match_slash(slash, regex)
545 int slash;
546 bool regex;
547{
548 struct buffer *b;
549 int ch;
550 bool pending_mb = false;
551 mbstate_t cur_stat;
552
553 memset (&cur_stat, 0, sizeof (mbstate_t));
554
555 if (BRLEN (slash, &cur_stat) == -2)
556 if (BRLEN (slash, &cur_stat) == -2)
557 bad_prog (BAD_DELIM);
558
559 memset (&cur_stat, 0, sizeof (mbstate_t));
560
561 b = init_buffer();
562 while ((ch = inchar()) != EOF && ch != '\n')
563 {
564 pending_mb = BRLEN (ch, &cur_stat) != 1;
565 pending_mb = BRLEN (ch, &cur_stat) != 1;
566
567 if (!pending_mb)
568 {
569 if (ch == slash)
570 return b;
571 else if (ch == '\\')
572 {
573 ch = inchar();
574 if (ch == EOF)
575 break;
576#ifndef REG_PERL
577 else if (ch == 'n' && regex)
578 ch = '\n';
579#endif
580 else if (ch != '\n' && ch != slash)
581 add1_buffer(b, '\\');
582 }
583 else if (ch == OPEN_BRACKET && regex)
584 {
585 add1_buffer(b, ch);
586 ch = snarf_char_class(b, &cur_stat);
587 if (ch != CLOSE_BRACKET)
588 break;
589 }
590 }
591
592 add1_buffer(b, ch);
593 }
594
595 if (ch == '\n')
596 savchar(ch); /* for proper line number in error report */
597 free_buffer(b);
598 return NULL;
599}
600
601static int mark_subst_opts P_((struct subst *cmd));
602static int
603mark_subst_opts(cmd)
604 struct subst *cmd;
605{
606 int flags = 0;
607 int ch;
608
609 cmd->global = false;
610 cmd->print = false;
611 cmd->eval = false;
612 cmd->numb = 0;
613 cmd->outf = NULL;
614
615 for (;;)
616 switch ( (ch = in_nonblank()) )
617 {
618 case 'i': /* GNU extension */
619 case 'I': /* GNU extension */
620 flags |= REG_ICASE;
621 break;
622
623#ifdef REG_PERL
624 case 's': /* GNU extension */
625 case 'S': /* GNU extension */
626 if (extended_regexp_flags & REG_PERL)
627 flags |= REG_DOTALL;
628 break;
629
630 case 'x': /* GNU extension */
631 case 'X': /* GNU extension */
632 if (extended_regexp_flags & REG_PERL)
633 flags |= REG_EXTENDED;
634 break;
635#endif
636
637 case 'm': /* GNU extension */
638 case 'M': /* GNU extension */
639 flags |= REG_NEWLINE;
640 break;
641
642 case 'e':
643 cmd->eval = true;
644 break;
645
646 case 'p':
647 if (cmd->print)
648 bad_prog(_(EXCESS_P_OPT));
649 cmd->print |= (1 << cmd->eval); /* 1=before eval, 2=after */
650 break;
651
652 case 'g':
653 if (cmd->global)
654 bad_prog(_(EXCESS_G_OPT));
655 cmd->global = true;
656 break;
657
658 case 'w':
659 cmd->outf = get_openfile(&file_write, "w", true);
660 return flags;
661
662 case '0': case '1': case '2': case '3': case '4':
663 case '5': case '6': case '7': case '8': case '9':
664 if (cmd->numb)
665 bad_prog(_(EXCESS_N_OPT));
666 cmd->numb = in_integer(ch);
667 if (!cmd->numb)
668 bad_prog(_(ZERO_N_OPT));
669 break;
670
671 case CLOSE_BRACE:
672 case '#':
673 savchar(ch);
674 /* Fall Through */
675 case EOF:
676 case '\n':
677 case ';':
678 return flags;
679
680 case '\r':
681 if (inchar() == '\n')
682 return flags;
683 /* FALLTHROUGH */
684
685 default:
686 bad_prog(_(UNKNOWN_S_OPT));
687 /*NOTREACHED*/
688 }
689}
690
691
692
693/* read in a label for a `:', `b', or `t' command */
694static char *read_label P_((void));
695static char *
696read_label()
697{
698 struct buffer *b;
699 int ch;
700 char *ret;
701
702 b = init_buffer();
703 ch = in_nonblank();
704
705 while (ch != EOF && ch != '\n'
706 && !ISBLANK(ch) && ch != ';' && ch != CLOSE_BRACE && ch != '#')
707 ch = add_then_next (b, ch);
708
709 savchar(ch);
710 add1_buffer(b, '\0');
711 ret = ck_strdup(get_buffer(b));
712 free_buffer(b);
713 return ret;
714}
715
716/* Store a label (or label reference) created by a `:', `b', or `t'
717 command so that the jump to/from the label can be backpatched after
718 compilation is complete, or a reference created by a `{' to be
719 backpatched when the corresponding `}' is found. */
720static struct sed_label *setup_label
721 P_((struct sed_label *, countT, char *, const struct error_info *));
722static struct sed_label *
723setup_label(list, idx, name, err_info)
724 struct sed_label *list;
725 countT idx;
726 char *name;
727 const struct error_info *err_info;
728{
729 struct sed_label *ret = OB_MALLOC(&obs, 1, struct sed_label);
730 ret->v_index = idx;
731 ret->name = name;
732 if (err_info)
733 MEMCPY(&ret->err_info, err_info, sizeof (ret->err_info));
734 ret->next = list;
735 return ret;
736}
737
738static struct sed_label *release_label P_((struct sed_label *list_head));
739static struct sed_label *
740release_label(list_head)
741 struct sed_label *list_head;
742{
743 struct sed_label *ret;
744
745 if (!list_head)
746 return NULL;
747 ret = list_head->next;
748
749 FREE(list_head->name);
750
751#if 0
752 /* We use obstacks */
753 FREE(list_head);
754#endif
755 return ret;
756}
757
758static struct replacement *new_replacement P_((char *, size_t,
759 enum replacement_types));
760static struct replacement *
761new_replacement(text, length, type)
762 char *text;
763 size_t length;
764 enum replacement_types type;
765{
766 struct replacement *r = OB_MALLOC(&obs, 1, struct replacement);
767
768 r->prefix = text;
769 r->prefix_length = length;
770 r->subst_id = -1;
771 r->repl_type = type;
772
773 /* r-> next = NULL; */
774 return r;
775}
776
777static void setup_replacement P_((struct subst *, const char *, size_t));
778static void
779setup_replacement(sub, text, length)
780 struct subst *sub;
781 const char *text;
782 size_t length;
783{
784 char *base;
785 char *p;
786 char *text_end;
787 enum replacement_types repl_type = REPL_ASIS, save_type = REPL_ASIS;
788 struct replacement root;
789 struct replacement *tail;
790
791 sub->max_id = 0;
792 base = MEMDUP(text, length, char);
793 length = normalize_text(base, length, TEXT_REPLACEMENT);
794
795 text_end = base + length;
796 tail = &root;
797
798 for (p=base; p<text_end; ++p)
799 {
800 if (*p == '\\')
801 {
802 /* Preceding the backslash may be some literal text: */
803 tail = tail->next =
804 new_replacement(base, CAST(size_t)(p - base), repl_type);
805
806 repl_type = save_type;
807
808 /* Skip the backslash and look for a numeric back-reference,
809 or a case-munging escape if not in POSIX mode: */
810 ++p;
811 if (p < text_end && (posixicity != POSIXLY_BASIC || ISDIGIT (*p)))
812 switch (*p)
813 {
814 case '0': case '1': case '2': case '3': case '4':
815 case '5': case '6': case '7': case '8': case '9':
816 tail->subst_id = *p - '0';
817 if (sub->max_id < tail->subst_id)
818 sub->max_id = tail->subst_id;
819 break;
820
821 case 'L':
822 repl_type = REPL_LOWERCASE;
823 save_type = REPL_LOWERCASE;
824 break;
825
826 case 'U':
827 repl_type = REPL_UPPERCASE;
828 save_type = REPL_UPPERCASE;
829 break;
830
831 case 'E':
832 repl_type = REPL_ASIS;
833 save_type = REPL_ASIS;
834 break;
835
836 case 'l':
837 save_type = repl_type;
838 repl_type |= REPL_LOWERCASE_FIRST;
839 break;
840
841 case 'u':
842 save_type = repl_type;
843 repl_type |= REPL_UPPERCASE_FIRST;
844 break;
845
846 default:
847 p[-1] = *p;
848 ++tail->prefix_length;
849 }
850
851 base = p + 1;
852 }
853 else if (*p == '&')
854 {
855 /* Preceding the ampersand may be some literal text: */
856 tail = tail->next =
857 new_replacement(base, CAST(size_t)(p - base), repl_type);
858
859 repl_type = save_type;
860 tail->subst_id = 0;
861 base = p + 1;
862 }
863 }
864 /* There may be some trailing literal text: */
865 if (base < text_end)
866 tail = tail->next =
867 new_replacement(base, CAST(size_t)(text_end - base), repl_type);
868
869 tail->next = NULL;
870 sub->replacement = root.next;
871}
872
873static void read_text P_((struct text_buf *buf, int leadin_ch));
874static void
875read_text(buf, leadin_ch)
876 struct text_buf *buf;
877 int leadin_ch;
878{
879 int ch;
880
881 /* Should we start afresh (as opposed to continue a partial text)? */
882 if (buf)
883 {
884 if (pending_text)
885 free_buffer(pending_text);
886 pending_text = init_buffer();
887 buf->text = NULL;
888 buf->text_length = 0;
889 old_text_buf = buf;
890 }
891 /* assert(old_text_buf != NULL); */
892
893 if (leadin_ch == EOF)
894 return;
895
896 if (leadin_ch != '\n')
897 add1_buffer(pending_text, leadin_ch);
898
899 ch = inchar();
900 while (ch != EOF && ch != '\n')
901 {
902 if (ch == '\\')
903 {
904 ch = inchar();
905 if (ch != EOF)
906 add1_buffer (pending_text, '\\');
907 }
908
909 if (ch == EOF)
910 {
911 add1_buffer (pending_text, '\n');
912 return;
913 }
914
915 ch = add_then_next (pending_text, ch);
916 }
917
918 add1_buffer(pending_text, '\n');
919 if (!buf)
920 buf = old_text_buf;
921 buf->text_length = normalize_text (get_buffer (pending_text),
922 size_buffer (pending_text), TEXT_BUFFER);
923 buf->text = MEMDUP(get_buffer(pending_text), buf->text_length, char);
924 free_buffer(pending_text);
925 pending_text = NULL;
926}
927
928
929
930/* Try to read an address for a sed command. If it succeeds,
931 return non-zero and store the resulting address in `*addr'.
932 If the input doesn't look like an address read nothing
933 and return zero. */
934static bool compile_address P_((struct addr *addr, int ch));
935static bool
936compile_address(addr, ch)
937 struct addr *addr;
938 int ch;
939{
940 addr->addr_type = ADDR_IS_NULL;
941 addr->addr_step = 0;
942 addr->addr_number = ~(countT)0; /* extremely unlikely to ever match */
943 addr->addr_regex = NULL;
944
945 if (ch == '/' || ch == '\\')
946 {
947 int flags = 0;
948 struct buffer *b;
949 addr->addr_type = ADDR_IS_REGEX;
950 if (ch == '\\')
951 ch = inchar();
952 if ( !(b = match_slash(ch, true)) )
953 bad_prog(_(UNTERM_ADDR_RE));
954
955 for(;;)
956 {
957 ch = in_nonblank();
958 switch(ch)
959 {
960 case 'I': /* GNU extension */
961 flags |= REG_ICASE;
962 break;
963
964#ifdef REG_PERL
965 case 'S': /* GNU extension */
966 if (extended_regexp_flags & REG_PERL)
967 flags |= REG_DOTALL;
968 break;
969
970 case 'X': /* GNU extension */
971 if (extended_regexp_flags & REG_PERL)
972 flags |= REG_EXTENDED;
973 break;
974#endif
975
976 case 'M': /* GNU extension */
977 flags |= REG_NEWLINE;
978 break;
979
980 default:
981 savchar (ch);
982 addr->addr_regex = compile_regex (b, flags, 0);
983 free_buffer(b);
984 return true;
985 }
986 }
987 }
988 else if (ISDIGIT(ch))
989 {
990 addr->addr_number = in_integer(ch);
991 addr->addr_type = ADDR_IS_NUM;
992 ch = in_nonblank();
993 if (ch != '~')
994 {
995 savchar(ch);
996 }
997 else
998 {
999 countT step = in_integer(in_nonblank());
1000 if (step > 0)
1001 {
1002 addr->addr_step = step;
1003 addr->addr_type = ADDR_IS_NUM_MOD;
1004 }
1005 }
1006 }
1007 else if (ch == '+' || ch == '~')
1008 {
1009 addr->addr_step = in_integer(in_nonblank());
1010 if (addr->addr_step==0)
1011 ; /* default to ADDR_IS_NULL; forces matching to stop on next line */
1012 else if (ch == '+')
1013 addr->addr_type = ADDR_IS_STEP;
1014 else
1015 addr->addr_type = ADDR_IS_STEP_MOD;
1016 }
1017 else if (ch == '$')
1018 {
1019 addr->addr_type = ADDR_IS_LAST;
1020 }
1021 else
1022 return false;
1023
1024 return true;
1025}
1026
1027/* Read a program (or a subprogram within `{' `}' pairs) in and store
1028 the compiled form in `*vector'. Return a pointer to the new vector. */
1029static struct vector *compile_program P_((struct vector *));
1030static struct vector *
1031compile_program(vector)
1032 struct vector *vector;
1033{
1034 struct sed_cmd *cur_cmd;
1035 struct buffer *b;
1036 int ch;
1037
1038 if (!vector)
1039 {
1040 vector = MALLOC(1, struct vector);
1041 vector->v = NULL;
1042 vector->v_allocated = 0;
1043 vector->v_length = 0;
1044
1045 obstack_init (&obs);
1046 }
1047 if (pending_text)
1048 read_text(NULL, '\n');
1049
1050 for (;;)
1051 {
1052 struct addr a;
1053
1054 while ((ch=inchar()) == ';' || ISSPACE(ch))
1055 ;
1056 if (ch == EOF)
1057 break;
1058
1059 cur_cmd = next_cmd_entry(&vector);
1060 if (compile_address(&a, ch))
1061 {
1062 if (a.addr_type == ADDR_IS_STEP
1063 || a.addr_type == ADDR_IS_STEP_MOD)
1064 bad_prog(_(BAD_STEP));
1065
1066 cur_cmd->a1 = MEMDUP(&a, 1, struct addr);
1067 ch = in_nonblank();
1068 if (ch == ',')
1069 {
1070 if (!compile_address(&a, in_nonblank()))
1071 bad_prog(_(BAD_COMMA));
1072
1073 cur_cmd->a2 = MEMDUP(&a, 1, struct addr);
1074 ch = in_nonblank();
1075 }
1076
1077 if (cur_cmd->a1->addr_type == ADDR_IS_NUM
1078 && cur_cmd->a1->addr_number == 0
1079 && (!cur_cmd->a2 || cur_cmd->a2->addr_type != ADDR_IS_REGEX))
1080 bad_prog(_(INVALID_LINE_0));
1081 }
1082 if (ch == '!')
1083 {
1084 cur_cmd->addr_bang = true;
1085 ch = in_nonblank();
1086 if (ch == '!')
1087 bad_prog(_(BAD_BANG));
1088 }
1089
1090 /* Do not accept extended commands in --posix mode. Also,
1091 a few commands only accept one address in that mode. */
1092 if (posixicity == POSIXLY_BASIC)
1093 switch (ch)
1094 {
1095 case 'v': case 'L': case 'Q': case 'T':
1096 case 'R': case 'W':
1097 bad_command(ch);
1098
1099 case 'a': case 'i': case 'l':
1100 case '=': case 'r':
1101 if (cur_cmd->a2)
1102 bad_prog(_(ONE_ADDR));
1103 }
1104
1105 cur_cmd->cmd = ch;
1106 switch (ch)
1107 {
1108 case '#':
1109 if (cur_cmd->a1)
1110 bad_prog(_(NO_SHARP_ADDR));
1111 ch = inchar();
1112 if (ch=='n' && first_script && cur_input.line < 2)
1113 if ( (prog.base && prog.cur==2+prog.base)
1114 || (prog.file && !prog.base && 2==ftell(prog.file)))
1115 no_default_output = true;
1116 while (ch != EOF && ch != '\n')
1117 ch = inchar();
1118 continue; /* restart the for (;;) loop */
1119
1120 case 'v':
1121 /* This is an extension. Programs needing GNU sed might start
1122 * with a `v' command so that other seds will stop.
1123 * We compare the version and ignore POSIXLY_CORRECT.
1124 */
1125 {
1126 char *version = read_label ();
1127 char *compared_version;
1128 compared_version = (*version == '\0') ? "4.0" : version;
1129 if (strverscmp (compared_version, SED_FEATURE_VERSION) > 0)
1130 bad_prog(_(ANCIENT_VERSION));
1131
1132 free (version);
1133 posixicity = POSIXLY_EXTENDED;
1134 }
1135 continue;
1136
1137 case '{':
1138 blocks = setup_label(blocks, vector->v_length, NULL, &cur_input);
1139 cur_cmd->addr_bang = !cur_cmd->addr_bang;
1140 break;
1141
1142 case '}':
1143 if (!blocks)
1144 bad_prog(_(EXCESS_CLOSE_BRACE));
1145 if (cur_cmd->a1)
1146 bad_prog(_(NO_CLOSE_BRACE_ADDR));
1147 ch = in_nonblank();
1148 if (ch == CLOSE_BRACE || ch == '#')
1149 savchar(ch);
1150 else if (ch != EOF && ch != '\n' && ch != ';')
1151 bad_prog(_(EXCESS_JUNK));
1152
1153 vector->v[blocks->v_index].x.jump_index = vector->v_length;
1154 blocks = release_label(blocks); /* done with this entry */
1155 break;
1156
1157 case 'e':
1158 ch = in_nonblank();
1159 if (ch == EOF || ch == '\n')
1160 {
1161 cur_cmd->x.cmd_txt.text_length = 0;
1162 break;
1163 }
1164 else
1165 goto read_text_to_slash;
1166
1167 case 'a':
1168 case 'i':
1169 case 'c':
1170 ch = in_nonblank();
1171
1172 read_text_to_slash:
1173 if (ch == EOF)
1174 bad_prog(_(EXPECTED_SLASH));
1175
1176 if (ch == '\\')
1177 ch = inchar();
1178 else
1179 {
1180 savchar(ch);
1181 ch = '\n';
1182 }
1183
1184 read_text(&cur_cmd->x.cmd_txt, ch);
1185 break;
1186
1187 case ':':
1188 if (cur_cmd->a1)
1189 bad_prog(_(NO_COLON_ADDR));
1190 labels = setup_label(labels, vector->v_length, read_label(), NULL);
1191 break;
1192
1193 case 'T':
1194 case 'b':
1195 case 't':
1196 jumps = setup_label(jumps, vector->v_length, read_label(), NULL);
1197 break;
1198
1199 case 'Q':
1200 case 'q':
1201 if (cur_cmd->a2)
1202 bad_prog(_(ONE_ADDR));
1203 /* Fall through */
1204
1205 case 'L':
1206 case 'l':
1207 ch = in_nonblank();
1208 if (ISDIGIT(ch))
1209 {
1210 cur_cmd->x.int_arg = in_integer(ch);
1211 ch = in_nonblank();
1212 }
1213 else
1214 cur_cmd->x.int_arg = -1;
1215
1216 if (ch == CLOSE_BRACE || ch == '#')
1217 savchar(ch);
1218 else if (ch != EOF && ch != '\n' && ch != ';')
1219 bad_prog(_(EXCESS_JUNK));
1220
1221 break;
1222
1223 case '=':
1224 case 'd':
1225 case 'D':
1226 case 'g':
1227 case 'G':
1228 case 'h':
1229 case 'H':
1230 case 'n':
1231 case 'N':
1232 case 'p':
1233 case 'P':
1234 case 'x':
1235 ch = in_nonblank();
1236 if (ch == CLOSE_BRACE || ch == '#')
1237 savchar(ch);
1238 else if (ch != EOF && ch != '\n' && ch != ';')
1239 bad_prog(_(EXCESS_JUNK));
1240 break;
1241
1242 case 'r':
1243 b = read_filename();
1244 cur_cmd->x.fname = ck_strdup(get_buffer(b));
1245 free_buffer(b);
1246 break;
1247
1248 case 'R':
1249 cur_cmd->x.fp = get_openfile(&file_read, "r", false)->fp;
1250 break;
1251
1252 case 'W':
1253 case 'w':
1254 cur_cmd->x.outf = get_openfile(&file_write, "w", true);
1255 break;
1256
1257 case 's':
1258 {
1259 struct buffer *b2;
1260 int flags;
1261 int slash;
1262
1263 slash = inchar();
1264 if ( !(b = match_slash(slash, true)) )
1265 bad_prog(_(UNTERM_S_CMD));
1266 if ( !(b2 = match_slash(slash, false)) )
1267 bad_prog(_(UNTERM_S_CMD));
1268
1269 cur_cmd->x.cmd_subst = OB_MALLOC(&obs, 1, struct subst);
1270 setup_replacement(cur_cmd->x.cmd_subst,
1271 get_buffer(b2), size_buffer(b2));
1272 free_buffer(b2);
1273
1274 flags = mark_subst_opts(cur_cmd->x.cmd_subst);
1275 cur_cmd->x.cmd_subst->regx =
1276 compile_regex(b, flags, cur_cmd->x.cmd_subst->max_id + 1);
1277 free_buffer(b);
1278 }
1279 break;
1280
1281 case 'y':
1282 {
1283 size_t len, dest_len;
1284 int slash;
1285 struct buffer *b2;
1286 char *src_buf, *dest_buf;
1287
1288 slash = inchar();
1289 if ( !(b = match_slash(slash, false)) )
1290 bad_prog(_(UNTERM_Y_CMD));
1291 src_buf = get_buffer(b);
1292 len = normalize_text(src_buf, size_buffer (b), TEXT_BUFFER);
1293
1294 if ( !(b2 = match_slash(slash, false)) )
1295 bad_prog(_(UNTERM_Y_CMD));
1296 dest_buf = get_buffer(b2);
1297 dest_len = normalize_text(dest_buf, size_buffer (b2), TEXT_BUFFER);
1298
1299 if (mb_cur_max > 1)
1300 {
1301 int i, j, idx, src_char_num;
1302 size_t *src_lens = MALLOC(len, size_t);
1303 char **trans_pairs;
1304 size_t mbclen;
1305 mbstate_t cur_stat;
1306
1307 /* Enumerate how many character the source buffer has. */
1308 memset(&cur_stat, 0, sizeof(mbstate_t));
1309 for (i = 0, j = 0; i < len;)
1310 {
1311 mbclen = MBRLEN (src_buf + i, len - i, &cur_stat);
1312 /* An invalid sequence, or a truncated multibyte character.
1313 We treat it as a singlebyte character. */
1314 if (mbclen == (size_t) -1 || mbclen == (size_t) -2
1315 || mbclen == 0)
1316 mbclen = 1;
1317 src_lens[j++] = mbclen;
1318 i += mbclen;
1319 }
1320 src_char_num = j;
1321
1322 memset(&cur_stat, 0, sizeof(mbstate_t));
1323 idx = 0;
1324
1325 /* trans_pairs = {src(0), dest(0), src(1), dest(1), ..., NULL}
1326 src(i) : pointer to i-th source character.
1327 dest(i) : pointer to i-th destination character.
1328 NULL : terminator */
1329 trans_pairs = MALLOC(2 * src_char_num + 1, char*);
1330 cur_cmd->x.translatemb = trans_pairs;
1331 for (i = 0; i < src_char_num; i++)
1332 {
1333 if (idx >= dest_len)
1334 bad_prog(_(Y_CMD_LEN));
1335
1336 /* Set the i-th source character. */
1337 trans_pairs[2 * i] = MALLOC(src_lens[i] + 1, char);
1338 strncpy(trans_pairs[2 * i], src_buf, src_lens[i]);
1339 trans_pairs[2 * i][src_lens[i]] = '\0';
1340 src_buf += src_lens[i]; /* Forward to next character. */
1341
1342 /* Fetch the i-th destination character. */
1343 mbclen = MBRLEN (dest_buf + idx, dest_len - idx, &cur_stat);
1344 /* An invalid sequence, or a truncated multibyte character.
1345 We treat it as a singlebyte character. */
1346 if (mbclen == (size_t) -1 || mbclen == (size_t) -2
1347 || mbclen == 0)
1348 mbclen = 1;
1349
1350 /* Set the i-th destination character. */
1351 trans_pairs[2 * i + 1] = MALLOC(mbclen + 1, char);
1352 strncpy(trans_pairs[2 * i + 1], dest_buf + idx, mbclen);
1353 trans_pairs[2 * i + 1][mbclen] = '\0';
1354 idx += mbclen; /* Forward to next character. */
1355 }
1356 trans_pairs[2 * i] = NULL;
1357 if (idx != dest_len)
1358 bad_prog(_(Y_CMD_LEN));
1359 }
1360 else
1361 {
1362 char *translate = OB_MALLOC(&obs, YMAP_LENGTH, char);
1363 unsigned char *ustring = CAST(unsigned char *)src_buf;
1364
1365 if (len != dest_len)
1366 bad_prog(_(Y_CMD_LEN));
1367
1368 for (len = 0; len < YMAP_LENGTH; len++)
1369 translate[len] = len;
1370
1371 while (dest_len--)
1372 translate[(unsigned char)*ustring++] = *dest_buf++;
1373
1374 cur_cmd->x.translate = translate;
1375 }
1376
1377 if ((ch = in_nonblank()) != EOF && ch != '\n' && ch != ';')
1378 bad_prog(_(EXCESS_JUNK));
1379
1380 free_buffer(b);
1381 free_buffer(b2);
1382 }
1383 break;
1384
1385 case EOF:
1386 bad_prog(_(NO_COMMAND));
1387 /*NOTREACHED*/
1388
1389 default:
1390 bad_command (ch);
1391 /*NOTREACHED*/
1392 }
1393
1394 /* this is buried down here so that "continue" statements will miss it */
1395 ++vector->v_length;
1396 }
1397 return vector;
1398}
1399
1400
1401
1402/* deal with \X escapes */
1403size_t
1404normalize_text(buf, len, buftype)
1405 char *buf;
1406 size_t len;
1407 enum text_types buftype;
1408{
1409 const char *bufend = buf + len;
1410 char *p = buf;
1411 char *q = buf;
1412
1413 /* This variable prevents normalizing text within bracket
1414 subexpressions when conforming to POSIX. If 0, we
1415 are not within a bracket expression. If -1, we are within a
1416 bracket expression but are not within [.FOO.], [=FOO=],
1417 or [:FOO:]. Otherwise, this is the '.', '=', or ':'
1418 respectively within these three types of subexpressions. */
1419 int bracket_state = 0;
1420
1421 int mbclen;
1422 mbstate_t cur_stat;
1423 memset(&cur_stat, 0, sizeof(mbstate_t));
1424
1425 while (p < bufend)
1426 {
1427 int c;
1428 mbclen = MBRLEN (p, bufend - p, &cur_stat);
1429 if (mbclen != 1)
1430 {
1431 /* An invalid sequence, or a truncated multibyte character.
1432 We treat it as a singlebyte character. */
1433 if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
1434 mbclen = 1;
1435
1436 memmove (q, p, mbclen);
1437 q += mbclen;
1438 p += mbclen;
1439 continue;
1440 }
1441
1442 if (*p == '\\' && p+1 < bufend && bracket_state == 0)
1443 switch ( (c = *++p) )
1444 {
1445#if defined __STDC__ && __STDC__-0
1446 case 'a': *q++ = '\a'; p++; continue;
1447#else /* Not STDC; we'll just assume ASCII */
1448 case 'a': *q++ = '\007'; p++; continue;
1449#endif
1450 /* case 'b': *q++ = '\b'; p++; continue; --- conflicts with \b RE */
1451 case 'f': *q++ = '\f'; p++; continue;
1452 case '\n': /*fall through */
1453 case 'n': *q++ = '\n'; p++; continue;
1454 case 'r': *q++ = '\r'; p++; continue;
1455 case 't': *q++ = '\t'; p++; continue;
1456 case 'v': *q++ = '\v'; p++; continue;
1457
1458 case 'd': /* decimal byte */
1459 p = convert_number(q, p+1, bufend, 10, 3, 'd');
1460 q++;
1461 continue;
1462
1463 case 'x': /* hexadecimal byte */
1464 p = convert_number(q, p+1, bufend, 16, 2, 'x');
1465 q++;
1466 continue;
1467
1468#ifdef REG_PERL
1469 case '0': case '1': case '2': case '3':
1470 case '4': case '5': case '6': case '7':
1471 if ((extended_regexp_flags & REG_PERL)
1472 && p+1 < bufend
1473 && p[1] >= '0' && p[1] <= '9')
1474 {
1475 p = convert_number(q, p, bufend, 8, 3, *p);
1476 q++;
1477 }
1478 else
1479 {
1480 /* we just pass the \ up one level for interpretation */
1481 if (buftype != TEXT_BUFFER)
1482 *q++ = '\\';
1483 }
1484
1485 continue;
1486
1487 case 'o': /* octal byte */
1488 if (!(extended_regexp_flags & REG_PERL))
1489 {
1490 p = convert_number(q, p+1, bufend, 8, 3, 'o');
1491 q++;
1492 }
1493 else
1494 {
1495 /* we just pass the \ up one level for interpretation */
1496 if (buftype != TEXT_BUFFER)
1497 *q++ = '\\';
1498 }
1499
1500 continue;
1501#else
1502 case 'o': /* octal byte */
1503 p = convert_number(q, p+1, bufend, 8, 3, 'o');
1504 q++;
1505 continue;
1506#endif
1507
1508 case 'c':
1509 if (++p < bufend)
1510 {
1511 *q++ = toupper(*p) ^ 0x40;
1512 p++;
1513 continue;
1514 }
1515 else
1516 {
1517 /* we just pass the \ up one level for interpretation */
1518 if (buftype != TEXT_BUFFER)
1519 *q++ = '\\';
1520 continue;
1521 }
1522
1523 default:
1524 /* we just pass the \ up one level for interpretation */
1525 if (buftype != TEXT_BUFFER)
1526 *q++ = '\\';
1527 break;
1528 }
1529 else if (buftype == TEXT_REGEX && posixicity != POSIXLY_EXTENDED)
1530 switch (*p)
1531 {
1532 case '[':
1533 if (!bracket_state)
1534 bracket_state = -1;
1535 break;
1536
1537 case ':':
1538 case '.':
1539 case '=':
1540 if (bracket_state == -1 && p[-1] == '[')
1541 bracket_state = *p;
1542 break;
1543
1544 case ']':
1545 if (bracket_state == 0)
1546 ;
1547 else if (bracket_state == -1)
1548 bracket_state = 0;
1549 else if (p[-2] != bracket_state && p[-1] == bracket_state)
1550 bracket_state = -1;
1551 break;
1552 }
1553
1554 *q++ = *p++;
1555 }
1556 return (size_t)(q - buf);
1557}
1558
1559
1560/* `str' is a string (from the command line) that contains a sed command.
1561 Compile the command, and add it to the end of `cur_program'. */
1562struct vector *
1563compile_string(cur_program, str, len)
1564 struct vector *cur_program;
1565 char *str;
1566 size_t len;
1567{
1568 static countT string_expr_count = 0;
1569 struct vector *ret;
1570
1571 prog.file = NULL;
1572 prog.base = CAST(unsigned char *)str;
1573 prog.cur = prog.base;
1574 prog.end = prog.cur + len;
1575
1576 cur_input.line = 0;
1577 cur_input.name = NULL;
1578 cur_input.string_expr_count = ++string_expr_count;
1579
1580 ret = compile_program(cur_program);
1581 prog.base = NULL;
1582 prog.cur = NULL;
1583 prog.end = NULL;
1584
1585 first_script = false;
1586 return ret;
1587}
1588
1589/* `cmdfile' is the name of a file containing sed commands.
1590 Read them in and add them to the end of `cur_program'.
1591 */
1592struct vector *
1593compile_file(cur_program, cmdfile)
1594 struct vector *cur_program;
1595 const char *cmdfile;
1596{
1597 size_t len;
1598 struct vector *ret;
1599
1600 prog.file = stdin;
1601 if (cmdfile[0] != '-' || cmdfile[1] != '\0')
1602 prog.file = ck_fopen(cmdfile, "rt", true);
1603
1604 cur_input.line = 1;
1605 cur_input.name = cmdfile;
1606 cur_input.string_expr_count = 0;
1607
1608 ret = compile_program(cur_program);
1609 if (prog.file != stdin)
1610 ck_fclose(prog.file);
1611 prog.file = NULL;
1612
1613 first_script = false;
1614 return ret;
1615}
1616
1617/* Make any checks which require the whole program to have been read.
1618 In particular: this backpatches the jump targets.
1619 Any cleanup which can be done after these checks is done here also. */
1620void
1621check_final_program(program)
1622 struct vector *program;
1623{
1624 struct sed_label *go;
1625 struct sed_label *lbl;
1626
1627 /* do all "{"s have a corresponding "}"? */
1628 if (blocks)
1629 {
1630 /* update info for error reporting: */
1631 MEMCPY(&cur_input, &blocks->err_info, sizeof (cur_input));
1632 bad_prog(_(EXCESS_OPEN_BRACE));
1633 }
1634
1635 /* was the final command an unterminated a/c/i command? */
1636 if (pending_text)
1637 {
1638 old_text_buf->text_length = size_buffer(pending_text);
1639 old_text_buf->text = MEMDUP(get_buffer(pending_text),
1640 old_text_buf->text_length, char);
1641 free_buffer(pending_text);
1642 pending_text = NULL;
1643 }
1644
1645 for (go = jumps; go; go = release_label(go))
1646 {
1647 for (lbl = labels; lbl; lbl = lbl->next)
1648 if (strcmp(lbl->name, go->name) == 0)
1649 break;
1650 if (lbl)
1651 {
1652 program->v[go->v_index].x.jump_index = lbl->v_index;
1653 }
1654 else
1655 {
1656 if (*go->name)
1657 panic(_("can't find label for jump to `%s'"), go->name);
1658 program->v[go->v_index].x.jump_index = program->v_length;
1659 }
1660 }
1661 jumps = NULL;
1662
1663 for (lbl = labels; lbl; lbl = release_label(lbl))
1664 ;
1665 labels = NULL;
1666
1667 /* There is no longer a need to track file names: */
1668 {
1669 struct output *p;
1670
1671 for (p=file_read; p; p=p->link)
1672 if (p->name)
1673 {
1674 FREE(p->name);
1675 p->name = NULL;
1676 }
1677
1678 for (p=file_write; p; p=p->link)
1679 if (p->name)
1680 {
1681 FREE(p->name);
1682 p->name = NULL;
1683 }
1684 }
1685}
1686
1687/* Rewind all resources which were allocated in this module. */
1688void
1689rewind_read_files()
1690{
1691 struct output *p;
1692
1693 for (p=file_read; p; p=p->link)
1694 if (p->fp)
1695 rewind(p->fp);
1696}
1697
1698/* Release all resources which were allocated in this module. */
1699void
1700finish_program(program)
1701 struct vector *program;
1702{
1703 /* close all files... */
1704 {
1705 struct output *p, *q;
1706
1707 for (p=file_read; p; p=q)
1708 {
1709 if (p->fp)
1710 ck_fclose(p->fp);
1711 q = p->link;
1712#if 0
1713 /* We use obstacks. */
1714 FREE(p);
1715#endif
1716 }
1717
1718 for (p=file_write; p; p=q)
1719 {
1720 if (p->fp)
1721 ck_fclose(p->fp);
1722 q = p->link;
1723#if 0
1724 /* We use obstacks. */
1725 FREE(p);
1726#endif
1727 }
1728 file_read = file_write = NULL;
1729 }
1730
1731#ifdef DEBUG_LEAKS
1732 obstack_free (&obs, NULL);
1733#endif /*DEBUG_LEAKS*/
1734}
Note: See TracBrowser for help on using the repository browser.