/*  GNU SED, a batch stream editor.
    Copyright (C) 1989,90,91,92,93,94,95,98,99,2002,2003
    Free Software Foundation, Inc.

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2, or (at your option)
    any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
|
---|
18 |
|
---|
19 | #ifdef HAVE_CONFIG_H
|
---|
20 | #include "config.h"
|
---|
21 | #endif
|
---|
22 |
|
---|
23 | #include "basicdefs.h"
|
---|
24 | #include "regex.h"
|
---|
25 |
|
---|
26 | #ifndef BOOTSTRAP
|
---|
27 | #include <stdio.h>
|
---|
28 | #endif
|
---|
29 |
|
---|
30 | #include "utils.h"
|
---|
31 |
|
---|
/* Struct vector is used to describe a compiled sed program: an array
   of commands together with its allocated capacity and the number of
   entries currently in use. */
struct vector {
  struct sed_cmd *v;    /* a dynamically allocated array */
  size_t v_allocated;   /* ... number of slots allocated */
  size_t v_length;      /* ... number of slots in use */
};
|
---|
38 |
|
---|
/* This structure tracks files used by sed so that they may all be
   closed cleanly at normal program termination.  A flag is kept that
   tells if a missing newline was encountered, so that it is added on
   the next line and the two lines are not concatenated. */
struct output {
  char *name;                   /* NOTE(review): presumably the file's name -- confirm */
  bool missing_newline;         /* set when a missing newline was encountered */
  FILE *fp;                     /* stdio stream associated with this entry */
  struct output *link;          /* another tracked output record (list link) */
};
|
---|
49 |
|
---|
/* A counted text buffer: a pointer to the bytes plus an explicit
   length, so the text does not have to be NUL-terminated to be
   measured. */
struct text_buf {
  char *text;           /* start of the text */
  size_t text_length;   /* length associated with text */
};
|
---|
54 |
|
---|
/* A regular expression as stored by sed: the compiled pattern, the
   flags recorded with it, and a trailing character array.

   NOTE(review): re[1] looks like the pre-C99 "struct hack", with the
   expression text stored past the end of the struct and sz presumably
   giving its length -- confirm against the allocator.  The array is
   deliberately left as [1] rather than a flexible array member,
   because that would change sizeof (struct regex). */
struct regex {
  regex_t pattern;      /* compiled pattern */
  int flags;            /* flags kept alongside the pattern */
  size_t sz;            /* size associated with re (see note above) */
  char re[1];           /* first byte of the trailing text */
};
|
---|
61 |
|
---|
/* Case-conversion modes for a replacement.  The values are bit flags:
   the low two bits (REPL_UPPERCASE, REPL_LOWERCASE) and the two
   *_FIRST bits covered by REPL_MODIFIERS can be OR-ed together. */
enum replacement_types {
  REPL_ASIS = 0,
  REPL_UPPERCASE = 1,
  REPL_LOWERCASE = 2,
  REPL_UPPERCASE_FIRST = 4,
  REPL_LOWERCASE_FIRST = 8,
  REPL_MODIFIERS = REPL_UPPERCASE_FIRST | REPL_LOWERCASE_FIRST,

  /* These are given to aid in debugging */
  REPL_UPPERCASE_UPPERCASE = REPL_UPPERCASE_FIRST | REPL_UPPERCASE,
  REPL_UPPERCASE_LOWERCASE = REPL_UPPERCASE_FIRST | REPL_LOWERCASE,
  REPL_LOWERCASE_UPPERCASE = REPL_LOWERCASE_FIRST | REPL_UPPERCASE,
  REPL_LOWERCASE_LOWERCASE = REPL_LOWERCASE_FIRST | REPL_LOWERCASE
};
|
---|
76 |
|
---|
/* Kinds of text; used as the buftype argument of normalize_text(). */
enum text_types {
  TEXT_BUFFER,
  TEXT_REPLACEMENT,
  TEXT_REGEX
};
|
---|
82 |
|
---|
/* How strictly sed should follow POSIX, from most to least permissive. */
enum posixicity_types {
  POSIXLY_EXTENDED,     /* with GNU extensions */
  POSIXLY_CORRECT,      /* with POSIX-compatible GNU extensions */
  POSIXLY_BASIC         /* pedantically POSIX */
};
|
---|
88 |
|
---|
/* State of a two-address range attached to a command. */
enum addr_state {
  RANGE_INACTIVE,       /* never been active */
  RANGE_ACTIVE,         /* between first and second address */
  RANGE_CLOSED          /* like RANGE_INACTIVE, but range has ended once */
};
|
---|
94 |
|
---|
/* Discriminator for struct addr: says which of its fields carry
   meaningful data for a given address. */
enum addr_types {
  ADDR_IS_NULL,         /* null address */
  ADDR_IS_REGEX,        /* a.addr_regex is valid */
  ADDR_IS_NUM,          /* a.addr_number is valid */
  ADDR_IS_NUM_MOD,      /* a.addr_number is valid, addr_step is modulo */
  ADDR_IS_STEP,         /* address is +N (only valid for addr2) */
  ADDR_IS_STEP_MOD,     /* address is ~N (only valid for addr2) */
  ADDR_IS_LAST          /* address is $ */
};
|
---|
104 |
|
---|
105 | struct addr {
|
---|
106 | enum addr_types addr_type;
|
---|
107 | countT addr_number;
|
---|
108 | countT addr_step;
|
---|
109 | struct regex *addr_regex;
|
---|
110 | };
|
---|
111 |
|
---|
112 |
|
---|
113 | struct replacement {
|
---|
114 | char *prefix;
|
---|
115 | size_t prefix_length;
|
---|
116 | int subst_id;
|
---|
117 | enum replacement_types repl_type;
|
---|
118 | struct replacement *next;
|
---|
119 | };
|
---|
120 |
|
---|
121 | struct subst {
|
---|
122 | struct regex *regx;
|
---|
123 | struct replacement *replacement;
|
---|
124 | countT numb; /* if >0, only substitute for match number "numb" */
|
---|
125 | struct output *outf; /* 'w' option given */
|
---|
126 | unsigned global : 1; /* 'g' option given */
|
---|
127 | unsigned print : 2; /* 'p' option given (before/after eval) */
|
---|
128 | unsigned eval : 1; /* 'e' option given */
|
---|
129 | unsigned max_id : 4; /* maximum backreference on the RHS */
|
---|
130 | };
|
---|
131 |
|
---|
#ifdef REG_PERL
/* This is the structure we store register match data in.  See
   regex.texinfo for a full description of what registers match.
   It is only declared here when REG_PERL is defined. */
struct re_registers
{
  unsigned num_regs;    /* count associated with the two arrays below */
  regoff_t *start;      /* start offsets */
  regoff_t *end;        /* end offsets */
};
#endif
|
---|
142 |
|
---|
143 |
|
---|
144 |
|
---|
145 | struct sed_cmd {
|
---|
146 | struct addr *a1; /* save space: usually is NULL */
|
---|
147 | struct addr *a2;
|
---|
148 |
|
---|
149 | /* See description the enum, above. */
|
---|
150 | enum addr_state range_state;
|
---|
151 |
|
---|
152 | /* Non-zero if command is to be applied to non-matches. */
|
---|
153 | char addr_bang;
|
---|
154 |
|
---|
155 | /* The actual command character. */
|
---|
156 | char cmd;
|
---|
157 |
|
---|
158 | /* auxiliary data for various commands */
|
---|
159 | union {
|
---|
160 | /* This structure is used for a, i, and c commands. */
|
---|
161 | struct text_buf cmd_txt;
|
---|
162 |
|
---|
163 | /* This is used for the l, q and Q commands. */
|
---|
164 | int int_arg;
|
---|
165 |
|
---|
166 | /* This is used for the {}, b, and t commands. */
|
---|
167 | countT jump_index;
|
---|
168 |
|
---|
169 | /* This is used for the r command. */
|
---|
170 | char *fname;
|
---|
171 |
|
---|
172 | /* This is used for the hairy s command. */
|
---|
173 | struct subst *cmd_subst;
|
---|
174 |
|
---|
175 | /* This is used for the w command. */
|
---|
176 | struct output *outf;
|
---|
177 |
|
---|
178 | /* This is used for the R command. */
|
---|
179 | FILE *fp;
|
---|
180 |
|
---|
181 | /* This is used for the y command. */
|
---|
182 | unsigned char *translate;
|
---|
183 | char **translatemb;
|
---|
184 | } x;
|
---|
185 | };
|
---|
186 |
|
---|
187 |
|
---|
188 | |
---|
189 |
|
---|
190 | void bad_prog P_((const char *why));
|
---|
191 | size_t normalize_text P_((char *text, size_t len, enum text_types buftype));
|
---|
192 | struct vector *compile_string P_((struct vector *, char *str, size_t len));
|
---|
193 | struct vector *compile_file P_((struct vector *, const char *cmdfile));
|
---|
194 | void check_final_program P_((struct vector *));
|
---|
195 | void rewind_read_files P_((void));
|
---|
196 | void finish_program P_((struct vector *));
|
---|
197 |
|
---|
198 | struct regex *compile_regex P_((struct buffer *b, int flags, int needed_sub));
|
---|
199 | int match_regex P_((struct regex *regex,
|
---|
200 | char *buf, size_t buflen, size_t buf_start_offset,
|
---|
201 | struct re_registers *regarray, int regsize));
|
---|
202 | #ifdef DEBUG_LEAKS
|
---|
203 | void release_regex P_((struct regex *));
|
---|
204 | #endif
|
---|
205 |
|
---|
206 | int process_files P_((struct vector *, char **argv));
|
---|
207 |
|
---|
208 | int main P_((int, char **));
|
---|
209 |
|
---|
210 | extern void fmt P_ ((const char *line, const char *line_end, int max_length, FILE *output_file));
|
---|
211 |
|
---|
212 | extern int extended_regexp_flags;
|
---|
213 |
|
---|
214 | #ifndef CONFIG_WITHOUT_O_OPT
|
---|
215 | /* The output file, defaults to stdout but can be overridden
|
---|
216 | by the -o or --output option. main sets this to avoid problems. */
|
---|
217 | extern FILE *sed_stdout;
|
---|
218 | #endif
|
---|
219 |
|
---|
220 | /* If set, fflush(stdout) on every line output. */
|
---|
221 | extern bool unbuffered_output;
|
---|
222 |
|
---|
223 | /* If set, don't write out the line unless explicitly told to. */
|
---|
224 | extern bool no_default_output;
|
---|
225 |
|
---|
226 | /* If set, reset line counts on every new file. */
|
---|
227 | extern bool separate_files;
|
---|
228 |
|
---|
229 | /* Do we need to be pedantically POSIX compliant? */
|
---|
230 | extern enum posixicity_types posixicity;
|
---|
231 |
|
---|
232 | /* How long should the `l' command's output line be? */
|
---|
233 | extern countT lcmd_out_line_len;
|
---|
234 |
|
---|
235 | /* How do we edit files in-place? (we don't if NULL) */
|
---|
236 | extern char *in_place_extension;
|
---|
237 |
|
---|
238 | /* Should we use EREs? */
|
---|
239 | extern bool use_extended_syntax_p;
|
---|
240 |
|
---|
241 | /* Declarations for multibyte character sets. */
|
---|
242 | extern int mb_cur_max;
|
---|
243 |
|
---|
/* Multibyte conversion wrappers.

   With HAVE_MBRTOWC, each macro tests mb_cur_max == 1 and, when that
   holds, avoids the general multibyte routine: MBRTOWC and WCRTOMB
   (only when HAVE_BTOWC is also defined) use btowc/wctob and yield 1,
   while MBRLEN and BRLEN simply yield 1.  Without HAVE_MBRTOWC, MBRLEN
   and BRLEN are the constant 1 and the conversion macros are not
   defined at all.

   Every macro argument is parenthesized in the expansion so that an
   argument containing operators cannot be re-associated.  Arguments
   may be evaluated zero or one times depending on the branch taken, so
   callers should not pass expressions with side effects. */
#ifdef HAVE_MBRTOWC
#ifdef HAVE_BTOWC
#define MBRTOWC(pwc, s, n, ps) \
  (mb_cur_max == 1 ? \
   (*(pwc) = btowc (*(unsigned char *) (s)), 1) : \
   mbrtowc ((pwc), (s), (n), (ps)))

#define WCRTOMB(s, wc, ps) \
  (mb_cur_max == 1 ? \
   (*(s) = wctob ((wint_t) (wc)), 1) : \
   wcrtomb ((s), (wc), (ps)))
#else
#define MBRTOWC(pwc, s, n, ps) \
  mbrtowc ((pwc), (s), (n), (ps))

#define WCRTOMB(s, wc, ps) \
  wcrtomb ((s), (wc), (ps))
#endif

#define MBRLEN(s, n, ps) \
  (mb_cur_max == 1 ? 1 : mbrtowc (NULL, (s), (n), (ps)))

#define BRLEN(ch, ps) \
  (mb_cur_max == 1 ? 1 : brlen ((ch), (ps)))

#else
#define MBRLEN(s, n, ps) 1
#define BRLEN(ch, ps) 1
#endif
|
---|
273 |
|
---|
274 | extern int brlen P_ ((int ch, mbstate_t *ps));
|
---|
275 | extern void initialize_mbcs P_ ((void));
|
---|
276 |
|
---|