| 1 | /* GNU SED, a batch stream editor.
|
|---|
| 2 | Copyright (C) 1989,90,91,92,93,94,95,98,99,2002,2003
|
|---|
| 3 | Free Software Foundation, Inc.
|
|---|
| 4 |
|
|---|
| 5 | This program is free software; you can redistribute it and/or modify
|
|---|
| 6 | it under the terms of the GNU General Public License as published by
|
|---|
| 7 | the Free Software Foundation; either version 2, or (at your option)
|
|---|
| 8 | any later version.
|
|---|
| 9 |
|
|---|
| 10 | This program is distributed in the hope that it will be useful,
|
|---|
| 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|---|
| 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|---|
| 13 | GNU General Public License for more details.
|
|---|
| 14 |
|
|---|
| 15 | You should have received a copy of the GNU General Public License
|
|---|
| 16 | along with this program; if not, write to the Free Software
|
|---|
| 17 | Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
|
|---|
| 18 |
|
|---|
| 19 | #ifdef HAVE_CONFIG_H
|
|---|
| 20 | #include "config.h"
|
|---|
| 21 | #endif
|
|---|
| 22 |
|
|---|
| 23 | #include "basicdefs.h"
|
|---|
| 24 | #include "regex.h"
|
|---|
| 25 |
|
|---|
| 26 | #ifndef BOOTSTRAP
|
|---|
| 27 | #include <stdio.h>
|
|---|
| 28 | #endif
|
|---|
| 29 |
|
|---|
| 30 | #include "utils.h"
|
|---|
| 31 |
|
|---|
/* Struct vector is used to describe a compiled sed program: an array of
   commands together with its allocated capacity and the number of
   entries currently in use.  */
struct vector {
  struct sed_cmd *v;	/* a dynamically allocated array */
  size_t v_allocated;	/* ... number of slots allocated */
  size_t v_length;	/* ... number of slots in use */
};
|
|---|
| 38 |
|
|---|
/* This structure tracks files used by sed so that they may all be
   closed cleanly at normal program termination.  A flag is kept that
   tells if a missing newline was encountered, so that it is added on
   the next line and the two lines are not concatenated.  */
struct output {
  char *name;			/* name associated with the file */
  bool missing_newline;		/* the flag described above */
  FILE *fp;			/* the stdio stream */
  struct output *link;		/* presumably the next tracked file -- confirm */
};
|
|---|
| 49 |
|
|---|
/* A piece of text stored as a pointer plus an explicit length.  */
struct text_buf {
  char *text;
  size_t text_length;
};
|
|---|
| 54 |
|
|---|
/* A compiled regular expression plus the flags and size recorded with it.  */
struct regex {
  regex_t pattern;
  int flags;
  size_t sz;
  /* NOTE(review): one-element trailing array; presumably the pre-C99
     "struct hack" holding sz bytes of pattern text -- confirm against
     the code that allocates this structure before changing its size.  */
  char re[1];
};
|
|---|
| 61 |
|
|---|
/* Type of a replacement piece, encoded as bit flags: the two low bits
   hold REPL_UPPERCASE / REPL_LOWERCASE, and the next two bits hold the
   *_FIRST flags, which REPL_MODIFIERS masks.  */
enum replacement_types {
  REPL_ASIS = 0,
  REPL_UPPERCASE = 1,
  REPL_LOWERCASE = 2,
  REPL_UPPERCASE_FIRST = 4,
  REPL_LOWERCASE_FIRST = 8,
  REPL_MODIFIERS = REPL_UPPERCASE_FIRST | REPL_LOWERCASE_FIRST,

  /* These are given to aid in debugging */
  REPL_UPPERCASE_UPPERCASE = REPL_UPPERCASE_FIRST | REPL_UPPERCASE,
  REPL_UPPERCASE_LOWERCASE = REPL_UPPERCASE_FIRST | REPL_LOWERCASE,
  REPL_LOWERCASE_UPPERCASE = REPL_LOWERCASE_FIRST | REPL_UPPERCASE,
  REPL_LOWERCASE_LOWERCASE = REPL_LOWERCASE_FIRST | REPL_LOWERCASE
};
|
|---|
| 76 |
|
|---|
/* Kinds of text; passed as the buftype argument of normalize_text.  */
enum text_types {
  TEXT_BUFFER,
  TEXT_REPLACEMENT,
  TEXT_REGEX
};
|
|---|
| 82 |
|
|---|
/* Levels of POSIX conformance.  */
enum posixicity_types {
  POSIXLY_EXTENDED,	/* with GNU extensions */
  POSIXLY_CORRECT,	/* with POSIX-compatible GNU extensions */
  POSIXLY_BASIC		/* pedantically POSIX */
};
|
|---|
| 88 |
|
|---|
/* State of an address range.  */
enum addr_state {
  RANGE_INACTIVE,	/* never been active */
  RANGE_ACTIVE,		/* between first and second address */
  RANGE_CLOSED		/* like RANGE_INACTIVE, but range has ended once */
};
|
|---|
| 94 |
|
|---|
/* Kinds of address; each value notes which address fields are valid
   (the "a." prefix in these notes denotes the address structure).  */
enum addr_types {
  ADDR_IS_NULL,		/* null address */
  ADDR_IS_REGEX,	/* a.addr_regex is valid */
  ADDR_IS_NUM,		/* a.addr_number is valid */
  ADDR_IS_NUM_MOD,	/* a.addr_number is valid, addr_step is modulo */
  ADDR_IS_STEP,		/* address is +N (only valid for addr2) */
  ADDR_IS_STEP_MOD,	/* address is ~N (only valid for addr2) */
  ADDR_IS_LAST		/* address is $ */
};
|
|---|
| 104 |
|
|---|
| 105 | struct addr {
|
|---|
| 106 | enum addr_types addr_type;
|
|---|
| 107 | countT addr_number;
|
|---|
| 108 | countT addr_step;
|
|---|
| 109 | struct regex *addr_regex;
|
|---|
| 110 | };
|
|---|
| 111 |
|
|---|
| 112 |
|
|---|
| 113 | struct replacement {
|
|---|
| 114 | char *prefix;
|
|---|
| 115 | size_t prefix_length;
|
|---|
| 116 | int subst_id;
|
|---|
| 117 | enum replacement_types repl_type;
|
|---|
| 118 | struct replacement *next;
|
|---|
| 119 | };
|
|---|
| 120 |
|
|---|
| 121 | struct subst {
|
|---|
| 122 | struct regex *regx;
|
|---|
| 123 | struct replacement *replacement;
|
|---|
| 124 | countT numb; /* if >0, only substitute for match number "numb" */
|
|---|
| 125 | struct output *outf; /* 'w' option given */
|
|---|
| 126 | unsigned global : 1; /* 'g' option given */
|
|---|
| 127 | unsigned print : 2; /* 'p' option given (before/after eval) */
|
|---|
| 128 | unsigned eval : 1; /* 'e' option given */
|
|---|
| 129 | unsigned max_id : 4; /* maximum backreference on the RHS */
|
|---|
| 130 | };
|
|---|
| 131 |
|
|---|
#ifdef REG_PERL
/* This is the structure we store register match data in.  See
   regex.texinfo for a full description of what registers match.
   It is declared here only when REG_PERL is defined.  */
struct re_registers
{
  unsigned num_regs;
  regoff_t *start;
  regoff_t *end;
};
#endif
|
|---|
| 142 |
|
|---|
| 143 |
|
|---|
| 144 |
|
|---|
| 145 | struct sed_cmd {
|
|---|
| 146 | struct addr *a1; /* save space: usually is NULL */
|
|---|
| 147 | struct addr *a2;
|
|---|
| 148 |
|
|---|
| 149 | /* See description the enum, above. */
|
|---|
| 150 | enum addr_state range_state;
|
|---|
| 151 |
|
|---|
| 152 | /* Non-zero if command is to be applied to non-matches. */
|
|---|
| 153 | char addr_bang;
|
|---|
| 154 |
|
|---|
| 155 | /* The actual command character. */
|
|---|
| 156 | char cmd;
|
|---|
| 157 |
|
|---|
| 158 | /* auxiliary data for various commands */
|
|---|
| 159 | union {
|
|---|
| 160 | /* This structure is used for a, i, and c commands. */
|
|---|
| 161 | struct text_buf cmd_txt;
|
|---|
| 162 |
|
|---|
| 163 | /* This is used for the l, q and Q commands. */
|
|---|
| 164 | int int_arg;
|
|---|
| 165 |
|
|---|
| 166 | /* This is used for the {}, b, and t commands. */
|
|---|
| 167 | countT jump_index;
|
|---|
| 168 |
|
|---|
| 169 | /* This is used for the r command. */
|
|---|
| 170 | char *fname;
|
|---|
| 171 |
|
|---|
| 172 | /* This is used for the hairy s command. */
|
|---|
| 173 | struct subst *cmd_subst;
|
|---|
| 174 |
|
|---|
| 175 | /* This is used for the w command. */
|
|---|
| 176 | struct output *outf;
|
|---|
| 177 |
|
|---|
| 178 | /* This is used for the R command. */
|
|---|
| 179 | FILE *fp;
|
|---|
| 180 |
|
|---|
| 181 | /* This is used for the y command. */
|
|---|
| 182 | unsigned char *translate;
|
|---|
| 183 | char **translatemb;
|
|---|
| 184 | } x;
|
|---|
| 185 | };
|
|---|
| 186 |
|
|---|
| 187 |
|
|---|
| 188 | |
|---|
| 189 |
|
|---|
| 190 | void bad_prog P_((const char *why));
|
|---|
| 191 | size_t normalize_text P_((char *text, size_t len, enum text_types buftype));
|
|---|
| 192 | struct vector *compile_string P_((struct vector *, char *str, size_t len));
|
|---|
| 193 | struct vector *compile_file P_((struct vector *, const char *cmdfile));
|
|---|
| 194 | void check_final_program P_((struct vector *));
|
|---|
| 195 | void rewind_read_files P_((void));
|
|---|
| 196 | void finish_program P_((struct vector *));
|
|---|
| 197 |
|
|---|
| 198 | struct regex *compile_regex P_((struct buffer *b, int flags, int needed_sub));
|
|---|
| 199 | int match_regex P_((struct regex *regex,
|
|---|
| 200 | char *buf, size_t buflen, size_t buf_start_offset,
|
|---|
| 201 | struct re_registers *regarray, int regsize));
|
|---|
| 202 | #ifdef DEBUG_LEAKS
|
|---|
| 203 | void release_regex P_((struct regex *));
|
|---|
| 204 | #endif
|
|---|
| 205 |
|
|---|
| 206 | int process_files P_((struct vector *, char **argv));
|
|---|
| 207 |
|
|---|
| 208 | int main P_((int, char **));
|
|---|
| 209 |
|
|---|
| 210 | extern void fmt P_ ((const char *line, const char *line_end, int max_length, FILE *output_file));
|
|---|
| 211 |
|
|---|
| 212 | extern int extended_regexp_flags;
|
|---|
| 213 |
|
|---|
| 214 | #ifndef CONFIG_WITHOUT_O_OPT
|
|---|
| 215 | /* The output file, defaults to stdout but can be overridden
|
|---|
| 216 | by the -o or --output option. main sets this to avoid problems. */
|
|---|
| 217 | extern FILE *sed_stdout;
|
|---|
| 218 | #endif
|
|---|
| 219 |
|
|---|
| 220 | /* If set, fflush(stdout) on every line output. */
|
|---|
| 221 | extern bool unbuffered_output;
|
|---|
| 222 |
|
|---|
| 223 | /* If set, don't write out the line unless explicitly told to. */
|
|---|
| 224 | extern bool no_default_output;
|
|---|
| 225 |
|
|---|
| 226 | /* If set, reset line counts on every new file. */
|
|---|
| 227 | extern bool separate_files;
|
|---|
| 228 |
|
|---|
| 229 | /* Do we need to be pedantically POSIX compliant? */
|
|---|
| 230 | extern enum posixicity_types posixicity;
|
|---|
| 231 |
|
|---|
| 232 | /* How long should the `l' command's output line be? */
|
|---|
| 233 | extern countT lcmd_out_line_len;
|
|---|
| 234 |
|
|---|
| 235 | /* How do we edit files in-place? (we don't if NULL) */
|
|---|
| 236 | extern char *in_place_extension;
|
|---|
| 237 |
|
|---|
| 238 | /* Should we use EREs? */
|
|---|
| 239 | extern bool use_extended_syntax_p;
|
|---|
| 240 |
|
|---|
| 241 | /* Declarations for multibyte character sets. */
|
|---|
| 242 | extern int mb_cur_max;
|
|---|
| 243 |
|
|---|
/* Multibyte conversion wrappers.  When mb_cur_max == 1 they skip the
   mbrtowc/wcrtomb calls; the fast paths evaluate to 1.  Without
   HAVE_MBRTOWC, MBRLEN and BRLEN are the constant 1 and the conversion
   macros are not defined.  Arguments may be evaluated zero or one
   times depending on the branch taken.  */
#ifdef HAVE_MBRTOWC
#ifdef HAVE_BTOWC
/* With btowc/wctob available, use them when mb_cur_max == 1.  */
#define MBRTOWC(pwc, s, n, ps) \
  (mb_cur_max == 1 ? \
   (*(pwc) = btowc (*(unsigned char *) (s)), 1) : \
   mbrtowc ((pwc), (s), (n), (ps)))

#define WCRTOMB(s, wc, ps) \
  (mb_cur_max == 1 ? \
   (*(s) = wctob ((wint_t) (wc)), 1) : \
   wcrtomb ((s), (wc), (ps)))
#else
#define MBRTOWC(pwc, s, n, ps) \
  mbrtowc ((pwc), (s), (n), (ps))

#define WCRTOMB(s, wc, ps) \
  wcrtomb ((s), (wc), (ps))
#endif

/* Arguments are parenthesized so that any expression may be passed.  */
#define MBRLEN(s, n, ps) \
  (mb_cur_max == 1 ? 1 : mbrtowc (NULL, (s), (n), (ps)))

#define BRLEN(ch, ps) \
  (mb_cur_max == 1 ? 1 : brlen ((ch), (ps)))

#else
#define MBRLEN(s, n, ps) 1
#define BRLEN(ch, ps) 1
#endif
|
|---|
| 273 |
|
|---|
| 274 | extern int brlen P_ ((int ch, mbstate_t *ps));
|
|---|
| 275 | extern void initialize_mbcs P_ ((void));
|
|---|
| 276 |
|
|---|