source: trunk/src/oldsed/sed/sed.h@ 3610

Last change on this file since 3610 was 1301, checked in by bird, 18 years ago

Added options for sending the output to a file without having to make use of redirection (-o, --output, --output-text, --output-binary).

File size: 7.3 KB
Line 
/*  GNU SED, a batch stream editor.
    Copyright (C) 1989,90,91,92,93,94,95,98,99,2002,2003
    Free Software Foundation, Inc.

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2, or (at your option)
    any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18
19#ifdef HAVE_CONFIG_H
20#include "config.h"
21#endif
22
23#include "basicdefs.h"
24#include "regex.h"
25
26#ifndef BOOTSTRAP
27#include <stdio.h>
28#endif
29
30#include "utils.h"
31
/* Struct vector is used to describe a compiled sed program: an array of
   commands together with its capacity and its current length.  */
struct vector {
  struct sed_cmd *v;    /* a dynamically allocated array */
  size_t v_allocated;   /* ... number of slots allocated */
  size_t v_length;      /* ... number of slots in use */
};
38
/* This structure tracks files used by sed so that they may all be
   closed cleanly at normal program termination.  A flag is kept that
   tells if a missing newline was encountered, so that it is added on the
   next line and the two lines are not concatenated.  */
struct output {
  char *name;               /* name associated with the file */
  bool missing_newline;     /* the flag described above */
  FILE *fp;                 /* stream for the file */
  struct output *link;      /* another tracked file; NOTE(review):
                               presumably the next entry of a linked
                               list -- confirm against the users */
};
49
/* A piece of text together with its length.  struct sed_cmd uses it as
   the auxiliary data of the a, i, and c commands.  */
struct text_buf {
  char *text;           /* the text itself */
  size_t text_length;   /* length of `text' */
};
54
/* A regular expression as handled by sed: the regex_t for the pattern
   plus a few fields stored alongside it.  */
struct regex {
  regex_t pattern;  /* the regex library's pattern object */
  int flags;        /* NOTE(review): presumably the flags passed to
                       compile_regex() -- confirm */
  size_t sz;        /* NOTE(review): presumably the number of bytes kept
                       in `re' -- confirm */
  char re[1];       /* Declared with a single element.  NOTE(review): the
                       structure is presumably over-allocated so that this
                       member can hold more than one byte; the declared
                       size must stay 1 because code outside this header
                       may compute allocation sizes from sizeof -- confirm
                       against the allocator */
};
61
/* Kinds of replacement, stored in the `repl_type' member of
   struct replacement.  The values are bit flags: REPL_UPPERCASE and
   REPL_LOWERCASE use the two low bits, the *_FIRST values use the next
   two bits, and REPL_MODIFIERS is the mask covering both *_FIRST bits.  */
enum replacement_types {
  REPL_ASIS = 0,
  REPL_UPPERCASE = 1,
  REPL_LOWERCASE = 2,
  REPL_UPPERCASE_FIRST = 4,
  REPL_LOWERCASE_FIRST = 8,
  REPL_MODIFIERS = REPL_UPPERCASE_FIRST | REPL_LOWERCASE_FIRST,

  /* These are given to aid in debugging: each one names the combination
     of a *_FIRST value with REPL_UPPERCASE or REPL_LOWERCASE.  */
  REPL_UPPERCASE_UPPERCASE = REPL_UPPERCASE_FIRST | REPL_UPPERCASE,
  REPL_UPPERCASE_LOWERCASE = REPL_UPPERCASE_FIRST | REPL_LOWERCASE,
  REPL_LOWERCASE_UPPERCASE = REPL_LOWERCASE_FIRST | REPL_UPPERCASE,
  REPL_LOWERCASE_LOWERCASE = REPL_LOWERCASE_FIRST | REPL_LOWERCASE
};
76
/* Kinds of text buffer; this is the type of the `buftype' argument of
   normalize_text().  */
enum text_types {
  TEXT_BUFFER,
  TEXT_REPLACEMENT,
  TEXT_REGEX
};
82
/* Degrees of POSIX conformance; this is the type of the global
   `posixicity'.  */
enum posixicity_types {
  POSIXLY_EXTENDED,     /* with GNU extensions */
  POSIXLY_CORRECT,      /* with POSIX-compatible GNU extensions */
  POSIXLY_BASIC         /* pedantically POSIX */
};
88
/* State of a command's address range; stored in the `range_state' member
   of struct sed_cmd.  */
enum addr_state {
  RANGE_INACTIVE,       /* never been active */
  RANGE_ACTIVE,         /* between first and second address */
  RANGE_CLOSED          /* like RANGE_INACTIVE, but range has ended once */
};
94
/* Kinds of address; stored in the `addr_type' member of struct addr.  In
   the comments below, `a' stands for the struct addr being described.  */
enum addr_types {
  ADDR_IS_NULL,         /* null address */
  ADDR_IS_REGEX,        /* a.addr_regex is valid */
  ADDR_IS_NUM,          /* a.addr_number is valid */
  ADDR_IS_NUM_MOD,      /* a.addr_number is valid, addr_step is modulo */
  ADDR_IS_STEP,         /* address is +N (only valid for addr2) */
  ADDR_IS_STEP_MOD,     /* address is ~N (only valid for addr2) */
  ADDR_IS_LAST          /* address is $ */
};
104
105struct addr {
106 enum addr_types addr_type;
107 countT addr_number;
108 countT addr_step;
109 struct regex *addr_regex;
110};
111
112
113struct replacement {
114 char *prefix;
115 size_t prefix_length;
116 int subst_id;
117 enum replacement_types repl_type;
118 struct replacement *next;
119};
120
121struct subst {
122 struct regex *regx;
123 struct replacement *replacement;
124 countT numb; /* if >0, only substitute for match number "numb" */
125 struct output *outf; /* 'w' option given */
126 unsigned global : 1; /* 'g' option given */
127 unsigned print : 2; /* 'p' option given (before/after eval) */
128 unsigned eval : 1; /* 'e' option given */
129 unsigned max_id : 4; /* maximum backreference on the RHS */
130};
131
#ifdef REG_PERL
/* This is the structure we store register match data in.  See
   regex.texinfo for a full description of what registers match.
   It is only declared here when REG_PERL is defined.  */
struct re_registers
{
  unsigned num_regs;
  regoff_t *start;
  regoff_t *end;
};
#endif
142
143
144
145struct sed_cmd {
146 struct addr *a1; /* save space: usually is NULL */
147 struct addr *a2;
148
149 /* See description the enum, above. */
150 enum addr_state range_state;
151
152 /* Non-zero if command is to be applied to non-matches. */
153 char addr_bang;
154
155 /* The actual command character. */
156 char cmd;
157
158 /* auxiliary data for various commands */
159 union {
160 /* This structure is used for a, i, and c commands. */
161 struct text_buf cmd_txt;
162
163 /* This is used for the l, q and Q commands. */
164 int int_arg;
165
166 /* This is used for the {}, b, and t commands. */
167 countT jump_index;
168
169 /* This is used for the r command. */
170 char *fname;
171
172 /* This is used for the hairy s command. */
173 struct subst *cmd_subst;
174
175 /* This is used for the w command. */
176 struct output *outf;
177
178 /* This is used for the R command. */
179 FILE *fp;
180
181 /* This is used for the y command. */
182 unsigned char *translate;
183 char **translatemb;
184 } x;
185};
186
187
188
189
190void bad_prog P_((const char *why));
191size_t normalize_text P_((char *text, size_t len, enum text_types buftype));
192struct vector *compile_string P_((struct vector *, char *str, size_t len));
193struct vector *compile_file P_((struct vector *, const char *cmdfile));
194void check_final_program P_((struct vector *));
195void rewind_read_files P_((void));
196void finish_program P_((struct vector *));
197
198struct regex *compile_regex P_((struct buffer *b, int flags, int needed_sub));
199int match_regex P_((struct regex *regex,
200 char *buf, size_t buflen, size_t buf_start_offset,
201 struct re_registers *regarray, int regsize));
202#ifdef DEBUG_LEAKS
203void release_regex P_((struct regex *));
204#endif
205
206int process_files P_((struct vector *, char **argv));
207
208int main P_((int, char **));
209
210extern void fmt P_ ((const char *line, const char *line_end, int max_length, FILE *output_file));
211
212extern int extended_regexp_flags;
213
214#ifndef CONFIG_WITHOUT_O_OPT
215/* The output file, defaults to stdout but can be overridden
216 by the -o or --output option. main sets this to avoid problems. */
217extern FILE *sed_stdout;
218#endif
219
220/* If set, fflush(stdout) on every line output. */
221extern bool unbuffered_output;
222
223/* If set, don't write out the line unless explicitly told to. */
224extern bool no_default_output;
225
226/* If set, reset line counts on every new file. */
227extern bool separate_files;
228
229/* Do we need to be pedantically POSIX compliant? */
230extern enum posixicity_types posixicity;
231
232/* How long should the `l' command's output line be? */
233extern countT lcmd_out_line_len;
234
235/* How do we edit files in-place? (we don't if NULL) */
236extern char *in_place_extension;
237
238/* Should we use EREs? */
239extern bool use_extended_syntax_p;
240
/* Declarations for multibyte character sets. */
extern int mb_cur_max;

/* Multibyte helper macros.

   With HAVE_MBRTOWC and HAVE_BTOWC, when mb_cur_max is 1:
     MBRTOWC stores btowc() of the first byte of S in *PWC and yields 1
             (PWC is dereferenced, so it must not be null on this path);
     WCRTOMB stores wctob() of WC in *S and yields 1.
   Otherwise MBRTOWC and WCRTOMB simply call mbrtowc() and wcrtomb().

   With HAVE_MBRTOWC, MBRLEN and BRLEN yield 1 when mb_cur_max is 1;
   otherwise MBRLEN calls mbrtowc() with a null destination and BRLEN
   calls brlen().

   Without HAVE_MBRTOWC only MBRLEN and BRLEN are defined, both as the
   constant 1; their arguments are then not evaluated.  */
#ifdef HAVE_MBRTOWC
#ifdef HAVE_BTOWC
#define MBRTOWC(pwc, s, n, ps) \
  (mb_cur_max == 1 ? \
   (*(pwc) = btowc (*(unsigned char *) (s)), 1) : \
   mbrtowc ((pwc), (s), (n), (ps)))

#define WCRTOMB(s, wc, ps) \
  (mb_cur_max == 1 ? \
   (*(s) = wctob ((wint_t) (wc)), 1) : \
   wcrtomb ((s), (wc), (ps)))
#else
#define MBRTOWC(pwc, s, n, ps) \
  mbrtowc ((pwc), (s), (n), (ps))

#define WCRTOMB(s, wc, ps) \
  wcrtomb ((s), (wc), (ps))
#endif

/* Arguments are parenthesized, as in MBRTOWC and WCRTOMB above.  */
#define MBRLEN(s, n, ps) \
  (mb_cur_max == 1 ? 1 : mbrtowc (NULL, (s), (n), (ps)))

#define BRLEN(ch, ps) \
  (mb_cur_max == 1 ? 1 : brlen ((ch), (ps)))

#else
#define MBRLEN(s, n, ps) 1
#define BRLEN(ch, ps) 1
#endif
273
274extern int brlen P_ ((int ch, mbstate_t *ps));
275extern void initialize_mbcs P_ ((void));
276
Note: See TracBrowser for help on using the repository browser.