1 | /*
|
---|
2 | * field.c - routines for dealing with fields and record parsing
|
---|
3 | */
|
---|
4 |
|
---|
5 | /*
|
---|
6 | * Copyright (C) 1986, 1988, 1989, 1991-2005 the Free Software Foundation, Inc.
|
---|
7 | *
|
---|
8 | * This file is part of GAWK, the GNU implementation of the
|
---|
9 | * AWK Programming Language.
|
---|
10 | *
|
---|
11 | * GAWK is free software; you can redistribute it and/or modify
|
---|
12 | * it under the terms of the GNU General Public License as published by
|
---|
13 | * the Free Software Foundation; either version 2 of the License, or
|
---|
14 | * (at your option) any later version.
|
---|
15 | *
|
---|
16 | * GAWK is distributed in the hope that it will be useful,
|
---|
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
19 | * GNU General Public License for more details.
|
---|
20 | *
|
---|
21 | * You should have received a copy of the GNU General Public License
|
---|
22 | * along with this program; if not, write to the Free Software
|
---|
23 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
|
---|
24 | */
|
---|
25 |
|
---|
26 | #include "awk.h"
|
---|
27 |
|
---|
/*
 * is_blank --- local stand-in for isblank(), in case the system lacks it.
 * Don't bother with autoconf ifdef junk, just force it.
 * See dfa.c and regex_internal.h and regcomp.c. Bleah.
 *
 * Returns 1 when c is a space or a horizontal tab, 0 for anything else.
 */
static int
is_blank(int c)
{
	switch (c) {
	case ' ':
	case '\t':
		return 1;
	default:
		return 0;
	}
}
38 |
|
---|
39 | typedef void (* Setfunc) P((long, char *, long, NODE *));
|
---|
40 |
|
---|
41 | static long (*parse_field) P((long, char **, int, NODE *,
|
---|
42 | Regexp *, Setfunc, NODE *));
|
---|
43 | static void rebuild_record P((void));
|
---|
44 | static long re_parse_field P((long, char **, int, NODE *,
|
---|
45 | Regexp *, Setfunc, NODE *));
|
---|
46 | static long def_parse_field P((long, char **, int, NODE *,
|
---|
47 | Regexp *, Setfunc, NODE *));
|
---|
48 | static long posix_def_parse_field P((long, char **, int, NODE *,
|
---|
49 | Regexp *, Setfunc, NODE *));
|
---|
50 | static long null_parse_field P((long, char **, int, NODE *,
|
---|
51 | Regexp *, Setfunc, NODE *));
|
---|
52 | static long sc_parse_field P((long, char **, int, NODE *,
|
---|
53 | Regexp *, Setfunc, NODE *));
|
---|
54 | static long fw_parse_field P((long, char **, int, NODE *,
|
---|
55 | Regexp *, Setfunc, NODE *));
|
---|
56 | static void set_element P((long num, char * str, long len, NODE *arr));
|
---|
57 | static void grow_fields_arr P((long num));
|
---|
58 | static void set_field P((long num, char *str, long len, NODE *dummy));
|
---|
59 | static void update_PROCINFO P((char *subscript, char *str));
|
---|
60 |
|
---|
61 |
|
---|
62 | static char *parse_extent; /* marks where to restart parse of record */
|
---|
63 | static long parse_high_water = 0; /* field number that we have parsed so far */
|
---|
64 | static long nf_high_water = 0; /* size of fields_arr */
|
---|
65 | static int resave_fs;
|
---|
66 | static NODE *save_FS; /* save current value of FS when line is read,
|
---|
67 | * to be used in deferred parsing
|
---|
68 | */
|
---|
69 | static int *FIELDWIDTHS = NULL;
|
---|
70 |
|
---|
71 | NODE **fields_arr; /* array of pointers to the field nodes */
|
---|
72 | int field0_valid; /* $(>0) has not been changed yet */
|
---|
73 | int default_FS; /* TRUE when FS == " " */
|
---|
74 | Regexp *FS_re_yes_case = NULL;
|
---|
75 | Regexp *FS_re_no_case = NULL;
|
---|
76 | Regexp *FS_regexp = NULL;
|
---|
77 | NODE *Null_field = NULL;
|
---|
78 |
|
---|
79 | /* using_FIELDWIDTHS --- static function, macro to avoid overhead */
|
---|
80 | #define using_FIELDWIDTHS() (parse_field == fw_parse_field)
|
---|
81 |
|
---|
82 | /* init_fields --- set up the fields array to start with */
|
---|
83 |
|
---|
84 | void
|
---|
85 | init_fields()
|
---|
86 | {
|
---|
87 | emalloc(fields_arr, NODE **, sizeof(NODE *), "init_fields");
|
---|
88 | fields_arr[0] = Nnull_string;
|
---|
89 | parse_extent = fields_arr[0]->stptr;
|
---|
90 | save_FS = dupnode(FS_node->var_value);
|
---|
91 | getnode(Null_field);
|
---|
92 | *Null_field = *Nnull_string;
|
---|
93 | Null_field->flags |= FIELD;
|
---|
94 | Null_field->flags &= ~(NUMCUR|NUMBER|MAYBE_NUM|PERM);
|
---|
95 | field0_valid = TRUE;
|
---|
96 | }
|
---|
97 |
|
---|
98 | /* grow_fields --- acquire new fields as needed */
|
---|
99 |
|
---|
100 | static void
|
---|
101 | grow_fields_arr(long num)
|
---|
102 | {
|
---|
103 | register int t;
|
---|
104 | register NODE *n;
|
---|
105 |
|
---|
106 | erealloc(fields_arr, NODE **, (num + 1) * sizeof(NODE *), "grow_fields_arr");
|
---|
107 | for (t = nf_high_water + 1; t <= num; t++) {
|
---|
108 | getnode(n);
|
---|
109 | *n = *Null_field;
|
---|
110 | fields_arr[t] = n;
|
---|
111 | }
|
---|
112 | nf_high_water = num;
|
---|
113 | }
|
---|
114 |
|
---|
115 | /* set_field --- set the value of a particular field */
|
---|
116 |
|
---|
117 | /*ARGSUSED*/
|
---|
118 | static void
|
---|
119 | set_field(long num,
|
---|
120 | char *str,
|
---|
121 | long len,
|
---|
122 | NODE *dummy ATTRIBUTE_UNUSED) /* just to make interface same as set_element */
|
---|
123 | {
|
---|
124 | register NODE *n;
|
---|
125 |
|
---|
126 | if (num > nf_high_water)
|
---|
127 | grow_fields_arr(num);
|
---|
128 | n = fields_arr[num];
|
---|
129 | n->stptr = str;
|
---|
130 | n->stlen = len;
|
---|
131 | n->flags = (STRCUR|STRING|MAYBE_NUM|FIELD);
|
---|
132 | }
|
---|
133 |
|
---|
134 | /* rebuild_record --- Someone assigned a value to $(something).
|
---|
135 | Fix up $0 to be right */
|
---|
136 |
|
---|
137 | static void
|
---|
138 | rebuild_record()
|
---|
139 | {
|
---|
140 | /*
|
---|
141 | * use explicit unsigned longs for lengths, in case
|
---|
142 | * a size_t isn't big enough.
|
---|
143 | */
|
---|
144 | register unsigned long tlen;
|
---|
145 | register unsigned long ofslen;
|
---|
146 | register NODE *tmp;
|
---|
147 | NODE *ofs;
|
---|
148 | char *ops;
|
---|
149 | register char *cops;
|
---|
150 | long i;
|
---|
151 |
|
---|
152 | assert(NF != -1);
|
---|
153 |
|
---|
154 | tlen = 0;
|
---|
155 | ofs = force_string(OFS_node->var_value);
|
---|
156 | ofslen = ofs->stlen;
|
---|
157 | for (i = NF; i > 0; i--) {
|
---|
158 | tmp = fields_arr[i];
|
---|
159 | tmp = force_string(tmp);
|
---|
160 | tlen += tmp->stlen;
|
---|
161 | }
|
---|
162 | tlen += (NF - 1) * ofslen;
|
---|
163 | if ((long) tlen < 0)
|
---|
164 | tlen = 0;
|
---|
165 | emalloc(ops, char *, tlen + 2, "rebuild_record");
|
---|
166 | cops = ops;
|
---|
167 | ops[0] = '\0';
|
---|
168 | for (i = 1; i <= NF; i++) {
|
---|
169 | tmp = fields_arr[i];
|
---|
170 | /* copy field */
|
---|
171 | if (tmp->stlen == 1)
|
---|
172 | *cops++ = tmp->stptr[0];
|
---|
173 | else if (tmp->stlen != 0) {
|
---|
174 | memcpy(cops, tmp->stptr, tmp->stlen);
|
---|
175 | cops += tmp->stlen;
|
---|
176 | }
|
---|
177 | /* copy OFS */
|
---|
178 | if (i != NF) {
|
---|
179 | if (ofslen == 1)
|
---|
180 | *cops++ = ofs->stptr[0];
|
---|
181 | else if (ofslen != 0) {
|
---|
182 | memcpy(cops, ofs->stptr, ofslen);
|
---|
183 | cops += ofslen;
|
---|
184 | }
|
---|
185 | }
|
---|
186 | }
|
---|
187 | tmp = make_str_node(ops, tlen, ALREADY_MALLOCED);
|
---|
188 |
|
---|
189 | /*
|
---|
190 | * Since we are about to unref fields_arr[0], we want to find
|
---|
191 | * any fields that still point into it, and have them point
|
---|
192 | * into the new field zero. This has to be done intelligently,
|
---|
193 | * so that unrefing a field doesn't try to unref into the old $0.
|
---|
194 | */
|
---|
195 | for (cops = ops, i = 1; i <= NF; i++) {
|
---|
196 | if (fields_arr[i]->stlen > 0) {
|
---|
197 | NODE *n;
|
---|
198 | getnode(n);
|
---|
199 |
|
---|
200 | if ((fields_arr[i]->flags & FIELD) == 0) {
|
---|
201 | *n = *Null_field;
|
---|
202 | n->stlen = fields_arr[i]->stlen;
|
---|
203 | if ((fields_arr[i]->flags & (NUMCUR|NUMBER)) != 0) {
|
---|
204 | n->flags |= (fields_arr[i]->flags & (NUMCUR|NUMBER));
|
---|
205 | n->numbr = fields_arr[i]->numbr;
|
---|
206 | }
|
---|
207 | } else {
|
---|
208 | *n = *(fields_arr[i]);
|
---|
209 | n->flags &= ~(MALLOC|TEMP|PERM|STRING);
|
---|
210 | }
|
---|
211 |
|
---|
212 | n->stptr = cops;
|
---|
213 | unref(fields_arr[i]);
|
---|
214 | fields_arr[i] = n;
|
---|
215 | }
|
---|
216 | cops += fields_arr[i]->stlen + ofslen;
|
---|
217 | }
|
---|
218 |
|
---|
219 | unref(fields_arr[0]);
|
---|
220 |
|
---|
221 | fields_arr[0] = tmp;
|
---|
222 | field0_valid = TRUE;
|
---|
223 | }
|
---|
224 |
|
---|
225 | /*
|
---|
226 | * set_record:
|
---|
227 | * setup $0, but defer parsing rest of line until reference is made to $(>0)
|
---|
228 | * or to NF. At that point, parse only as much as necessary.
|
---|
229 | *
|
---|
230 | * Manage a private buffer for the contents of $0. Doing so keeps us safe
|
---|
231 | * if `getline var' decides to rearrange the contents of the IOBUF that
|
---|
232 | * $0 might have been pointing into. The cost is the copying of the buffer;
|
---|
233 | * but better correct than fast.
|
---|
234 | */
|
---|
235 | void
|
---|
236 | set_record(const char *buf, int cnt)
|
---|
237 | {
|
---|
238 | NODE *n;
|
---|
239 | static char *databuf;
|
---|
240 | static unsigned long databuf_size;
|
---|
241 | #define INITIAL_SIZE 512
|
---|
242 | #define MAX_SIZE ((unsigned long) ~0) /* maximally portable ... */
|
---|
243 |
|
---|
244 | reset_record();
|
---|
245 |
|
---|
246 | /* buffer management: */
|
---|
247 | if (databuf_size == 0) { /* first time */
|
---|
248 | emalloc(databuf, char *, INITIAL_SIZE, "set_record");
|
---|
249 | databuf_size = INITIAL_SIZE;
|
---|
250 | memset(databuf, '\0', INITIAL_SIZE);
|
---|
251 |
|
---|
252 | }
|
---|
253 | /*
|
---|
254 | * Make sure there's enough room. Since we sometimes need
|
---|
255 | * to place a sentinel at the end, we make sure
|
---|
256 | * databuf_size is > cnt after allocation.
|
---|
257 | */
|
---|
258 | if (cnt >= databuf_size) {
|
---|
259 | while (cnt >= databuf_size && databuf_size <= MAX_SIZE)
|
---|
260 | databuf_size *= 2;
|
---|
261 | erealloc(databuf, char *, databuf_size, "set_record");
|
---|
262 | memset(databuf, '\0', databuf_size);
|
---|
263 | }
|
---|
264 | /* copy the data */
|
---|
265 | memcpy(databuf, buf, cnt);
|
---|
266 |
|
---|
267 | /* manage field 0: */
|
---|
268 | unref(fields_arr[0]);
|
---|
269 | getnode(n);
|
---|
270 | n->stptr = databuf;
|
---|
271 | n->stlen = cnt;
|
---|
272 | n->stref = 1;
|
---|
273 | n->type = Node_val;
|
---|
274 | n->stfmt = -1;
|
---|
275 | n->flags = (STRING|STRCUR|MAYBE_NUM|FIELD);
|
---|
276 | fields_arr[0] = n;
|
---|
277 |
|
---|
278 | #undef INITIAL_SIZE
|
---|
279 | #undef MAX_SIZE
|
---|
280 | }
|
---|
281 |
|
---|
282 | /* reset_record --- start over again with current $0 */
|
---|
283 |
|
---|
284 | void
|
---|
285 | reset_record()
|
---|
286 | {
|
---|
287 | register int i;
|
---|
288 | NODE *n;
|
---|
289 |
|
---|
290 | (void) force_string(fields_arr[0]);
|
---|
291 |
|
---|
292 | NF = -1;
|
---|
293 | for (i = 1; i <= parse_high_water; i++) {
|
---|
294 | unref(fields_arr[i]);
|
---|
295 | getnode(n);
|
---|
296 | *n = *Null_field;
|
---|
297 | fields_arr[i] = n;
|
---|
298 | }
|
---|
299 |
|
---|
300 | parse_high_water = 0;
|
---|
301 | /*
|
---|
302 | * $0 = $0 should resplit using the current value of FS.
|
---|
303 | */
|
---|
304 | if (resave_fs) {
|
---|
305 | resave_fs = FALSE;
|
---|
306 | unref(save_FS);
|
---|
307 | save_FS = dupnode(FS_node->var_value);
|
---|
308 | }
|
---|
309 |
|
---|
310 | field0_valid = TRUE;
|
---|
311 | }
|
---|
312 |
|
---|
313 | /* set_NF --- handle what happens to $0 and fields when NF is changed */
|
---|
314 |
|
---|
315 | void
|
---|
316 | set_NF()
|
---|
317 | {
|
---|
318 | register int i;
|
---|
319 | NODE *n;
|
---|
320 |
|
---|
321 | assert(NF != -1);
|
---|
322 |
|
---|
323 | NF = (long) force_number(NF_node->var_value);
|
---|
324 |
|
---|
325 | if (NF < 0)
|
---|
326 | fatal(_("NF set to negative value"));
|
---|
327 |
|
---|
328 | if (NF > nf_high_water)
|
---|
329 | grow_fields_arr(NF);
|
---|
330 | if (parse_high_water < NF) {
|
---|
331 | for (i = parse_high_water + 1; i >= 0 && i <= NF; i++) {
|
---|
332 | unref(fields_arr[i]);
|
---|
333 | getnode(n);
|
---|
334 | *n = *Null_field;
|
---|
335 | fields_arr[i] = n;
|
---|
336 | }
|
---|
337 | } else if (parse_high_water > 0) {
|
---|
338 | for (i = NF + 1; i >= 0 && i <= parse_high_water; i++) {
|
---|
339 | unref(fields_arr[i]);
|
---|
340 | getnode(n);
|
---|
341 | *n = *Null_field;
|
---|
342 | fields_arr[i] = n;
|
---|
343 | }
|
---|
344 | parse_high_water = NF;
|
---|
345 | }
|
---|
346 | field0_valid = FALSE;
|
---|
347 | }
|
---|
348 |
|
---|
349 | /*
|
---|
350 | * re_parse_field --- parse fields using a regexp.
|
---|
351 | *
|
---|
352 | * This is called both from get_field() and from do_split()
|
---|
353 | * via (*parse_field)(). This variation is for when FS is a regular
|
---|
354 | * expression -- either user-defined or because RS=="" and FS==" "
|
---|
355 | */
|
---|
356 | static long
|
---|
357 | re_parse_field(long up_to, /* parse only up to this field number */
|
---|
358 | char **buf, /* on input: string to parse; on output: point to start next */
|
---|
359 | int len,
|
---|
360 | NODE *fs ATTRIBUTE_UNUSED,
|
---|
361 | Regexp *rp,
|
---|
362 | Setfunc set, /* routine to set the value of the parsed field */
|
---|
363 | NODE *n)
|
---|
364 | {
|
---|
365 | register char *scan = *buf;
|
---|
366 | register long nf = parse_high_water;
|
---|
367 | register char *field;
|
---|
368 | register char *end = scan + len;
|
---|
369 | #ifdef MBS_SUPPORT
|
---|
370 | size_t mbclen = 0;
|
---|
371 | mbstate_t mbs;
|
---|
372 | if (gawk_mb_cur_max > 1)
|
---|
373 | memset(&mbs, 0, sizeof(mbstate_t));
|
---|
374 | #endif
|
---|
375 |
|
---|
376 | if (up_to == UNLIMITED)
|
---|
377 | nf = 0;
|
---|
378 | if (len == 0)
|
---|
379 | return nf;
|
---|
380 |
|
---|
381 | if (RS_is_null && default_FS)
|
---|
382 | while (scan < end && (*scan == ' ' || *scan == '\t' || *scan == '\n'))
|
---|
383 | scan++;
|
---|
384 | field = scan;
|
---|
385 | while (scan < end
|
---|
386 | && research(rp, scan, 0, (end - scan), RE_NEED_START) != -1
|
---|
387 | && nf < up_to) {
|
---|
388 | if (REEND(rp, scan) == RESTART(rp, scan)) { /* null match */
|
---|
389 | #ifdef MBS_SUPPORT
|
---|
390 | if (gawk_mb_cur_max > 1) {
|
---|
391 | mbclen = mbrlen(scan, end-scan, &mbs);
|
---|
392 | if ((mbclen == 1) || (mbclen == (size_t) -1)
|
---|
393 | || (mbclen == (size_t) -2) || (mbclen == 0)) {
|
---|
394 | /* We treat it as a singlebyte character. */
|
---|
395 | mbclen = 1;
|
---|
396 | }
|
---|
397 | scan += mbclen;
|
---|
398 | } else
|
---|
399 | #endif
|
---|
400 | scan++;
|
---|
401 | if (scan == end) {
|
---|
402 | (*set)(++nf, field, (long)(scan - field), n);
|
---|
403 | up_to = nf;
|
---|
404 | break;
|
---|
405 | }
|
---|
406 | continue;
|
---|
407 | }
|
---|
408 | (*set)(++nf, field,
|
---|
409 | (long)(scan + RESTART(rp, scan) - field), n);
|
---|
410 | scan += REEND(rp, scan);
|
---|
411 | field = scan;
|
---|
412 | if (scan == end) /* FS at end of record */
|
---|
413 | (*set)(++nf, field, 0L, n);
|
---|
414 | }
|
---|
415 | if (nf != up_to && scan < end) {
|
---|
416 | (*set)(++nf, scan, (long)(end - scan), n);
|
---|
417 | scan = end;
|
---|
418 | }
|
---|
419 | *buf = scan;
|
---|
420 | return nf;
|
---|
421 | }
|
---|
422 |
|
---|
423 | /*
|
---|
424 | * def_parse_field --- default field parsing.
|
---|
425 | *
|
---|
426 | * This is called both from get_field() and from do_split()
|
---|
427 | * via (*parse_field)(). This variation is for when FS is a single space
|
---|
428 | * character.
|
---|
429 | */
|
---|
430 |
|
---|
431 | static long
|
---|
432 | def_parse_field(long up_to, /* parse only up to this field number */
|
---|
433 | char **buf, /* on input: string to parse; on output: point to start next */
|
---|
434 | int len,
|
---|
435 | NODE *fs,
|
---|
436 | Regexp *rp ATTRIBUTE_UNUSED,
|
---|
437 | Setfunc set, /* routine to set the value of the parsed field */
|
---|
438 | NODE *n)
|
---|
439 | {
|
---|
440 | register char *scan = *buf;
|
---|
441 | register long nf = parse_high_water;
|
---|
442 | register char *field;
|
---|
443 | register char *end = scan + len;
|
---|
444 | char sav;
|
---|
445 |
|
---|
446 | if (up_to == UNLIMITED)
|
---|
447 | nf = 0;
|
---|
448 | if (len == 0)
|
---|
449 | return nf;
|
---|
450 |
|
---|
451 | /*
|
---|
452 | * Nasty special case. If FS set to "", return whole record
|
---|
453 | * as first field. This is not worth a separate function.
|
---|
454 | */
|
---|
455 | if (fs->stlen == 0) {
|
---|
456 | (*set)(++nf, *buf, len, n);
|
---|
457 | *buf += len;
|
---|
458 | return nf;
|
---|
459 | }
|
---|
460 |
|
---|
461 | /* before doing anything save the char at *end */
|
---|
462 | sav = *end;
|
---|
463 | /* because it will be destroyed now: */
|
---|
464 |
|
---|
465 | *end = ' '; /* sentinel character */
|
---|
466 | for (; nf < up_to; scan++) {
|
---|
467 | /*
|
---|
468 | * special case: fs is single space, strip leading whitespace
|
---|
469 | */
|
---|
470 | while (scan < end && (*scan == ' ' || *scan == '\t' || *scan == '\n'))
|
---|
471 | scan++;
|
---|
472 | if (scan >= end)
|
---|
473 | break;
|
---|
474 | field = scan;
|
---|
475 | while (*scan != ' ' && *scan != '\t' && *scan != '\n')
|
---|
476 | scan++;
|
---|
477 | (*set)(++nf, field, (long)(scan - field), n);
|
---|
478 | if (scan == end)
|
---|
479 | break;
|
---|
480 | }
|
---|
481 |
|
---|
482 | /* everything done, restore original char at *end */
|
---|
483 | *end = sav;
|
---|
484 |
|
---|
485 | *buf = scan;
|
---|
486 | return nf;
|
---|
487 | }
|
---|
488 |
|
---|
489 | /*
|
---|
490 | * posix_def_parse_field --- default field parsing.
|
---|
491 | *
|
---|
492 | * This is called both from get_field() and from do_split()
|
---|
493 | * via (*parse_field)(). This variation is for when FS is a single space
|
---|
494 | * character. The only difference between this and def_parse_field()
|
---|
495 | * is that this one does not allow newlines to separate fields.
|
---|
496 | */
|
---|
497 |
|
---|
498 | static long
|
---|
499 | posix_def_parse_field(long up_to, /* parse only up to this field number */
|
---|
500 | char **buf, /* on input: string to parse; on output: point to start next */
|
---|
501 | int len,
|
---|
502 | NODE *fs,
|
---|
503 | Regexp *rp ATTRIBUTE_UNUSED,
|
---|
504 | Setfunc set, /* routine to set the value of the parsed field */
|
---|
505 | NODE *n)
|
---|
506 | {
|
---|
507 | register char *scan = *buf;
|
---|
508 | register long nf = parse_high_water;
|
---|
509 | register char *field;
|
---|
510 | register char *end = scan + len;
|
---|
511 | char sav;
|
---|
512 |
|
---|
513 | if (up_to == UNLIMITED)
|
---|
514 | nf = 0;
|
---|
515 | if (len == 0)
|
---|
516 | return nf;
|
---|
517 |
|
---|
518 | /*
|
---|
519 | * Nasty special case. If FS set to "", return whole record
|
---|
520 | * as first field. This is not worth a separate function.
|
---|
521 | */
|
---|
522 | if (fs->stlen == 0) {
|
---|
523 | (*set)(++nf, *buf, len, n);
|
---|
524 | *buf += len;
|
---|
525 | return nf;
|
---|
526 | }
|
---|
527 |
|
---|
528 | /* before doing anything save the char at *end */
|
---|
529 | sav = *end;
|
---|
530 | /* because it will be destroyed now: */
|
---|
531 |
|
---|
532 | *end = ' '; /* sentinel character */
|
---|
533 | for (; nf < up_to; scan++) {
|
---|
534 | /*
|
---|
535 | * special case: fs is single space, strip leading whitespace
|
---|
536 | */
|
---|
537 | while (scan < end && (*scan == ' ' || *scan == '\t'))
|
---|
538 | scan++;
|
---|
539 | if (scan >= end)
|
---|
540 | break;
|
---|
541 | field = scan;
|
---|
542 | while (*scan != ' ' && *scan != '\t')
|
---|
543 | scan++;
|
---|
544 | (*set)(++nf, field, (long)(scan - field), n);
|
---|
545 | if (scan == end)
|
---|
546 | break;
|
---|
547 | }
|
---|
548 |
|
---|
549 | /* everything done, restore original char at *end */
|
---|
550 | *end = sav;
|
---|
551 |
|
---|
552 | *buf = scan;
|
---|
553 | return nf;
|
---|
554 | }
|
---|
555 |
|
---|
556 | /*
|
---|
557 | * null_parse_field --- each character is a separate field
|
---|
558 | *
|
---|
559 | * This is called both from get_field() and from do_split()
|
---|
560 | * via (*parse_field)(). This variation is for when FS is the null string.
|
---|
561 | */
|
---|
562 | static long
|
---|
563 | null_parse_field(long up_to, /* parse only up to this field number */
|
---|
564 | char **buf, /* on input: string to parse; on output: point to start next */
|
---|
565 | int len,
|
---|
566 | NODE *fs ATTRIBUTE_UNUSED,
|
---|
567 | Regexp *rp ATTRIBUTE_UNUSED,
|
---|
568 | Setfunc set, /* routine to set the value of the parsed field */
|
---|
569 | NODE *n)
|
---|
570 | {
|
---|
571 | register char *scan = *buf;
|
---|
572 | register long nf = parse_high_water;
|
---|
573 | register char *end = scan + len;
|
---|
574 |
|
---|
575 | if (up_to == UNLIMITED)
|
---|
576 | nf = 0;
|
---|
577 | if (len == 0)
|
---|
578 | return nf;
|
---|
579 |
|
---|
580 | #ifdef MBS_SUPPORT
|
---|
581 | if (gawk_mb_cur_max > 1) {
|
---|
582 | mbstate_t mbs;
|
---|
583 | memset(&mbs, 0, sizeof(mbstate_t));
|
---|
584 | for (; nf < up_to && scan < end;) {
|
---|
585 | size_t mbclen = mbrlen(scan, end-scan, &mbs);
|
---|
586 | if ((mbclen == 1) || (mbclen == (size_t) -1)
|
---|
587 | || (mbclen == (size_t) -2) || (mbclen == 0)) {
|
---|
588 | /* We treat it as a singlebyte character. */
|
---|
589 | mbclen = 1;
|
---|
590 | }
|
---|
591 | (*set)(++nf, scan, mbclen, n);
|
---|
592 | scan += mbclen;
|
---|
593 | }
|
---|
594 | } else
|
---|
595 | #endif
|
---|
596 | for (; nf < up_to && scan < end; scan++)
|
---|
597 | (*set)(++nf, scan, 1L, n);
|
---|
598 |
|
---|
599 | *buf = scan;
|
---|
600 | return nf;
|
---|
601 | }
|
---|
602 |
|
---|
603 | /*
|
---|
604 | * sc_parse_field --- single character field separator
|
---|
605 | *
|
---|
606 | * This is called both from get_field() and from do_split()
|
---|
607 | * via (*parse_field)(). This variation is for when FS is a single character
|
---|
608 | * other than space.
|
---|
609 | */
|
---|
610 | static long
|
---|
611 | sc_parse_field(long up_to, /* parse only up to this field number */
|
---|
612 | char **buf, /* on input: string to parse; on output: point to start next */
|
---|
613 | int len,
|
---|
614 | NODE *fs,
|
---|
615 | Regexp *rp ATTRIBUTE_UNUSED,
|
---|
616 | Setfunc set, /* routine to set the value of the parsed field */
|
---|
617 | NODE *n)
|
---|
618 | {
|
---|
619 | register char *scan = *buf;
|
---|
620 | register char fschar;
|
---|
621 | register long nf = parse_high_water;
|
---|
622 | register char *field;
|
---|
623 | register char *end = scan + len;
|
---|
624 | char sav;
|
---|
625 | #ifdef MBS_SUPPORT
|
---|
626 | size_t mbclen = 0;
|
---|
627 | mbstate_t mbs;
|
---|
628 | if (gawk_mb_cur_max > 1)
|
---|
629 | memset(&mbs, 0, sizeof(mbstate_t));
|
---|
630 | #endif
|
---|
631 |
|
---|
632 | if (up_to == UNLIMITED)
|
---|
633 | nf = 0;
|
---|
634 | if (len == 0)
|
---|
635 | return nf;
|
---|
636 |
|
---|
637 | if (RS_is_null && fs->stlen == 0)
|
---|
638 | fschar = '\n';
|
---|
639 | else
|
---|
640 | fschar = fs->stptr[0];
|
---|
641 |
|
---|
642 | /* before doing anything save the char at *end */
|
---|
643 | sav = *end;
|
---|
644 | /* because it will be destroyed now: */
|
---|
645 | *end = fschar; /* sentinel character */
|
---|
646 |
|
---|
647 | for (; nf < up_to;) {
|
---|
648 | field = scan;
|
---|
649 | #ifdef MBS_SUPPORT
|
---|
650 | if (gawk_mb_cur_max > 1) {
|
---|
651 | while (*scan != fschar) {
|
---|
652 | mbclen = mbrlen(scan, end-scan, &mbs);
|
---|
653 | if ((mbclen == 1) || (mbclen == (size_t) -1)
|
---|
654 | || (mbclen == (size_t) -2) || (mbclen == 0)) {
|
---|
655 | /* We treat it as a singlebyte character. */
|
---|
656 | mbclen = 1;
|
---|
657 | }
|
---|
658 | scan += mbclen;
|
---|
659 | }
|
---|
660 | } else
|
---|
661 | #endif
|
---|
662 | while (*scan != fschar)
|
---|
663 | scan++;
|
---|
664 | (*set)(++nf, field, (long)(scan - field), n);
|
---|
665 | if (scan == end)
|
---|
666 | break;
|
---|
667 | scan++;
|
---|
668 | if (scan == end) { /* FS at end of record */
|
---|
669 | (*set)(++nf, field, 0L, n);
|
---|
670 | break;
|
---|
671 | }
|
---|
672 | }
|
---|
673 |
|
---|
674 | /* everything done, restore original char at *end */
|
---|
675 | *end = sav;
|
---|
676 |
|
---|
677 | *buf = scan;
|
---|
678 | return nf;
|
---|
679 | }
|
---|
680 |
|
---|
681 | /*
|
---|
682 | * fw_parse_field --- field parsing using FIELDWIDTHS spec
|
---|
683 | *
|
---|
684 | * This is called from get_field() via (*parse_field)().
|
---|
685 | * This variation is for fields are fixed widths.
|
---|
686 | */
|
---|
687 | static long
|
---|
688 | fw_parse_field(long up_to, /* parse only up to this field number */
|
---|
689 | char **buf, /* on input: string to parse; on output: point to start next */
|
---|
690 | int len,
|
---|
691 | NODE *fs ATTRIBUTE_UNUSED,
|
---|
692 | Regexp *rp ATTRIBUTE_UNUSED,
|
---|
693 | Setfunc set, /* routine to set the value of the parsed field */
|
---|
694 | NODE *n)
|
---|
695 | {
|
---|
696 | register char *scan = *buf;
|
---|
697 | register long nf = parse_high_water;
|
---|
698 | register char *end = scan + len;
|
---|
699 |
|
---|
700 | if (up_to == UNLIMITED)
|
---|
701 | nf = 0;
|
---|
702 | if (len == 0)
|
---|
703 | return nf;
|
---|
704 | for (; nf < up_to && (len = FIELDWIDTHS[nf+1]) != -1; ) {
|
---|
705 | if (len > end - scan)
|
---|
706 | len = end - scan;
|
---|
707 | (*set)(++nf, scan, (long) len, n);
|
---|
708 | scan += len;
|
---|
709 | }
|
---|
710 | if (len == -1)
|
---|
711 | *buf = end;
|
---|
712 | else
|
---|
713 | *buf = scan;
|
---|
714 | return nf;
|
---|
715 | }
|
---|
716 |
|
---|
717 | /* get_field --- return a particular $n */
|
---|
718 |
|
---|
719 | /* assign is not NULL if this field is on the LHS of an assign */
|
---|
720 |
|
---|
721 | NODE **
|
---|
722 | get_field(register long requested, Func_ptr *assign)
|
---|
723 | {
|
---|
724 | /*
|
---|
725 | * if requesting whole line but some other field has been altered,
|
---|
726 | * then the whole line must be rebuilt
|
---|
727 | */
|
---|
728 | if (requested == 0) {
|
---|
729 | if (! field0_valid) {
|
---|
730 | /* first, parse remainder of input record */
|
---|
731 | if (NF == -1) {
|
---|
732 | NF = (*parse_field)(UNLIMITED-1, &parse_extent,
|
---|
733 | fields_arr[0]->stlen -
|
---|
734 | (parse_extent - fields_arr[0]->stptr),
|
---|
735 | save_FS, FS_regexp, set_field,
|
---|
736 | (NODE *) NULL);
|
---|
737 | parse_high_water = NF;
|
---|
738 | }
|
---|
739 | rebuild_record();
|
---|
740 | }
|
---|
741 | if (assign != NULL)
|
---|
742 | *assign = reset_record;
|
---|
743 | return &fields_arr[0];
|
---|
744 | }
|
---|
745 |
|
---|
746 | /* assert(requested > 0); */
|
---|
747 |
|
---|
748 | if (assign != NULL)
|
---|
749 | field0_valid = FALSE; /* $0 needs reconstruction */
|
---|
750 |
|
---|
751 | if (requested <= parse_high_water) /* already parsed this field */
|
---|
752 | return &fields_arr[requested];
|
---|
753 |
|
---|
754 | if (NF == -1) { /* have not yet parsed to end of record */
|
---|
755 | /*
|
---|
756 | * parse up to requested fields, calling set_field() for each,
|
---|
757 | * saving in parse_extent the point where the parse left off
|
---|
758 | */
|
---|
759 | if (parse_high_water == 0) /* starting at the beginning */
|
---|
760 | parse_extent = fields_arr[0]->stptr;
|
---|
761 | parse_high_water = (*parse_field)(requested, &parse_extent,
|
---|
762 | fields_arr[0]->stlen - (parse_extent - fields_arr[0]->stptr),
|
---|
763 | save_FS, FS_regexp, set_field, (NODE *) NULL);
|
---|
764 |
|
---|
765 | /*
|
---|
766 | * if we reached the end of the record, set NF to the number of
|
---|
767 | * fields so far. Note that requested might actually refer to
|
---|
768 | * a field that is beyond the end of the record, but we won't
|
---|
769 | * set NF to that value at this point, since this is only a
|
---|
770 | * reference to the field and NF only gets set if the field
|
---|
771 | * is assigned to -- this case is handled below
|
---|
772 | */
|
---|
773 | if (parse_extent == fields_arr[0]->stptr + fields_arr[0]->stlen)
|
---|
774 | NF = parse_high_water;
|
---|
775 | if (requested == UNLIMITED-1) /* UNLIMITED-1 means set NF */
|
---|
776 | requested = parse_high_water;
|
---|
777 | }
|
---|
778 | if (parse_high_water < requested) { /* requested beyond end of record */
|
---|
779 | if (assign != NULL) { /* expand record */
|
---|
780 | if (requested > nf_high_water)
|
---|
781 | grow_fields_arr(requested);
|
---|
782 |
|
---|
783 | NF = requested;
|
---|
784 | parse_high_water = requested;
|
---|
785 | } else
|
---|
786 | return &Null_field;
|
---|
787 | }
|
---|
788 |
|
---|
789 | return &fields_arr[requested];
|
---|
790 | }
|
---|
791 |
|
---|
792 | /* set_element --- set an array element, used by do_split() */
|
---|
793 |
|
---|
794 | static void
|
---|
795 | set_element(long num, char *s, long len, NODE *n)
|
---|
796 | {
|
---|
797 | register NODE *it;
|
---|
798 |
|
---|
799 | it = make_string(s, len);
|
---|
800 | it->flags |= MAYBE_NUM;
|
---|
801 | *assoc_lookup(n, tmp_number((AWKNUM) (num)), FALSE) = it;
|
---|
802 | }
|
---|
803 |
|
---|
804 | /* do_split --- implement split(), semantics are same as for field splitting */
|
---|
805 |
|
---|
806 | NODE *
|
---|
807 | do_split(NODE *tree)
|
---|
808 | {
|
---|
809 | NODE *src, *arr, *sep, *fs, *src2, *fs2, *tmp;
|
---|
810 | char *s;
|
---|
811 | long (*parseit) P((long, char **, int, NODE *,
|
---|
812 | Regexp *, Setfunc, NODE *));
|
---|
813 | Regexp *rp = NULL;
|
---|
814 |
|
---|
815 | src = force_string(tree_eval(tree->lnode));
|
---|
816 |
|
---|
817 | arr = get_param(tree->rnode->lnode);
|
---|
818 | if (arr->type != Node_var_array)
|
---|
819 | fatal(_("split: second argument is not an array"));
|
---|
820 |
|
---|
821 | sep = tree->rnode->rnode->lnode;
|
---|
822 |
|
---|
823 | if (src->stlen == 0) {
|
---|
824 | /*
|
---|
825 | * Skip the work if first arg is the null string.
|
---|
826 | */
|
---|
827 | free_temp(src);
|
---|
828 | /*
|
---|
829 | * Evaluate sep if it may have side effects.
|
---|
830 | */
|
---|
831 | if ((sep->re_flags & (FS_DFLT|CONST)) == 0)
|
---|
832 | free_temp(tree_eval(sep->re_exp));
|
---|
833 | /*
|
---|
834 | * And now we can safely turn off the array.
|
---|
835 | */
|
---|
836 | assoc_clear(arr);
|
---|
837 | return tmp_number((AWKNUM) 0);
|
---|
838 | }
|
---|
839 |
|
---|
840 | if ((sep->re_flags & FS_DFLT) != 0 && ! using_FIELDWIDTHS() && ! RS_is_null) {
|
---|
841 | parseit = parse_field;
|
---|
842 | fs = force_string(FS_node->var_value);
|
---|
843 | rp = FS_regexp;
|
---|
844 | } else {
|
---|
845 | fs = force_string(tree_eval(sep->re_exp));
|
---|
846 | if (fs->stlen == 0) {
|
---|
847 | static short warned = FALSE;
|
---|
848 |
|
---|
849 | parseit = null_parse_field;
|
---|
850 |
|
---|
851 | if (do_lint && ! warned) {
|
---|
852 | warned = TRUE;
|
---|
853 | lintwarn(_("split: null string for third arg is a gawk extension"));
|
---|
854 | }
|
---|
855 | } else if (fs->stlen == 1 && (sep->re_flags & CONST) == 0) {
|
---|
856 | if (fs->stptr[0] == ' ') {
|
---|
857 | if (do_posix)
|
---|
858 | parseit = posix_def_parse_field;
|
---|
859 | else
|
---|
860 | parseit = def_parse_field;
|
---|
861 | } else
|
---|
862 | parseit = sc_parse_field;
|
---|
863 | } else {
|
---|
864 | parseit = re_parse_field;
|
---|
865 | rp = re_update(sep);
|
---|
866 | }
|
---|
867 | }
|
---|
868 |
|
---|
869 | /*
|
---|
870 | * do dupnode(), to avoid problems like
|
---|
871 | * x = split(a["LINE"], a, a["FS"])
|
---|
872 | * since we assoc_clear the array. gack.
|
---|
873 | * this also gives us complete call by value semantics.
|
---|
874 | */
|
---|
875 | src2 = dupnode(src);
|
---|
876 | free_temp(src);
|
---|
877 |
|
---|
878 | fs2 = dupnode(fs);
|
---|
879 | free_temp(fs);
|
---|
880 |
|
---|
881 | assoc_clear(arr);
|
---|
882 |
|
---|
883 | s = src2->stptr;
|
---|
884 | tmp = tmp_number((AWKNUM) (*parseit)(UNLIMITED, &s, (int) src2->stlen,
|
---|
885 | fs2, rp, set_element, arr));
|
---|
886 | unref(src2);
|
---|
887 | unref(fs2);
|
---|
888 | return tmp;
|
---|
889 | }
|
---|
890 |
|
---|
891 | /* set_FIELDWIDTHS --- handle an assignment to FIELDWIDTHS */
|
---|
892 |
|
---|
893 | void
|
---|
894 | set_FIELDWIDTHS()
|
---|
895 | {
|
---|
896 | register char *scan;
|
---|
897 | char *end;
|
---|
898 | register int i;
|
---|
899 | static int fw_alloc = 4;
|
---|
900 | static int warned = FALSE;
|
---|
901 | extern unsigned long strtoul P((const char *, char **endptr, int base));
|
---|
902 |
|
---|
903 | if (do_lint && ! warned) {
|
---|
904 | warned = TRUE;
|
---|
905 | lintwarn(_("`FIELDWIDTHS' is a gawk extension"));
|
---|
906 | }
|
---|
907 | if (do_traditional) /* quick and dirty, does the trick */
|
---|
908 | return;
|
---|
909 |
|
---|
910 | /*
|
---|
911 | * If changing the way fields are split, obey least-suprise
|
---|
912 | * semantics, and force $0 to be split totally.
|
---|
913 | */
|
---|
914 | if (fields_arr != NULL)
|
---|
915 | (void) get_field(UNLIMITED - 1, 0);
|
---|
916 |
|
---|
917 | parse_field = fw_parse_field;
|
---|
918 | scan = force_string(FIELDWIDTHS_node->var_value)->stptr;
|
---|
919 | end = scan + 1;
|
---|
920 | if (FIELDWIDTHS == NULL)
|
---|
921 | emalloc(FIELDWIDTHS, int *, fw_alloc * sizeof(int), "set_FIELDWIDTHS");
|
---|
922 | FIELDWIDTHS[0] = 0;
|
---|
923 | for (i = 1; ; i++) {
|
---|
924 | unsigned long int tmp;
|
---|
925 | if (i >= fw_alloc) {
|
---|
926 | fw_alloc *= 2;
|
---|
927 | erealloc(FIELDWIDTHS, int *, fw_alloc * sizeof(int), "set_FIELDWIDTHS");
|
---|
928 | }
|
---|
929 | /* Ensure that there is no leading `-' sign. Otherwise,
|
---|
930 | strtoul would accept it and return a bogus result. */
|
---|
931 | while (is_blank(*scan)) {
|
---|
932 | ++scan;
|
---|
933 | }
|
---|
934 | if (*scan == '-')
|
---|
935 | fatal(_("invalid FIELDWIDTHS value, near `%s'"),
|
---|
936 | scan);
|
---|
937 |
|
---|
938 | /* Detect an invalid base-10 integer, a valid value that
|
---|
939 | is followed by something other than a blank or '\0',
|
---|
940 | or a value that is not in the range [1..INT_MAX]. */
|
---|
941 | errno = 0;
|
---|
942 | tmp = strtoul(scan, &end, 10);
|
---|
943 | if (errno != 0
|
---|
944 | || !(*end == '\0' || is_blank(*end))
|
---|
945 | || !(0 < tmp && tmp <= INT_MAX))
|
---|
946 | fatal(_("invalid FIELDWIDTHS value, near `%s'"),
|
---|
947 | scan);
|
---|
948 | FIELDWIDTHS[i] = tmp;
|
---|
949 | scan = end;
|
---|
950 | /* Skip past any trailing blanks. */
|
---|
951 | while (is_blank(*scan)) {
|
---|
952 | ++scan;
|
---|
953 | }
|
---|
954 | if (*scan == '\0')
|
---|
955 | break;
|
---|
956 | }
|
---|
957 | FIELDWIDTHS[i] = -1;
|
---|
958 |
|
---|
959 | update_PROCINFO("FS", "FIELDWIDTHS");
|
---|
960 | }
|
---|
961 |
|
---|
962 | /* set_FS --- handle things when FS is assigned to */
|
---|
963 |
|
---|
964 | void
|
---|
965 | set_FS()
|
---|
966 | {
|
---|
967 | char buf[10];
|
---|
968 | NODE *fs;
|
---|
969 | static NODE *save_fs = NULL;
|
---|
970 | static NODE *save_rs = NULL;
|
---|
971 | int remake_re = TRUE;
|
---|
972 |
|
---|
973 | /*
|
---|
974 | * If changing the way fields are split, obey least-suprise
|
---|
975 | * semantics, and force $0 to be split totally.
|
---|
976 | */
|
---|
977 | if (fields_arr != NULL)
|
---|
978 | (void) get_field(UNLIMITED - 1, 0);
|
---|
979 |
|
---|
980 | /* It's possible that only IGNORECASE changed, or FS = FS */
|
---|
981 | /*
|
---|
982 | * This comparison can't use cmp_nodes(), which pays attention
|
---|
983 | * to IGNORECASE, and that's not what we want.
|
---|
984 | */
|
---|
985 | if (save_fs
|
---|
986 | && FS_node->var_value->stlen == save_fs->stlen
|
---|
987 | && memcmp(FS_node->var_value->stptr, save_fs->stptr, save_fs->stlen) == 0
|
---|
988 | && save_rs
|
---|
989 | && RS_node->var_value->stlen == save_rs->stlen
|
---|
990 | && memcmp(RS_node->var_value->stptr, save_rs->stptr, save_rs->stlen) == 0) {
|
---|
991 | if (FS_regexp != NULL)
|
---|
992 | FS_regexp = (IGNORECASE ? FS_re_no_case : FS_re_yes_case);
|
---|
993 |
|
---|
994 | /* FS = FS */
|
---|
995 | if (! using_FIELDWIDTHS()) {
|
---|
996 | return;
|
---|
997 | } else {
|
---|
998 | remake_re = FALSE;
|
---|
999 | goto choose_fs_function;
|
---|
1000 | }
|
---|
1001 | }
|
---|
1002 |
|
---|
1003 | unref(save_fs);
|
---|
1004 | save_fs = dupnode(FS_node->var_value);
|
---|
1005 | unref(save_rs);
|
---|
1006 | save_rs = dupnode(RS_node->var_value);
|
---|
1007 | resave_fs = TRUE;
|
---|
1008 | if (FS_regexp != NULL) {
|
---|
1009 | refree(FS_re_yes_case);
|
---|
1010 | refree(FS_re_no_case);
|
---|
1011 | FS_re_yes_case = FS_re_no_case = FS_regexp = NULL;
|
---|
1012 | }
|
---|
1013 |
|
---|
1014 |
|
---|
1015 | choose_fs_function:
|
---|
1016 | buf[0] = '\0';
|
---|
1017 | default_FS = FALSE;
|
---|
1018 | fs = force_string(FS_node->var_value);
|
---|
1019 |
|
---|
1020 | if (! do_traditional && fs->stlen == 0) {
|
---|
1021 | static short warned = FALSE;
|
---|
1022 |
|
---|
1023 | parse_field = null_parse_field;
|
---|
1024 |
|
---|
1025 | if (do_lint && ! warned) {
|
---|
1026 | warned = TRUE;
|
---|
1027 | lintwarn(_("null string for `FS' is a gawk extension"));
|
---|
1028 | }
|
---|
1029 | } else if (fs->stlen > 1) {
|
---|
1030 | parse_field = re_parse_field;
|
---|
1031 | } else if (RS_is_null) {
|
---|
1032 | /* we know that fs->stlen <= 1 */
|
---|
1033 | parse_field = sc_parse_field;
|
---|
1034 | if (fs->stlen == 1) {
|
---|
1035 | if (fs->stptr[0] == ' ') {
|
---|
1036 | default_FS = TRUE;
|
---|
1037 | strcpy(buf, "[ \t\n]+");
|
---|
1038 | } else if (fs->stptr[0] == '\\') {
|
---|
1039 | /* yet another special case */
|
---|
1040 | strcpy(buf, "[\\\\\n]");
|
---|
1041 | } else if (fs->stptr[0] != '\n')
|
---|
1042 | sprintf(buf, "[%c\n]", fs->stptr[0]);
|
---|
1043 | }
|
---|
1044 | } else {
|
---|
1045 | if (do_posix)
|
---|
1046 | parse_field = posix_def_parse_field;
|
---|
1047 | else
|
---|
1048 | parse_field = def_parse_field;
|
---|
1049 |
|
---|
1050 | if (fs->stlen == 1) {
|
---|
1051 | if (fs->stptr[0] == ' ')
|
---|
1052 | default_FS = TRUE;
|
---|
1053 | else if (fs->stptr[0] == '\\')
|
---|
1054 | /* same special case */
|
---|
1055 | strcpy(buf, "[\\\\]");
|
---|
1056 | else
|
---|
1057 | parse_field = sc_parse_field;
|
---|
1058 | }
|
---|
1059 | }
|
---|
1060 | if (remake_re) {
|
---|
1061 | if (FS_regexp != NULL) {
|
---|
1062 | refree(FS_re_yes_case);
|
---|
1063 | refree(FS_re_no_case);
|
---|
1064 | FS_re_yes_case = FS_re_no_case = FS_regexp = NULL;
|
---|
1065 | }
|
---|
1066 |
|
---|
1067 | if (buf[0] != '\0') {
|
---|
1068 | FS_re_yes_case = make_regexp(buf, strlen(buf), FALSE, TRUE);
|
---|
1069 | FS_re_no_case = make_regexp(buf, strlen(buf), TRUE, TRUE);
|
---|
1070 | FS_regexp = (IGNORECASE ? FS_re_no_case : FS_re_yes_case);
|
---|
1071 | parse_field = re_parse_field;
|
---|
1072 | } else if (parse_field == re_parse_field) {
|
---|
1073 | FS_re_yes_case = make_regexp(fs->stptr, fs->stlen, FALSE, TRUE);
|
---|
1074 | FS_re_no_case = make_regexp(fs->stptr, fs->stlen, TRUE, TRUE);
|
---|
1075 | FS_regexp = (IGNORECASE ? FS_re_no_case : FS_re_yes_case);
|
---|
1076 | } else
|
---|
1077 | FS_re_yes_case = FS_re_no_case = FS_regexp = NULL;
|
---|
1078 | }
|
---|
1079 |
|
---|
1080 | /*
|
---|
1081 | * For FS = "c", we don't use IGNORECASE. But we must use
|
---|
1082 | * re_parse_field to get the character and the newline as
|
---|
1083 | * field separators.
|
---|
1084 | */
|
---|
1085 | if (fs->stlen == 1 && parse_field == re_parse_field)
|
---|
1086 | FS_regexp = FS_re_yes_case;
|
---|
1087 |
|
---|
1088 | update_PROCINFO("FS", "FS");
|
---|
1089 | }
|
---|
1090 |
|
---|
/*
 * using_fieldwidths --- report whether FIELDWIDTHS splitting is in use
 *
 * Function wrapper that returns the value of using_FIELDWIDTHS().
 */

int
using_fieldwidths()
{
	int in_use;

	in_use = using_FIELDWIDTHS();
	return in_use;
}
|
---|
1098 |
|
---|
1099 | /* update_PROCINFO --- update PROCINFO[sub] when FS or FIELDWIDTHS set */
|
---|
1100 |
|
---|
1101 | static void
|
---|
1102 | update_PROCINFO(char *subscript, char *str)
|
---|
1103 | {
|
---|
1104 | NODE **aptr;
|
---|
1105 |
|
---|
1106 | if (PROCINFO_node == NULL)
|
---|
1107 | return;
|
---|
1108 |
|
---|
1109 | aptr = assoc_lookup(PROCINFO_node, tmp_string(subscript, strlen(subscript)), FALSE);
|
---|
1110 | assign_val(aptr, tmp_string(str, strlen(str)));
|
---|
1111 | }
|
---|