| 1 | /* grep.c - main driver file for grep.
|
|---|
| 2 | Copyright (C) 1992, 1997-2002, 2004-2021 Free Software Foundation, Inc.
|
|---|
| 3 |
|
|---|
| 4 | This program is free software; you can redistribute it and/or modify
|
|---|
| 5 | it under the terms of the GNU General Public License as published by
|
|---|
| 6 | the Free Software Foundation; either version 3, or (at your option)
|
|---|
| 7 | any later version.
|
|---|
| 8 |
|
|---|
| 9 | This program is distributed in the hope that it will be useful,
|
|---|
| 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|---|
| 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|---|
| 12 | GNU General Public License for more details.
|
|---|
| 13 |
|
|---|
| 14 | You should have received a copy of the GNU General Public License
|
|---|
| 15 | along with this program; if not, write to the Free Software
|
|---|
| 16 | Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
|
|---|
| 17 | 02110-1301, USA. */
|
|---|
| 18 |
|
|---|
| 19 | /* Written July 1992 by Mike Haertel. */
|
|---|
| 20 |
|
|---|
| 21 | #include <config.h>
|
|---|
| 22 | #include <sys/types.h>
|
|---|
| 23 | #include <sys/stat.h>
|
|---|
| 24 | #include <wchar.h>
|
|---|
| 25 | #include <inttypes.h>
|
|---|
| 26 | #include <stdarg.h>
|
|---|
| 27 | #include <stdint.h>
|
|---|
| 28 | #include <stdio.h>
|
|---|
| 29 | #include "system.h"
|
|---|
| 30 |
|
|---|
| 31 | #include "argmatch.h"
|
|---|
| 32 | #include "c-ctype.h"
|
|---|
| 33 | #include "c-stack.h"
|
|---|
| 34 | #include "closeout.h"
|
|---|
| 35 | #include "colorize.h"
|
|---|
| 36 | #include "die.h"
|
|---|
| 37 | #include "error.h"
|
|---|
| 38 | #include "exclude.h"
|
|---|
| 39 | #include "exitfail.h"
|
|---|
| 40 | #include "fcntl-safer.h"
|
|---|
| 41 | #include "fts_.h"
|
|---|
| 42 | #include "getopt.h"
|
|---|
| 43 | #include "getprogname.h"
|
|---|
| 44 | #include "grep.h"
|
|---|
| 45 | #include "hash.h"
|
|---|
| 46 | #include "intprops.h"
|
|---|
| 47 | #include "propername.h"
|
|---|
| 48 | #include "safe-read.h"
|
|---|
| 49 | #include "search.h"
|
|---|
| 50 | #include "c-strcase.h"
|
|---|
| 51 | #include "version-etc.h"
|
|---|
| 52 | #include "xalloc.h"
|
|---|
| 53 | #include "xbinary-io.h"
|
|---|
| 54 | #include "xstrtol.h"
|
|---|
| 55 |
|
|---|
/* Separator bytes for selected (':') and rejected ('-') lines, plus
   the default group separator string.  */
enum
{
  SEP_CHAR_SELECTED = ':',
  SEP_CHAR_REJECTED = '-'
};
static char const SEP_STR_GROUP[] = "--";
|
|---|
| 59 |
|
|---|
/* Stat information for standard output, saved when stdout is connected
   to a regular file.  It lets that file be skipped automatically,
   which avoids a potential (racy) infinite loop.  */
static struct stat out_stat;

/* Nonzero means display usage information and exit.  This is an int,
   not a bool, because long_options stores its address as a getopt
   flag pointer.  */
static int show_help;

/* True means print the version on standard output and exit.  */
static bool show_version;

/* True means suppress diagnostics for nonexistent or unreadable files.  */
static bool suppress_errors;

/* Nonzero means use color markers.  */
static int color_option;

/* True means show only the part of a line matching the expression.  */
static bool only_matching;

/* True means make sure the first content char in a line is on a
   tab stop.  */
static bool align_tabs;

/* Print width of line numbers and byte offsets.  Nonzero if ALIGN_TABS.  */
static int offset_width;
|
|---|
| 85 |
|
|---|
/* One entry of the PATLOC array, recording where a run of patterns
   came from.  */
struct patloc
{
  /* Origin-0 line number of the pattern within PATTERN_ARRAY, where
     every pattern is terminated by '\n'.  */
  ptrdiff_t lineno;

  /* Where the pattern was read from.  FILENAME is "-" for standard
     input and "" for the command line.  FILELINE is the origin-1
     line within that file; it is irrelevant for the command line.  */
  char const *filename;
  ptrdiff_t fileline;
};
|
|---|
| 99 |
|
|---|
| 100 | /* The array of pattern locations. The concatenation of all patterns
|
|---|
| 101 | is stored in a single array, KEYS. Given the invocation
|
|---|
| 102 | 'grep -f <(seq 5) -f <(seq 6) -f <(seq 3)', there will initially be
|
|---|
| 103 | 28 bytes in KEYS. After duplicate patterns are removed, KEYS
|
|---|
| 104 | will have 12 bytes and PATLOC will be {0,x,1}, {10,y,1}
|
|---|
| 105 | where x, y and z are just place-holders for shell-generated names
|
|---|
| 106 | since and z is omitted as it contains only duplicates. Sometimes
|
|---|
| 107 | removing duplicates will grow PATLOC, since each run of
|
|---|
| 108 | removed patterns not at a file start or end requires another
|
|---|
| 109 | PATLOC entry for the first non-removed pattern. */
|
|---|
| 110 | static struct patloc *patloc;
|
|---|
| 111 | static size_t patlocs_allocated, patlocs_used;
|
|---|
| 112 |
|
|---|
| 113 | /* Pointer to the array of patterns, each terminated by newline. */
|
|---|
| 114 | static char *pattern_array;
|
|---|
| 115 |
|
|---|
| 116 | /* The number of unique patterns seen so far. */
|
|---|
| 117 | static size_t n_patterns;
|
|---|
| 118 |
|
|---|
| 119 | /* Hash table of patterns seen so far. */
|
|---|
| 120 | static Hash_table *pattern_table;
|
|---|
| 121 |
|
|---|
| 122 | /* Hash and compare newline-terminated patterns for textual equality.
|
|---|
| 123 | Patterns are represented by origin-1 offsets into PATTERN_ARRAY,
|
|---|
| 124 | cast to void *. The origin-1 is so that the first pattern offset
|
|---|
| 125 | does not appear to be a null pointer when cast to void *. */
|
|---|
| 126 | static size_t _GL_ATTRIBUTE_PURE
|
|---|
| 127 | hash_pattern (void const *pat, size_t n_buckets)
|
|---|
| 128 | {
|
|---|
| 129 | size_t h = 0;
|
|---|
| 130 | intptr_t pat_offset = (intptr_t) pat - 1;
|
|---|
| 131 | unsigned char const *s = (unsigned char const *) pattern_array + pat_offset;
|
|---|
| 132 | for ( ; *s != '\n'; s++)
|
|---|
| 133 | h = h * 33 ^ *s;
|
|---|
| 134 | return h % n_buckets;
|
|---|
| 135 | }
|
|---|
| 136 | static bool _GL_ATTRIBUTE_PURE
|
|---|
| 137 | compare_patterns (void const *a, void const *b)
|
|---|
| 138 | {
|
|---|
| 139 | intptr_t a_offset = (intptr_t) a - 1;
|
|---|
| 140 | intptr_t b_offset = (intptr_t) b - 1;
|
|---|
| 141 | char const *p = pattern_array + a_offset;
|
|---|
| 142 | char const *q = pattern_array + b_offset;
|
|---|
| 143 | for (; *p == *q; p++, q++)
|
|---|
| 144 | if (*p == '\n')
|
|---|
| 145 | return true;
|
|---|
| 146 | return false;
|
|---|
| 147 | }
|
|---|
| 148 |
|
|---|
| 149 | /* Update KEYS to remove duplicate patterns, and return the number of
|
|---|
| 150 | bytes in the resulting KEYS. KEYS contains a sequence of patterns
|
|---|
| 151 | each terminated by '\n'. The first DUPFREE_SIZE bytes are a
|
|---|
| 152 | sequence of patterns with no duplicates; SIZE is the total number
|
|---|
| 153 | of bytes in KEYS. If some patterns past the first DUPFREE_SIZE
|
|---|
| 154 | bytes are not duplicates, update PATLOCS accordingly. */
|
|---|
| 155 | static ptrdiff_t
|
|---|
| 156 | update_patterns (char *keys, ptrdiff_t dupfree_size, ptrdiff_t size,
|
|---|
| 157 | char const *filename)
|
|---|
| 158 | {
|
|---|
| 159 | char *dst = keys + dupfree_size;
|
|---|
| 160 | ptrdiff_t fileline = 1;
|
|---|
| 161 | int prev_inserted = 0;
|
|---|
| 162 |
|
|---|
| 163 | char const *srclim = keys + size;
|
|---|
| 164 | ptrdiff_t patsize;
|
|---|
| 165 | for (char const *src = keys + dupfree_size; src < srclim; src += patsize)
|
|---|
| 166 | {
|
|---|
| 167 | char const *patend = rawmemchr (src, '\n');
|
|---|
| 168 | patsize = patend + 1 - src;
|
|---|
| 169 | memmove (dst, src, patsize);
|
|---|
| 170 |
|
|---|
| 171 | intptr_t dst_offset_1 = dst - keys + 1;
|
|---|
| 172 | int inserted = hash_insert_if_absent (pattern_table,
|
|---|
| 173 | (void *) dst_offset_1, NULL);
|
|---|
| 174 | if (inserted)
|
|---|
| 175 | {
|
|---|
| 176 | if (inserted < 0)
|
|---|
| 177 | xalloc_die ();
|
|---|
| 178 | dst += patsize;
|
|---|
| 179 |
|
|---|
| 180 | /* Add a PATLOCS entry unless this input line is simply the
|
|---|
| 181 | next one in the same file. */
|
|---|
| 182 | if (!prev_inserted)
|
|---|
| 183 | {
|
|---|
| 184 | if (patlocs_used == patlocs_allocated)
|
|---|
| 185 | patloc = x2nrealloc (patloc, &patlocs_allocated,
|
|---|
| 186 | sizeof *patloc);
|
|---|
| 187 | patloc[patlocs_used++]
|
|---|
| 188 | = (struct patloc) { .lineno = n_patterns,
|
|---|
| 189 | .filename = filename,
|
|---|
| 190 | .fileline = fileline };
|
|---|
| 191 | }
|
|---|
| 192 | n_patterns++;
|
|---|
| 193 | }
|
|---|
| 194 |
|
|---|
| 195 | prev_inserted = inserted;
|
|---|
| 196 | fileline++;
|
|---|
| 197 | }
|
|---|
| 198 |
|
|---|
| 199 | return dst - keys;
|
|---|
| 200 | }
|
|---|
| 201 |
|
|---|
| 202 | /* Map LINENO, the origin-0 line number of one of the input patterns,
|
|---|
| 203 | to the name of the file from which it came. Return "-" if it was
|
|---|
| 204 | read from stdin, "" if it was specified on the command line.
|
|---|
| 205 | Set *NEW_LINENO to the origin-1 line number of PATTERN in the file,
|
|---|
| 206 | or to an unspecified value if PATTERN came from the command line. */
|
|---|
| 207 | char const * _GL_ATTRIBUTE_PURE
|
|---|
| 208 | pattern_file_name (size_t lineno, size_t *new_lineno)
|
|---|
| 209 | {
|
|---|
| 210 | ptrdiff_t i;
|
|---|
| 211 | for (i = 1; i < patlocs_used; i++)
|
|---|
| 212 | if (lineno < patloc[i].lineno)
|
|---|
| 213 | break;
|
|---|
| 214 | *new_lineno = lineno - patloc[i - 1].lineno + patloc[i - 1].fileline;
|
|---|
| 215 | return patloc[i - 1].filename;
|
|---|
| 216 | }
|
|---|
| 217 |
|
|---|
#if !HAVE_ASAN
/* Without HAVE_ASAN, both helpers do nothing.  */
static void clear_asan_poison (void) { }
static void asan_poison (void const volatile *addr, size_t size) { }
#else
/* Start address and length of the sole poisoned region, remembered so
   that it can be unpoisoned later, just before each following read.  */
static void const *poison_buf;
static size_t poison_len;

/* Unpoison the recorded region, if one has been recorded.  */
static void
clear_asan_poison (void)
{
  if (poison_buf)
    __asan_unpoison_memory_region (poison_buf, poison_len);
}

/* Record the SIZE bytes starting at ADDR as the poisoned region, and
   poison them.  */
static void
asan_poison (void const *addr, size_t size)
{
  poison_buf = addr;
  poison_len = size;

  __asan_poison_memory_region (poison_buf, poison_len);
}
#endif
|
|---|
| 243 |
|
|---|
| 244 | /* The group separator used when context is requested. */
|
|---|
| 245 | static const char *group_separator = SEP_STR_GROUP;
|
|---|
| 246 |
|
|---|
| 247 | /* The context and logic for choosing default --color screen attributes
|
|---|
| 248 | (foreground and background colors, etc.) are the following.
|
|---|
| 249 | -- There are eight basic colors available, each with its own
|
|---|
| 250 | nominal luminosity to the human eye and foreground/background
|
|---|
| 251 | codes (black [0 %, 30/40], blue [11 %, 34/44], red [30 %, 31/41],
|
|---|
| 252 | magenta [41 %, 35/45], green [59 %, 32/42], cyan [70 %, 36/46],
|
|---|
| 253 | yellow [89 %, 33/43], and white [100 %, 37/47]).
|
|---|
| 254 | -- Sometimes, white as a background is actually implemented using
|
|---|
| 255 | a shade of light gray, so that a foreground white can be visible
|
|---|
| 256 | on top of it (but most often not).
|
|---|
| 257 | -- Sometimes, black as a foreground is actually implemented using
|
|---|
| 258 | a shade of dark gray, so that it can be visible on top of a
|
|---|
| 259 | background black (but most often not).
|
|---|
| 260 | -- Sometimes, more colors are available, as extensions.
|
|---|
| 261 | -- Other attributes can be selected/deselected (bold [1/22],
|
|---|
| 262 | underline [4/24], standout/inverse [7/27], blink [5/25], and
|
|---|
| 263 | invisible/hidden [8/28]). They are sometimes implemented by
|
|---|
| 264 | using colors instead of what their names imply; e.g., bold is
|
|---|
| 265 | often achieved by using brighter colors. In practice, only bold
|
|---|
| 266 | is really available to us, underline sometimes being mapped by
|
|---|
| 267 | the terminal to some strange color choice, and standout best
|
|---|
| 268 | being left for use by downstream programs such as less(1).
|
|---|
| 269 | -- We cannot assume that any of the extensions or special features
|
|---|
| 270 | are available for the purpose of choosing defaults for everyone.
|
|---|
| 271 | -- The most prevalent default terminal backgrounds are pure black
|
|---|
| 272 | and pure white, and are not necessarily the same shades of
|
|---|
| 273 | those as if they were selected explicitly with SGR sequences.
|
|---|
| 274 | Some terminals use dark or light pictures as default background,
|
|---|
| 275 | but those are covered over by an explicit selection of background
|
|---|
| 276 | color with an SGR sequence; their users will appreciate their
|
|---|
| 277 | background pictures not being covered like this, if possible.
|
|---|
| 278 | -- Some uses of color attributes are to make some output items
|
|---|
| 279 | more understated (e.g., context lines); this cannot be achieved
|
|---|
| 280 | by changing the background color.
|
|---|
| 281 | -- For these reasons, the grep color defaults should strive not
|
|---|
| 282 | to change the background color from its default, unless it's
|
|---|
| 283 | for a short item that should be highlighted, not understated.
|
|---|
| 284 | -- The grep foreground color defaults (without an explicitly set
|
|---|
| 285 | background) should provide enough contrast to be readable on any
|
|---|
| 286 | terminal with either a black (dark) or white (light) background.
|
|---|
| 287 | This only leaves red, magenta, green, and cyan (and their bold
|
|---|
| 288 | counterparts) and possibly bold blue. */
|
|---|
/* SGR parameter strings for matched text.  The user can override
   them with the deprecated environment variable GREP_COLOR or the
   newer GREP_COLORS.  */
static char const *selected_match_color = "01;31";  /* bold red */
static char const *context_match_color = "01;31";   /* bold red */

/* SGR parameter strings for the other output items.  An empty string
   selects the default color pair.  */
static char const *filename_color = "35";       /* magenta */
static char const *line_num_color = "32";       /* green */
static char const *byte_num_color = "32";       /* green */
static char const *sep_color = "36";            /* cyan */
static char const *selected_line_color = "";    /* default color pair */
static char const *context_line_color = "";     /* default color pair */
|
|---|
| 302 |
|
|---|
| 303 | /* Select Graphic Rendition (SGR, "\33[...m") strings. */
|
|---|
| 304 | /* Also Erase in Line (EL) to Right ("\33[K") by default. */
|
|---|
| 305 | /* Why have EL to Right after SGR?
|
|---|
| 306 | -- The behavior of line-wrapping when at the bottom of the
|
|---|
| 307 | terminal screen and at the end of the current line is often
|
|---|
| 308 | such that a new line is introduced, entirely cleared with
|
|---|
| 309 | the current background color which may be different from the
|
|---|
| 310 | default one (see the boolean back_color_erase terminfo(5)
|
|---|
| 311 | capability), thus scrolling the display by one line.
|
|---|
| 312 | The end of this new line will stay in this background color
|
|---|
| 313 | even after reverting to the default background color with
|
|---|
| 314 | "\33[m", unless it is explicitly cleared again with "\33[K"
|
|---|
| 315 | (which is the behavior the user would instinctively expect
|
|---|
| 316 | from the whole thing). There may be some unavoidable
|
|---|
| 317 | background-color flicker at the end of this new line because
|
|---|
| 318 | of this (when timing with the monitor's redraw is just right).
|
|---|
| 319 | -- The behavior of HT (tab, "\t") is usually the same as that of
|
|---|
| 320 | Cursor Forward Tabulation (CHT) with a default parameter
|
|---|
| 321 | of 1 ("\33[I"), i.e., it performs pure movement to the next
|
|---|
| 322 | tab stop, without any clearing of either content or screen
|
|---|
| 323 | attributes (including background color); try
|
|---|
| 324 | printf 'asdfqwerzxcv\rASDF\tZXCV\n'
|
|---|
| 325 | in a bash(1) shell to demonstrate this. This is not what the
|
|---|
| 326 | user would instinctively expect of HT (but is ok for CHT).
|
|---|
| 327 | The instinctive behavior would include clearing the terminal
|
|---|
| 328 | cells that are skipped over by HT with blank cells in the
|
|---|
| 329 | current screen attributes, including background color;
|
|---|
| 330 | the boolean dest_tabs_magic_smso terminfo(5) capability
|
|---|
| 331 | indicates this saner behavior for HT, but only some rare
|
|---|
| 332 | terminals have it (although it also indicates a special
|
|---|
| 333 | glitch with standout mode in the Teleray terminal for which
|
|---|
| 334 | it was initially introduced). The remedy is to add "\33[K"
|
|---|
| 335 | after each SGR sequence, be it START (to fix the behavior
|
|---|
| 336 | of any HT after that before another SGR) or END (to fix the
|
|---|
| 337 | behavior of an HT in default background color that would
|
|---|
| 338 | follow a line-wrapping at the bottom of the screen in another
|
|---|
| 339 | background color, and to complement doing it after START).
|
|---|
| 340 | Piping grep's output through a pager such as less(1) avoids
|
|---|
| 341 | any HT problems since the pager performs tab expansion.
|
|---|
| 342 |
|
|---|
| 343 | Generic disadvantages of this remedy are:
|
|---|
| 344 | -- Some very rare terminals might support SGR but not EL (nobody
|
|---|
| 345 | will use "grep --color" on a terminal that does not support
|
|---|
| 346 | SGR in the first place).
|
|---|
| 347 | -- Having these extra control sequences might somewhat complicate
|
|---|
| 348 | the task of any program trying to parse "grep --color"
|
|---|
| 349 | output in order to extract structuring information from it.
|
|---|
| 350 | A specific disadvantage to doing it after SGR START is:
|
|---|
| 351 | -- Even more possible background color flicker (when timing
|
|---|
| 352 | with the monitor's redraw is just right), even when not at the
|
|---|
| 353 | bottom of the screen.
|
|---|
| 354 | There are no additional disadvantages specific to doing it after
|
|---|
| 355 | SGR END.
|
|---|
| 356 |
|
|---|
| 357 | It would be impractical for GNU grep to become a full-fledged
|
|---|
| 358 | terminal program linked against ncurses or the like, so it will
|
|---|
| 359 | not detect terminfo(5) capabilities. */
|
|---|
/* Formats for starting and ending a Select Graphic Rendition (SGR)
   sequence.  By default each is followed by Erase in Line to Right
   ("\33[K").  SGR_START takes the SGR parameter string as its %s
   argument.  */
static char const *sgr_start = "\33[%sm\33[K";
static char const *sgr_end = "\33[m\33[K";
|
|---|
| 362 |
|
|---|
| 363 | /* SGR utility functions. */
|
|---|
| 364 | static void
|
|---|
| 365 | pr_sgr_start (char const *s)
|
|---|
| 366 | {
|
|---|
| 367 | if (*s)
|
|---|
| 368 | print_start_colorize (sgr_start, s);
|
|---|
| 369 | }
|
|---|
| 370 | static void
|
|---|
| 371 | pr_sgr_end (char const *s)
|
|---|
| 372 | {
|
|---|
| 373 | if (*s)
|
|---|
| 374 | print_end_colorize (sgr_end);
|
|---|
| 375 | }
|
|---|
| 376 | static void
|
|---|
| 377 | pr_sgr_start_if (char const *s)
|
|---|
| 378 | {
|
|---|
| 379 | if (color_option)
|
|---|
| 380 | pr_sgr_start (s);
|
|---|
| 381 | }
|
|---|
| 382 | static void
|
|---|
| 383 | pr_sgr_end_if (char const *s)
|
|---|
| 384 | {
|
|---|
| 385 | if (color_option)
|
|---|
| 386 | pr_sgr_end (s);
|
|---|
| 387 | }
|
|---|
| 388 |
|
|---|
/* One entry of the color capability table (see color_dict).
   NOTE(review): the code that consumes these entries is outside this
   excerpt.  Presumably VAR, when non-null, receives the capability's
   value, and FCT, when non-null, is called when the capability is
   seen.  Confirm against the parser.  */
struct color_cap
{
  char const *name;     /* capability name, e.g. "mt" */
  char const **var;     /* color variable tied to NAME, or NULL */
  void (*fct) (void);   /* hook tied to NAME, or NULL */
};
|
|---|
| 395 |
|
|---|
| 396 | static void
|
|---|
| 397 | color_cap_mt_fct (void)
|
|---|
| 398 | {
|
|---|
| 399 | /* Our caller just set selected_match_color. */
|
|---|
| 400 | context_match_color = selected_match_color;
|
|---|
| 401 | }
|
|---|
| 402 |
|
|---|
| 403 | static void
|
|---|
| 404 | color_cap_rv_fct (void)
|
|---|
| 405 | {
|
|---|
| 406 | /* By this point, it was 1 (or already -1). */
|
|---|
| 407 | color_option = -1; /* That's still != 0. */
|
|---|
| 408 | }
|
|---|
| 409 |
|
|---|
| 410 | static void
|
|---|
| 411 | color_cap_ne_fct (void)
|
|---|
| 412 | {
|
|---|
| 413 | sgr_start = "\33[%sm";
|
|---|
| 414 | sgr_end = "\33[m";
|
|---|
| 415 | }
|
|---|
| 416 |
|
|---|
| 417 | /* For GREP_COLORS. */
|
|---|
| 418 | static const struct color_cap color_dict[] =
|
|---|
| 419 | {
|
|---|
| 420 | { "mt", &selected_match_color, color_cap_mt_fct }, /* both ms/mc */
|
|---|
| 421 | { "ms", &selected_match_color, NULL }, /* selected matched text */
|
|---|
| 422 | { "mc", &context_match_color, NULL }, /* context matched text */
|
|---|
| 423 | { "fn", &filename_color, NULL }, /* filename */
|
|---|
| 424 | { "ln", &line_num_color, NULL }, /* line number */
|
|---|
| 425 | { "bn", &byte_num_color, NULL }, /* byte (sic) offset */
|
|---|
| 426 | { "se", &sep_color, NULL }, /* separator */
|
|---|
| 427 | { "sl", &selected_line_color, NULL }, /* selected lines */
|
|---|
| 428 | { "cx", &context_line_color, NULL }, /* context lines */
|
|---|
| 429 | { "rv", NULL, color_cap_rv_fct }, /* -v reverses sl/cx */
|
|---|
| 430 | { "ne", NULL, color_cap_ne_fct }, /* no EL on SGR_* */
|
|---|
| 431 | { NULL, NULL, NULL }
|
|---|
| 432 | };
|
|---|
| 433 |
|
|---|
/* The errno value saved from a failed output function on stdout.  */
static int stdout_errno;

/* Write the byte C to stdout, saving errno in stdout_errno on
   failure.  */
static void
putchar_errno (int c)
{
  int status = putchar (c);
  if (status < 0)
    stdout_errno = errno;
}
|
|---|
| 443 |
|
|---|
| 444 | static void
|
|---|
| 445 | fputs_errno (char const *s)
|
|---|
| 446 | {
|
|---|
| 447 | if (fputs (s, stdout) < 0)
|
|---|
| 448 | stdout_errno = errno;
|
|---|
| 449 | }
|
|---|
| 450 |
|
|---|
| 451 | static void _GL_ATTRIBUTE_FORMAT_PRINTF_STANDARD (1, 2)
|
|---|
| 452 | printf_errno (char const *format, ...)
|
|---|
| 453 | {
|
|---|
| 454 | va_list ap;
|
|---|
| 455 | va_start (ap, format);
|
|---|
| 456 | if (vfprintf (stdout, format, ap) < 0)
|
|---|
| 457 | stdout_errno = errno;
|
|---|
| 458 | va_end (ap);
|
|---|
| 459 | }
|
|---|
| 460 |
|
|---|
| 461 | static void
|
|---|
| 462 | fwrite_errno (void const *ptr, size_t size, size_t nmemb)
|
|---|
| 463 | {
|
|---|
| 464 | if (fwrite (ptr, size, nmemb, stdout) != nmemb)
|
|---|
| 465 | stdout_errno = errno;
|
|---|
| 466 | }
|
|---|
| 467 |
|
|---|
| 468 | static void
|
|---|
| 469 | fflush_errno (void)
|
|---|
| 470 | {
|
|---|
| 471 | if (fflush (stdout) != 0)
|
|---|
| 472 | stdout_errno = errno;
|
|---|
| 473 | }
|
|---|
| 474 |
|
|---|
/* Exclusion pattern sets for files and for directories, two of each.
   NOTE(review): what the index of each pair selects is decided outside
   this excerpt.  */
static struct exclude *excluded_patterns[2];
static struct exclude *excluded_directory_patterns[2];

/* Short options.  The literal is split into digits, upper-case and
   lower-case groups purely for readability; the pieces concatenate to
   the same string.  */
static char const short_options[] =
  "0123456789"
  "A:B:C:D:EFGHIPTUVX:"
  "abcd:e:f:hiLlm:noqRrsuvwxyZz";
|
|---|
| 480 |
|
|---|
/* Codes for non-boolean long options that have no short equivalent.
   They start just above CHAR_MAX so that they cannot collide with any
   short option character.  */
enum
{
  BINARY_FILES_OPTION = CHAR_MAX + 1,
  COLOR_OPTION,
  EXCLUDE_DIRECTORY_OPTION,
  EXCLUDE_OPTION,
  EXCLUDE_FROM_OPTION,
  GROUP_SEPARATOR_OPTION,
  INCLUDE_OPTION,
  LINE_BUFFERED_OPTION,
  LABEL_OPTION,
  NO_IGNORE_CASE_OPTION
};
|
|---|
| 495 |
|
|---|
| 496 | /* Long options equivalences. */
|
|---|
| 497 | static struct option const long_options[] =
|
|---|
| 498 | {
|
|---|
| 499 | {"basic-regexp", no_argument, NULL, 'G'},
|
|---|
| 500 | {"extended-regexp", no_argument, NULL, 'E'},
|
|---|
| 501 | {"fixed-regexp", no_argument, NULL, 'F'},
|
|---|
| 502 | {"fixed-strings", no_argument, NULL, 'F'},
|
|---|
| 503 | {"perl-regexp", no_argument, NULL, 'P'},
|
|---|
| 504 | {"after-context", required_argument, NULL, 'A'},
|
|---|
| 505 | {"before-context", required_argument, NULL, 'B'},
|
|---|
| 506 | {"binary-files", required_argument, NULL, BINARY_FILES_OPTION},
|
|---|
| 507 | {"byte-offset", no_argument, NULL, 'b'},
|
|---|
| 508 | {"context", required_argument, NULL, 'C'},
|
|---|
| 509 | {"color", optional_argument, NULL, COLOR_OPTION},
|
|---|
| 510 | {"colour", optional_argument, NULL, COLOR_OPTION},
|
|---|
| 511 | {"count", no_argument, NULL, 'c'},
|
|---|
| 512 | {"devices", required_argument, NULL, 'D'},
|
|---|
| 513 | {"directories", required_argument, NULL, 'd'},
|
|---|
| 514 | {"exclude", required_argument, NULL, EXCLUDE_OPTION},
|
|---|
| 515 | {"exclude-from", required_argument, NULL, EXCLUDE_FROM_OPTION},
|
|---|
| 516 | {"exclude-dir", required_argument, NULL, EXCLUDE_DIRECTORY_OPTION},
|
|---|
| 517 | {"file", required_argument, NULL, 'f'},
|
|---|
| 518 | {"files-with-matches", no_argument, NULL, 'l'},
|
|---|
| 519 | {"files-without-match", no_argument, NULL, 'L'},
|
|---|
| 520 | {"group-separator", required_argument, NULL, GROUP_SEPARATOR_OPTION},
|
|---|
| 521 | {"help", no_argument, &show_help, 1},
|
|---|
| 522 | {"include", required_argument, NULL, INCLUDE_OPTION},
|
|---|
| 523 | {"ignore-case", no_argument, NULL, 'i'},
|
|---|
| 524 | {"no-ignore-case", no_argument, NULL, NO_IGNORE_CASE_OPTION},
|
|---|
| 525 | {"initial-tab", no_argument, NULL, 'T'},
|
|---|
| 526 | {"label", required_argument, NULL, LABEL_OPTION},
|
|---|
| 527 | {"line-buffered", no_argument, NULL, LINE_BUFFERED_OPTION},
|
|---|
| 528 | {"line-number", no_argument, NULL, 'n'},
|
|---|
| 529 | {"line-regexp", no_argument, NULL, 'x'},
|
|---|
| 530 | {"max-count", required_argument, NULL, 'm'},
|
|---|
| 531 |
|
|---|
| 532 | {"no-filename", no_argument, NULL, 'h'},
|
|---|
| 533 | {"no-group-separator", no_argument, NULL, GROUP_SEPARATOR_OPTION},
|
|---|
| 534 | {"no-messages", no_argument, NULL, 's'},
|
|---|
| 535 | {"null", no_argument, NULL, 'Z'},
|
|---|
| 536 | {"null-data", no_argument, NULL, 'z'},
|
|---|
| 537 | {"only-matching", no_argument, NULL, 'o'},
|
|---|
| 538 | {"quiet", no_argument, NULL, 'q'},
|
|---|
| 539 | {"recursive", no_argument, NULL, 'r'},
|
|---|
| 540 | {"dereference-recursive", no_argument, NULL, 'R'},
|
|---|
| 541 | {"regexp", required_argument, NULL, 'e'},
|
|---|
| 542 | {"invert-match", no_argument, NULL, 'v'},
|
|---|
| 543 | {"silent", no_argument, NULL, 'q'},
|
|---|
| 544 | {"text", no_argument, NULL, 'a'},
|
|---|
| 545 | {"binary", no_argument, NULL, 'U'},
|
|---|
| 546 | {"unix-byte-offsets", no_argument, NULL, 'u'},
|
|---|
| 547 | {"version", no_argument, NULL, 'V'},
|
|---|
| 548 | {"with-filename", no_argument, NULL, 'H'},
|
|---|
| 549 | {"word-regexp", no_argument, NULL, 'w'},
|
|---|
| 550 | {0, 0, 0, 0}
|
|---|
| 551 | };
|
|---|
| 552 |
|
|---|
/* Definitions of the flags that grep.h declares.  */
bool match_icase;
bool match_words;
bool match_lines;
char eolbyte;

/* State used for error messages.  */

/* Name of the input file.  For standard input it is null or a --label
   argument.  */
static char const *filename;

/* True means omit a leading "./" from file names in diagnostics.  */
static bool omit_dot_slash;

static bool errseen;

/* True if output from the current input file has been suppressed
   because an output line had an encoding error.  */
static bool encoding_error_output;
|
|---|
| 569 |
|
|---|
/* The ways a directory operand can be handled.  Numbering starts
   at 2.  */
enum directories_type
{
  READ_DIRECTORIES = 2,
  RECURSE_DIRECTORIES = 3,
  SKIP_DIRECTORIES = 4
};
|
|---|
| 576 |
|
|---|
| 577 | /* How to handle directories. */
|
|---|
| 578 | static char const *const directories_args[] =
|
|---|
| 579 | {
|
|---|
| 580 | "read", "recurse", "skip", NULL
|
|---|
| 581 | };
|
|---|
| 582 | static enum directories_type const directories_types[] =
|
|---|
| 583 | {
|
|---|
| 584 | READ_DIRECTORIES, RECURSE_DIRECTORIES, SKIP_DIRECTORIES
|
|---|
| 585 | };
|
|---|
| 586 | ARGMATCH_VERIFY (directories_args, directories_types);
|
|---|
| 587 |
|
|---|
| 588 | static enum directories_type directories = READ_DIRECTORIES;
|
|---|
| 589 |
|
|---|
| 590 | enum { basic_fts_options = FTS_CWDFD | FTS_NOSTAT | FTS_TIGHT_CYCLE_CHECK };
|
|---|
| 591 | static int fts_options = basic_fts_options | FTS_COMFOLLOW | FTS_PHYSICAL;
|
|---|
| 592 |
|
|---|
/* How to handle devices.  The default reads only devices that are
   named on the command line (see skip_devices).  */
static enum
  {
    READ_COMMAND_LINE_DEVICES = 0,
    READ_DEVICES = 1,
    SKIP_DEVICES = 2
  } devices = READ_COMMAND_LINE_DEVICES;
|
|---|
| 600 |
|
|---|
/* Forward declarations; the definitions are not part of this excerpt.  */
static bool grepfile (int, const char *, bool, bool);
static bool grepdesc (int, bool);
|
|---|
| 603 |
|
|---|
/* Return true if the file mode M denotes a character device, a block
   device, a socket, or a FIFO.  */
static bool
is_device_mode (mode_t m)
{
  if (S_ISCHR (m) || S_ISBLK (m))
    return true;
  return S_ISSOCK (m) || S_ISFIFO (m);
}
|
|---|
| 609 |
|
|---|
| 610 | static bool
|
|---|
| 611 | skip_devices (bool command_line)
|
|---|
| 612 | {
|
|---|
| 613 | return (devices == SKIP_DEVICES
|
|---|
| 614 | || ((devices == READ_COMMAND_LINE_DEVICES) & !command_line));
|
|---|
| 615 | }
|
|---|
| 616 |
|
|---|
/* Return true if ST->st_size is defined.  Assume the file is not a
   symbolic link.  */
static bool
usable_st_size (struct stat const *st)
{
  if (S_ISREG (st->st_mode))
    return true;
  return S_TYPEISSHM (st) || S_TYPEISTMO (st);
}
|
|---|
| 624 |
|
|---|
/* Stand-ins for SEEK_DATA and SEEK_HOLE on platforms that lack them.
   When they equal SEEK_SET, do not rely on them to find data or
   holes.  */
#if !defined SEEK_DATA
enum { SEEK_DATA = SEEK_SET };
#endif
#if !defined SEEK_HOLE
enum { SEEK_HOLE = SEEK_SET };
#endif
|
|---|
| 633 |
|
|---|
/* True if lseek with SEEK_CUR (seek_failed) or with SEEK_DATA
   (seek_data_failed) has failed on the current input.  */
static bool seek_failed;
static bool seek_data_failed;
|
|---|
| 637 |
|
|---|
| 638 | /* Functions we'll use to search. */
|
|---|
| 639 | typedef void *(*compile_fp_t) (char *, size_t, reg_syntax_t, bool);
|
|---|
| 640 | typedef size_t (*execute_fp_t) (void *, char const *, size_t, size_t *,
|
|---|
| 641 | char const *);
|
|---|
| 642 | static execute_fp_t execute;
|
|---|
| 643 | static void *compiled_pattern;
|
|---|
| 644 |
|
|---|
| 645 | char const *
|
|---|
| 646 | input_filename (void)
|
|---|
| 647 | {
|
|---|
| 648 | if (!filename)
|
|---|
| 649 | filename = _("(standard input)");
|
|---|
| 650 | return filename;
|
|---|
| 651 | }
|
|---|
| 652 |
|
|---|
| 653 | /* Unless requested, diagnose an error about the input file. */
|
|---|
| 654 | static void
|
|---|
| 655 | suppressible_error (int errnum)
|
|---|
| 656 | {
|
|---|
| 657 | if (! suppress_errors)
|
|---|
| 658 | error (0, errnum, "%s", input_filename ());
|
|---|
| 659 | errseen = true;
|
|---|
| 660 | }
|
|---|
| 661 |
|
|---|
| 662 | /* If there has already been a write error, don't bother closing
|
|---|
| 663 | standard output, as that might elicit a duplicate diagnostic. */
|
|---|
| 664 | static void
|
|---|
| 665 | clean_up_stdout (void)
|
|---|
| 666 | {
|
|---|
| 667 | if (! stdout_errno)
|
|---|
| 668 | close_stdout ();
|
|---|
| 669 | }
|
|---|
| 670 |
|
|---|
/* Cast VAL to TYPE.  Use this when TYPE is a pointer type, VAL is
   known to be suitably aligned for TYPE, and 'gcc -Wcast-align'
   cannot infer the alignment and would otherwise complain about the
   cast.  With GCC 4.6 or later the warning is disabled just around
   the cast; other compilers get a plain cast.  */
#if 4 < __GNUC__ + (6 <= __GNUC_MINOR__)
# define CAST_ALIGNED(type, val)                           \
    ({ __typeof__ (val) cast_aligned_src_ = val;           \
       _Pragma ("GCC diagnostic push")                     \
       _Pragma ("GCC diagnostic ignored \"-Wcast-align\"") \
       (type) cast_aligned_src_;                           \
       _Pragma ("GCC diagnostic pop")                      \
    })
#else
# define CAST_ALIGNED(type, val) ((type) (val))
#endif
|
|---|
| 685 |
|
|---|
| 686 | /* An unsigned type suitable for fast matching. */
|
|---|
| 687 | typedef uintmax_t uword;
|
|---|
| 688 | static uword const uword_max = UINTMAX_MAX;
|
|---|
| 689 |
|
|---|
| 690 | struct localeinfo localeinfo;
|
|---|
| 691 |
|
|---|
| 692 | /* A mask to test for unibyte characters, with the pattern repeated to
|
|---|
| 693 | fill a uword. For a multibyte character encoding where
|
|---|
| 694 | all bytes are unibyte characters, this is 0. For UTF-8, this is
|
|---|
| 695 | 0x808080.... For encodings where unibyte characters have no discerned
|
|---|
| 696 | pattern, this is all 1s. The unsigned char C is a unibyte
|
|---|
| 697 | character if C & UNIBYTE_MASK is zero. If the uword W is the
|
|---|
| 698 | concatenation of bytes, the bytes are all unibyte characters
|
|---|
| 699 | if W & UNIBYTE_MASK is zero. */
|
|---|
| 700 | static uword unibyte_mask;
|
|---|
| 701 |
|
|---|
| 702 | static void
|
|---|
| 703 | initialize_unibyte_mask (void)
|
|---|
| 704 | {
|
|---|
| 705 | /* For each encoding error I that MASK does not already match,
|
|---|
| 706 | accumulate I's most significant 1 bit by ORing it into MASK.
|
|---|
| 707 | Although any 1 bit of I could be used, in practice high-order
|
|---|
| 708 | bits work better. */
|
|---|
| 709 | unsigned char mask = 0;
|
|---|
| 710 | int ms1b = 1;
|
|---|
| 711 | for (int i = 1; i <= UCHAR_MAX; i++)
|
|---|
| 712 | if ((localeinfo.sbclen[i] != 1) & ! (mask & i))
|
|---|
| 713 | {
|
|---|
| 714 | while (ms1b * 2 <= i)
|
|---|
| 715 | ms1b *= 2;
|
|---|
| 716 | mask |= ms1b;
|
|---|
| 717 | }
|
|---|
| 718 |
|
|---|
| 719 | /* Now MASK will detect any encoding-error byte, although it may
|
|---|
| 720 | cry wolf and it may not be optimal. Build a uword-length mask by
|
|---|
| 721 | repeating MASK. */
|
|---|
| 722 | unibyte_mask = uword_max / UCHAR_MAX * mask;
|
|---|
| 723 | }
|
|---|
| 724 |
|
|---|
| 725 | /* Skip the easy bytes in a buffer that is guaranteed to have a sentinel
|
|---|
| 726 | that is not easy, and return a pointer to the first non-easy byte.
|
|---|
| 727 | The easy bytes all have UNIBYTE_MASK off. */
|
|---|
| 728 | static char const * _GL_ATTRIBUTE_PURE
|
|---|
| 729 | skip_easy_bytes (char const *buf)
|
|---|
| 730 | {
|
|---|
| 731 | /* Search a byte at a time until the pointer is aligned, then a
|
|---|
| 732 | uword at a time until a match is found, then a byte at a time to
|
|---|
| 733 | identify the exact byte. The uword search may go slightly past
|
|---|
| 734 | the buffer end, but that's benign. */
|
|---|
| 735 | char const *p;
|
|---|
| 736 | uword const *s;
|
|---|
| 737 | for (p = buf; (uintptr_t) p % sizeof (uword) != 0; p++)
|
|---|
| 738 | if (to_uchar (*p) & unibyte_mask)
|
|---|
| 739 | return p;
|
|---|
| 740 | for (s = CAST_ALIGNED (uword const *, p); ! (*s & unibyte_mask); s++)
|
|---|
| 741 | continue;
|
|---|
| 742 | for (p = (char const *) s; ! (to_uchar (*p) & unibyte_mask); p++)
|
|---|
| 743 | continue;
|
|---|
| 744 | return p;
|
|---|
| 745 | }
|
|---|
| 746 |
|
|---|
| 747 | /* Return true if BUF, of size SIZE, has an encoding error.
|
|---|
| 748 | BUF must be followed by at least sizeof (uword) bytes,
|
|---|
| 749 | the first of which may be modified. */
|
|---|
| 750 | static bool
|
|---|
| 751 | buf_has_encoding_errors (char *buf, size_t size)
|
|---|
| 752 | {
|
|---|
| 753 | if (! unibyte_mask)
|
|---|
| 754 | return false;
|
|---|
| 755 |
|
|---|
| 756 | mbstate_t mbs = { 0 };
|
|---|
| 757 | size_t clen;
|
|---|
| 758 |
|
|---|
| 759 | buf[size] = -1;
|
|---|
| 760 | for (char const *p = buf; (p = skip_easy_bytes (p)) < buf + size; p += clen)
|
|---|
| 761 | {
|
|---|
| 762 | clen = mbrlen (p, buf + size - p, &mbs);
|
|---|
| 763 | if ((size_t) -2 <= clen)
|
|---|
| 764 | return true;
|
|---|
| 765 | }
|
|---|
| 766 |
|
|---|
| 767 | return false;
|
|---|
| 768 | }
|
|---|
| 769 |
|
|---|
| 770 |
|
|---|
/* Return true if BUF, of size SIZE, contains a null byte.  BUF must
   be followed by at least one byte, which this function overwrites
   with a terminating null.  */
static bool
buf_has_nulls (char *buf, size_t size)
{
  /* With a terminator at BUF[SIZE], strlen stops early exactly when
     a null byte occurs within the first SIZE bytes.  */
  buf[size] = '\0';
  size_t prefix_len = strlen (buf);
  return prefix_len < size;
}
|
|---|
| 780 |
|
|---|
| 781 | /* Return true if a file is known to contain null bytes.
|
|---|
| 782 | SIZE bytes have already been read from the file
|
|---|
| 783 | with descriptor FD and status ST. */
|
|---|
| 784 | static bool
|
|---|
| 785 | file_must_have_nulls (size_t size, int fd, struct stat const *st)
|
|---|
| 786 | {
|
|---|
| 787 | /* If the file has holes, it must contain a null byte somewhere. */
|
|---|
| 788 | if (SEEK_HOLE != SEEK_SET && !seek_failed
|
|---|
| 789 | && usable_st_size (st) && size < st->st_size)
|
|---|
| 790 | {
|
|---|
| 791 | off_t cur = size;
|
|---|
| 792 | if (O_BINARY || fd == STDIN_FILENO)
|
|---|
| 793 | {
|
|---|
| 794 | cur = lseek (fd, 0, SEEK_CUR);
|
|---|
| 795 | if (cur < 0)
|
|---|
| 796 | return false;
|
|---|
| 797 | }
|
|---|
| 798 |
|
|---|
| 799 | /* Look for a hole after the current location. */
|
|---|
| 800 | off_t hole_start = lseek (fd, cur, SEEK_HOLE);
|
|---|
| 801 | if (0 <= hole_start)
|
|---|
| 802 | {
|
|---|
| 803 | if (lseek (fd, cur, SEEK_SET) < 0)
|
|---|
| 804 | suppressible_error (errno);
|
|---|
| 805 | if (hole_start < st->st_size)
|
|---|
| 806 | return true;
|
|---|
| 807 | }
|
|---|
| 808 | }
|
|---|
| 809 |
|
|---|
| 810 | return false;
|
|---|
| 811 | }
|
|---|
| 812 |
|
|---|
| 813 | /* Convert STR to a nonnegative integer, storing the result in *OUT.
|
|---|
| 814 | STR must be a valid context length argument; report an error if it
|
|---|
| 815 | isn't. Silently ceiling *OUT at the maximum value, as that is
|
|---|
| 816 | practically equivalent to infinity for grep's purposes. */
|
|---|
| 817 | static void
|
|---|
| 818 | context_length_arg (char const *str, intmax_t *out)
|
|---|
| 819 | {
|
|---|
| 820 | switch (xstrtoimax (str, 0, 10, out, ""))
|
|---|
| 821 | {
|
|---|
| 822 | case LONGINT_OK:
|
|---|
| 823 | case LONGINT_OVERFLOW:
|
|---|
| 824 | if (0 <= *out)
|
|---|
| 825 | break;
|
|---|
| 826 | FALLTHROUGH;
|
|---|
| 827 | default:
|
|---|
| 828 | die (EXIT_TROUBLE, 0, "%s: %s", str,
|
|---|
| 829 | _("invalid context length argument"));
|
|---|
| 830 | }
|
|---|
| 831 | }
|
|---|
| 832 |
|
|---|
| 833 | /* Return the add_exclude options suitable for excluding a file name.
|
|---|
| 834 | If COMMAND_LINE, it is a command-line file name. */
|
|---|
| 835 | static int
|
|---|
| 836 | exclude_options (bool command_line)
|
|---|
| 837 | {
|
|---|
| 838 | return EXCLUDE_WILDCARDS | (command_line ? 0 : EXCLUDE_ANCHORED);
|
|---|
| 839 | }
|
|---|
| 840 |
|
|---|
| 841 | /* Return true if the file with NAME should be skipped.
|
|---|
| 842 | If COMMAND_LINE, it is a command-line argument.
|
|---|
| 843 | If IS_DIR, it is a directory. */
|
|---|
| 844 | static bool
|
|---|
| 845 | skipped_file (char const *name, bool command_line, bool is_dir)
|
|---|
| 846 | {
|
|---|
| 847 | struct exclude **pats;
|
|---|
| 848 | if (! is_dir)
|
|---|
| 849 | pats = excluded_patterns;
|
|---|
| 850 | else if (directories == SKIP_DIRECTORIES)
|
|---|
| 851 | return true;
|
|---|
| 852 | else if (command_line && omit_dot_slash)
|
|---|
| 853 | return false;
|
|---|
| 854 | else
|
|---|
| 855 | pats = excluded_directory_patterns;
|
|---|
| 856 | return pats[command_line] && excluded_file_name (pats[command_line], name);
|
|---|
| 857 | }
|
|---|
| 858 |
|
|---|
/* Hairy buffering mechanism for grep.  The intent is to keep all
   reads aligned on a page boundary and multiples of the page size,
   unless a read yields a partial page.  */

/* Base of buffer.  */
static char *buffer;
/* Allocated buffer size, counting slop.  */
static size_t bufalloc;
/* File descriptor being read.  */
static int bufdesc;
/* Beginning of user-visible stuff.  */
static char *bufbeg;
/* Limit of user-visible stuff.  */
static char *buflim;
/* Alignment of memory pages.  */
static size_t pagesize;
/* Read offset.  */
static off_t bufoffset;
/* File offset just after the last matching line that would have
   been output if we were outputting characters.  */
static off_t after_last_match;
/* Skip '\0' in data.  */
static bool skip_nuls;
/* Skip empty lines in data.  */
static bool skip_empty_lines;
/* Total newline count before lastnl.  */
static uintmax_t totalnl;

/* Initial buffer size, not counting slop.  */
enum { INITIAL_BUFSIZE = 96 * 1024 };
|
|---|
| 879 |
|
|---|
/* Round VAL up to the next multiple of ALIGNMENT, yielding VAL
   itself if it is already a multiple.  VAL can be an integer or a
   pointer.  Both arguments are evaluated more than once, so they
   must be free of side effects.  */
#define ALIGN_TO(val, alignment) \
  ((uintptr_t) (val) % (alignment) != 0 \
   ? (val) + ((alignment) - (uintptr_t) (val) % (alignment)) \
   : (val))
|
|---|
| 886 |
|
|---|
| 887 | /* Add two numbers that count input bytes or lines, and report an
|
|---|
| 888 | error if the addition overflows. */
|
|---|
| 889 | static uintmax_t
|
|---|
| 890 | add_count (uintmax_t a, uintmax_t b)
|
|---|
| 891 | {
|
|---|
| 892 | uintmax_t sum = a + b;
|
|---|
| 893 | if (sum < a)
|
|---|
| 894 | die (EXIT_TROUBLE, 0, _("input is too large to count"));
|
|---|
| 895 | return sum;
|
|---|
| 896 | }
|
|---|
| 897 |
|
|---|
/* Return true if every one of the SIZE bytes at BUF is zero.  An
   empty buffer counts as all zeros.  */
static bool
all_zeros (char const *buf, size_t size)
{
  size_t i = 0;

  while (i < size && buf[i] == 0)
    i++;

  return i == size;
}
|
|---|
| 907 |
|
|---|
| 908 | /* Reset the buffer for a new file, returning false if we should skip it.
|
|---|
| 909 | Initialize on the first time through. */
|
|---|
| 910 | static bool
|
|---|
| 911 | reset (int fd, struct stat const *st)
|
|---|
| 912 | {
|
|---|
| 913 | bufbeg = buflim = ALIGN_TO (buffer + 1, pagesize);
|
|---|
| 914 | bufbeg[-1] = eolbyte;
|
|---|
| 915 | bufdesc = fd;
|
|---|
| 916 | bufoffset = fd == STDIN_FILENO ? lseek (fd, 0, SEEK_CUR) : 0;
|
|---|
| 917 | seek_failed = bufoffset < 0;
|
|---|
| 918 |
|
|---|
| 919 | /* Assume SEEK_DATA fails if SEEK_CUR does. */
|
|---|
| 920 | seek_data_failed = seek_failed;
|
|---|
| 921 |
|
|---|
| 922 | if (seek_failed)
|
|---|
| 923 | {
|
|---|
| 924 | if (errno != ESPIPE)
|
|---|
| 925 | {
|
|---|
| 926 | suppressible_error (errno);
|
|---|
| 927 | return false;
|
|---|
| 928 | }
|
|---|
| 929 | bufoffset = 0;
|
|---|
| 930 | }
|
|---|
| 931 | return true;
|
|---|
| 932 | }
|
|---|
| 933 |
|
|---|
| 934 | /* Read new stuff into the buffer, saving the specified
|
|---|
| 935 | amount of old stuff. When we're done, 'bufbeg' points
|
|---|
| 936 | to the beginning of the buffer contents, and 'buflim'
|
|---|
| 937 | points just after the end. Return false if there's an error. */
|
|---|
| 938 | static bool
|
|---|
| 939 | fillbuf (size_t save, struct stat const *st)
|
|---|
| 940 | {
|
|---|
| 941 | size_t fillsize;
|
|---|
| 942 | bool cc = true;
|
|---|
| 943 | char *readbuf;
|
|---|
| 944 | size_t readsize;
|
|---|
| 945 |
|
|---|
| 946 | if (pagesize <= buffer + bufalloc - sizeof (uword) - buflim)
|
|---|
| 947 | readbuf = buflim;
|
|---|
| 948 | else
|
|---|
| 949 | {
|
|---|
| 950 | size_t minsize = save + pagesize;
|
|---|
| 951 | size_t newsize;
|
|---|
| 952 | size_t newalloc;
|
|---|
| 953 | char *newbuf;
|
|---|
| 954 |
|
|---|
| 955 | /* Grow newsize until it is at least as great as minsize. */
|
|---|
| 956 | for (newsize = bufalloc - pagesize - sizeof (uword);
|
|---|
| 957 | newsize < minsize;
|
|---|
| 958 | newsize *= 2)
|
|---|
| 959 | if ((SIZE_MAX - pagesize - sizeof (uword)) / 2 < newsize)
|
|---|
| 960 | xalloc_die ();
|
|---|
| 961 |
|
|---|
| 962 | /* Try not to allocate more memory than the file size indicates,
|
|---|
| 963 | as that might cause unnecessary memory exhaustion if the file
|
|---|
| 964 | is large. However, do not use the original file size as a
|
|---|
| 965 | heuristic if we've already read past the file end, as most
|
|---|
| 966 | likely the file is growing. */
|
|---|
| 967 | if (usable_st_size (st))
|
|---|
| 968 | {
|
|---|
| 969 | off_t to_be_read = st->st_size - bufoffset;
|
|---|
| 970 | off_t maxsize_off = save + to_be_read;
|
|---|
| 971 | if (0 <= to_be_read && to_be_read <= maxsize_off
|
|---|
| 972 | && maxsize_off == (size_t) maxsize_off
|
|---|
| 973 | && minsize <= (size_t) maxsize_off
|
|---|
| 974 | && (size_t) maxsize_off < newsize)
|
|---|
| 975 | newsize = maxsize_off;
|
|---|
| 976 | }
|
|---|
| 977 |
|
|---|
| 978 | /* Add enough room so that the buffer is aligned and has room
|
|---|
| 979 | for byte sentinels fore and aft, and so that a uword can
|
|---|
| 980 | be read aft. */
|
|---|
| 981 | newalloc = newsize + pagesize + sizeof (uword);
|
|---|
| 982 |
|
|---|
| 983 | newbuf = bufalloc < newalloc ? xmalloc (bufalloc = newalloc) : buffer;
|
|---|
| 984 | readbuf = ALIGN_TO (newbuf + 1 + save, pagesize);
|
|---|
| 985 | size_t moved = save + 1; /* Move the preceding byte sentinel too. */
|
|---|
| 986 | memmove (readbuf - moved, buflim - moved, moved);
|
|---|
| 987 | if (newbuf != buffer)
|
|---|
| 988 | {
|
|---|
| 989 | free (buffer);
|
|---|
| 990 | buffer = newbuf;
|
|---|
| 991 | }
|
|---|
| 992 | }
|
|---|
| 993 |
|
|---|
| 994 | bufbeg = readbuf - save;
|
|---|
| 995 |
|
|---|
| 996 | clear_asan_poison ();
|
|---|
| 997 |
|
|---|
| 998 | readsize = buffer + bufalloc - sizeof (uword) - readbuf;
|
|---|
| 999 | readsize -= readsize % pagesize;
|
|---|
| 1000 |
|
|---|
| 1001 | while (true)
|
|---|
| 1002 | {
|
|---|
| 1003 | fillsize = safe_read (bufdesc, readbuf, readsize);
|
|---|
| 1004 | if (fillsize == SAFE_READ_ERROR)
|
|---|
| 1005 | {
|
|---|
| 1006 | fillsize = 0;
|
|---|
| 1007 | cc = false;
|
|---|
| 1008 | }
|
|---|
| 1009 | bufoffset += fillsize;
|
|---|
| 1010 |
|
|---|
| 1011 | if (((fillsize == 0) | !skip_nuls) || !all_zeros (readbuf, fillsize))
|
|---|
| 1012 | break;
|
|---|
| 1013 | totalnl = add_count (totalnl, fillsize);
|
|---|
| 1014 |
|
|---|
| 1015 | if (SEEK_DATA != SEEK_SET && !seek_data_failed)
|
|---|
| 1016 | {
|
|---|
| 1017 | /* Solaris SEEK_DATA fails with errno == ENXIO in a hole at EOF. */
|
|---|
| 1018 | off_t data_start = lseek (bufdesc, bufoffset, SEEK_DATA);
|
|---|
| 1019 | if (data_start < 0 && errno == ENXIO
|
|---|
| 1020 | && usable_st_size (st) && bufoffset < st->st_size)
|
|---|
| 1021 | data_start = lseek (bufdesc, 0, SEEK_END);
|
|---|
| 1022 |
|
|---|
| 1023 | if (data_start < 0)
|
|---|
| 1024 | seek_data_failed = true;
|
|---|
| 1025 | else
|
|---|
| 1026 | {
|
|---|
| 1027 | totalnl = add_count (totalnl, data_start - bufoffset);
|
|---|
| 1028 | bufoffset = data_start;
|
|---|
| 1029 | }
|
|---|
| 1030 | }
|
|---|
| 1031 | }
|
|---|
| 1032 |
|
|---|
| 1033 | buflim = readbuf + fillsize;
|
|---|
| 1034 |
|
|---|
| 1035 | /* Initialize the following word, because skip_easy_bytes and some
|
|---|
| 1036 | matchers read (but do not use) those bytes. This avoids false
|
|---|
| 1037 | positive reports of these bytes being used uninitialized. */
|
|---|
| 1038 | memset (buflim, 0, sizeof (uword));
|
|---|
| 1039 |
|
|---|
| 1040 | /* Mark the part of the buffer not filled by the read or set by
|
|---|
| 1041 | the above memset call as ASAN-poisoned. */
|
|---|
| 1042 | asan_poison (buflim + sizeof (uword),
|
|---|
| 1043 | bufalloc - (buflim - buffer) - sizeof (uword));
|
|---|
| 1044 |
|
|---|
| 1045 | return cc;
|
|---|
| 1046 | }
|
|---|
| 1047 |
|
|---|
/* Flags controlling the style of output.  */

/* How to handle binary files.  */
static enum
{
  BINARY_BINARY_FILES,
  TEXT_BINARY_FILES,
  WITHOUT_MATCH_BINARY_FILES
} binary_files;

/* Options for output as a list of matching/non-matching files.  */
static enum
{
  LISTFILES_NONE,
  LISTFILES_MATCHING,
  LISTFILES_NONMATCHING,
} list_files;

/* Whether to output filenames.  1 means yes, 0 means no, and -1 means
   'grep -r PATTERN FILE' was used and it is not known yet whether
   FILE is a directory (which means yes) or not (which means no).  */
static int out_file;

/* If zero, output nulls after filenames.  */
static int filename_mask;
/* Suppress all normal output.  */
static bool out_quiet;
/* Print nonmatching stuff.  */
static bool out_invert;
/* Print line numbers.  */
static bool out_line;
/* Print byte offsets.  */
static bool out_byte;
/* Lines of leading context.  */
static intmax_t out_before;
/* Lines of trailing context.  */
static intmax_t out_after;
/* Count matching lines.  */
static bool count_matches;
/* Max number of selected lines from an input file.  */
static intmax_t max_count;
/* Use line buffering.  */
static bool line_buffered;
/* Fake filename for stdin.  */
static char *label = NULL;


/* Internal variables to keep track of byte count, context, etc.  */

/* Total character count before bufbeg.  */
static uintmax_t totalcc;
/* Pointer after last newline counted.  */
static char const *lastnl;
/* Pointer after last character output; NULL if no character has
   been output or if it's conceptually before bufbeg.  */
static char *lastout;
/* Maximum number of selected lines.  */
static intmax_t outleft;
/* Pending lines of output.  Always kept 0 if out_quiet is true.  */
static intmax_t pending;
/* Stop scanning file on first match.  */
static bool done_on_match;
/* Exit on first match.  */
static bool exit_on_match;
/* Stdout is known to be /dev/null.  */
static bool dev_null_output;
/* Use binary rather than text I/O.  */
static bool binary;
|
|---|
| 1096 |
|
|---|
| 1097 | static void
|
|---|
| 1098 | nlscan (char const *lim)
|
|---|
| 1099 | {
|
|---|
| 1100 | size_t newlines = 0;
|
|---|
| 1101 | for (char const *beg = lastnl; beg < lim; beg++)
|
|---|
| 1102 | {
|
|---|
| 1103 | beg = memchr (beg, eolbyte, lim - beg);
|
|---|
| 1104 | if (!beg)
|
|---|
| 1105 | break;
|
|---|
| 1106 | newlines++;
|
|---|
| 1107 | }
|
|---|
| 1108 | totalnl = add_count (totalnl, newlines);
|
|---|
| 1109 | lastnl = lim;
|
|---|
| 1110 | }
|
|---|
| 1111 |
|
|---|
| 1112 | /* Print the current filename. */
|
|---|
| 1113 | static void
|
|---|
| 1114 | print_filename (void)
|
|---|
| 1115 | {
|
|---|
| 1116 | pr_sgr_start_if (filename_color);
|
|---|
| 1117 | fputs_errno (input_filename ());
|
|---|
| 1118 | pr_sgr_end_if (filename_color);
|
|---|
| 1119 | }
|
|---|
| 1120 |
|
|---|
| 1121 | /* Print a character separator. */
|
|---|
| 1122 | static void
|
|---|
| 1123 | print_sep (char sep)
|
|---|
| 1124 | {
|
|---|
| 1125 | pr_sgr_start_if (sep_color);
|
|---|
| 1126 | putchar_errno (sep);
|
|---|
| 1127 | pr_sgr_end_if (sep_color);
|
|---|
| 1128 | }
|
|---|
| 1129 |
|
|---|
| 1130 | /* Print a line number or a byte offset. */
|
|---|
| 1131 | static void
|
|---|
| 1132 | print_offset (uintmax_t pos, const char *color)
|
|---|
| 1133 | {
|
|---|
| 1134 | pr_sgr_start_if (color);
|
|---|
| 1135 | printf_errno ("%*"PRIuMAX, offset_width, pos);
|
|---|
| 1136 | pr_sgr_end_if (color);
|
|---|
| 1137 | }
|
|---|
| 1138 |
|
|---|
| 1139 | /* Print a whole line head (filename, line, byte). The output data
|
|---|
| 1140 | starts at BEG and contains LEN bytes; it is followed by at least
|
|---|
| 1141 | sizeof (uword) bytes, the first of which may be temporarily modified.
|
|---|
| 1142 | The output data comes from what is perhaps a larger input line that
|
|---|
| 1143 | goes until LIM, where LIM[-1] is an end-of-line byte. Use SEP as
|
|---|
| 1144 | the separator on output.
|
|---|
| 1145 |
|
|---|
| 1146 | Return true unless the line was suppressed due to an encoding error. */
|
|---|
| 1147 |
|
|---|
| 1148 | static bool
|
|---|
| 1149 | print_line_head (char *beg, size_t len, char const *lim, char sep)
|
|---|
| 1150 | {
|
|---|
| 1151 | if (binary_files != TEXT_BINARY_FILES)
|
|---|
| 1152 | {
|
|---|
| 1153 | char ch = beg[len];
|
|---|
| 1154 | bool encoding_errors = buf_has_encoding_errors (beg, len);
|
|---|
| 1155 | beg[len] = ch;
|
|---|
| 1156 | if (encoding_errors)
|
|---|
| 1157 | {
|
|---|
| 1158 | encoding_error_output = true;
|
|---|
| 1159 | return false;
|
|---|
| 1160 | }
|
|---|
| 1161 | }
|
|---|
| 1162 |
|
|---|
| 1163 | if (out_file)
|
|---|
| 1164 | {
|
|---|
| 1165 | print_filename ();
|
|---|
| 1166 | if (filename_mask)
|
|---|
| 1167 | print_sep (sep);
|
|---|
| 1168 | else
|
|---|
| 1169 | putchar_errno (0);
|
|---|
| 1170 | }
|
|---|
| 1171 |
|
|---|
| 1172 | if (out_line)
|
|---|
| 1173 | {
|
|---|
| 1174 | if (lastnl < lim)
|
|---|
| 1175 | {
|
|---|
| 1176 | nlscan (beg);
|
|---|
| 1177 | totalnl = add_count (totalnl, 1);
|
|---|
| 1178 | lastnl = lim;
|
|---|
| 1179 | }
|
|---|
| 1180 | print_offset (totalnl, line_num_color);
|
|---|
| 1181 | print_sep (sep);
|
|---|
| 1182 | }
|
|---|
| 1183 |
|
|---|
| 1184 | if (out_byte)
|
|---|
| 1185 | {
|
|---|
| 1186 | uintmax_t pos = add_count (totalcc, beg - bufbeg);
|
|---|
| 1187 | print_offset (pos, byte_num_color);
|
|---|
| 1188 | print_sep (sep);
|
|---|
| 1189 | }
|
|---|
| 1190 |
|
|---|
| 1191 | if (align_tabs && (out_file | out_line | out_byte) && len != 0)
|
|---|
| 1192 | putchar_errno ('\t');
|
|---|
| 1193 |
|
|---|
| 1194 | return true;
|
|---|
| 1195 | }
|
|---|
| 1196 |
|
|---|
| 1197 | static char *
|
|---|
| 1198 | print_line_middle (char *beg, char *lim,
|
|---|
| 1199 | const char *line_color, const char *match_color)
|
|---|
| 1200 | {
|
|---|
| 1201 | size_t match_size;
|
|---|
| 1202 | size_t match_offset;
|
|---|
| 1203 | char *cur;
|
|---|
| 1204 | char *mid = NULL;
|
|---|
| 1205 | char *b;
|
|---|
| 1206 |
|
|---|
| 1207 | for (cur = beg;
|
|---|
| 1208 | (cur < lim
|
|---|
| 1209 | && ((match_offset = execute (compiled_pattern, beg, lim - beg,
|
|---|
| 1210 | &match_size, cur)) != (size_t) -1));
|
|---|
| 1211 | cur = b + match_size)
|
|---|
| 1212 | {
|
|---|
| 1213 | b = beg + match_offset;
|
|---|
| 1214 |
|
|---|
| 1215 | /* Avoid matching the empty line at the end of the buffer. */
|
|---|
| 1216 | if (b == lim)
|
|---|
| 1217 | break;
|
|---|
| 1218 |
|
|---|
| 1219 | /* Avoid hanging on grep --color "" foo */
|
|---|
| 1220 | if (match_size == 0)
|
|---|
| 1221 | {
|
|---|
| 1222 | /* Make minimal progress; there may be further non-empty matches. */
|
|---|
| 1223 | /* XXX - Could really advance by one whole multi-octet character. */
|
|---|
| 1224 | match_size = 1;
|
|---|
| 1225 | if (!mid)
|
|---|
| 1226 | mid = cur;
|
|---|
| 1227 | }
|
|---|
| 1228 | else
|
|---|
| 1229 | {
|
|---|
| 1230 | /* This function is called on a matching line only,
|
|---|
| 1231 | but is it selected or rejected/context? */
|
|---|
| 1232 | if (only_matching)
|
|---|
| 1233 | {
|
|---|
| 1234 | char sep = out_invert ? SEP_CHAR_REJECTED : SEP_CHAR_SELECTED;
|
|---|
| 1235 | if (! print_line_head (b, match_size, lim, sep))
|
|---|
| 1236 | return NULL;
|
|---|
| 1237 | }
|
|---|
| 1238 | else
|
|---|
| 1239 | {
|
|---|
| 1240 | pr_sgr_start (line_color);
|
|---|
| 1241 | if (mid)
|
|---|
| 1242 | {
|
|---|
| 1243 | cur = mid;
|
|---|
| 1244 | mid = NULL;
|
|---|
| 1245 | }
|
|---|
| 1246 | fwrite_errno (cur, 1, b - cur);
|
|---|
| 1247 | }
|
|---|
| 1248 |
|
|---|
| 1249 | pr_sgr_start_if (match_color);
|
|---|
| 1250 | fwrite_errno (b, 1, match_size);
|
|---|
| 1251 | pr_sgr_end_if (match_color);
|
|---|
| 1252 | if (only_matching)
|
|---|
| 1253 | putchar_errno (eolbyte);
|
|---|
| 1254 | }
|
|---|
| 1255 | }
|
|---|
| 1256 |
|
|---|
| 1257 | if (only_matching)
|
|---|
| 1258 | cur = lim;
|
|---|
| 1259 | else if (mid)
|
|---|
| 1260 | cur = mid;
|
|---|
| 1261 |
|
|---|
| 1262 | return cur;
|
|---|
| 1263 | }
|
|---|
| 1264 |
|
|---|
| 1265 | static char *
|
|---|
| 1266 | print_line_tail (char *beg, const char *lim, const char *line_color)
|
|---|
| 1267 | {
|
|---|
| 1268 | size_t eol_size;
|
|---|
| 1269 | size_t tail_size;
|
|---|
| 1270 |
|
|---|
| 1271 | eol_size = (lim > beg && lim[-1] == eolbyte);
|
|---|
| 1272 | eol_size += (lim - eol_size > beg && lim[-(1 + eol_size)] == '\r');
|
|---|
| 1273 | tail_size = lim - eol_size - beg;
|
|---|
| 1274 |
|
|---|
| 1275 | if (tail_size > 0)
|
|---|
| 1276 | {
|
|---|
| 1277 | pr_sgr_start (line_color);
|
|---|
| 1278 | fwrite_errno (beg, 1, tail_size);
|
|---|
| 1279 | beg += tail_size;
|
|---|
| 1280 | pr_sgr_end (line_color);
|
|---|
| 1281 | }
|
|---|
| 1282 |
|
|---|
| 1283 | return beg;
|
|---|
| 1284 | }
|
|---|
| 1285 |
|
|---|
| 1286 | static void
|
|---|
| 1287 | prline (char *beg, char *lim, char sep)
|
|---|
| 1288 | {
|
|---|
| 1289 | bool matching;
|
|---|
| 1290 | const char *line_color;
|
|---|
| 1291 | const char *match_color;
|
|---|
| 1292 |
|
|---|
| 1293 | if (!only_matching)
|
|---|
| 1294 | if (! print_line_head (beg, lim - beg - 1, lim, sep))
|
|---|
| 1295 | return;
|
|---|
| 1296 |
|
|---|
| 1297 | matching = (sep == SEP_CHAR_SELECTED) ^ out_invert;
|
|---|
| 1298 |
|
|---|
| 1299 | if (color_option)
|
|---|
| 1300 | {
|
|---|
| 1301 | line_color = (((sep == SEP_CHAR_SELECTED)
|
|---|
| 1302 | ^ (out_invert && (color_option < 0)))
|
|---|
| 1303 | ? selected_line_color : context_line_color);
|
|---|
| 1304 | match_color = (sep == SEP_CHAR_SELECTED
|
|---|
| 1305 | ? selected_match_color : context_match_color);
|
|---|
| 1306 | }
|
|---|
| 1307 | else
|
|---|
| 1308 | line_color = match_color = NULL; /* Shouldn't be used. */
|
|---|
| 1309 |
|
|---|
| 1310 | if ((only_matching && matching)
|
|---|
| 1311 | || (color_option && (*line_color || *match_color)))
|
|---|
| 1312 | {
|
|---|
| 1313 | /* We already know that non-matching lines have no match (to colorize). */
|
|---|
| 1314 | if (matching && (only_matching || *match_color))
|
|---|
| 1315 | {
|
|---|
| 1316 | beg = print_line_middle (beg, lim, line_color, match_color);
|
|---|
| 1317 | if (! beg)
|
|---|
| 1318 | return;
|
|---|
| 1319 | }
|
|---|
| 1320 |
|
|---|
| 1321 | if (!only_matching && *line_color)
|
|---|
| 1322 | {
|
|---|
| 1323 | /* This code is exercised at least when grep is invoked like this:
|
|---|
| 1324 | echo k| GREP_COLORS='sl=01;32' src/grep k --color=always */
|
|---|
| 1325 | beg = print_line_tail (beg, lim, line_color);
|
|---|
| 1326 | }
|
|---|
| 1327 | }
|
|---|
| 1328 |
|
|---|
| 1329 | if (!only_matching && lim > beg)
|
|---|
| 1330 | fwrite_errno (beg, 1, lim - beg);
|
|---|
| 1331 |
|
|---|
| 1332 | if (line_buffered)
|
|---|
| 1333 | fflush_errno ();
|
|---|
| 1334 |
|
|---|
| 1335 | if (stdout_errno)
|
|---|
| 1336 | die (EXIT_TROUBLE, stdout_errno, _("write error"));
|
|---|
| 1337 |
|
|---|
| 1338 | lastout = lim;
|
|---|
| 1339 | }
|
|---|
| 1340 |
|
|---|
| 1341 | /* Print pending lines of trailing context prior to LIM. */
|
|---|
| 1342 | static void
|
|---|
| 1343 | prpending (char const *lim)
|
|---|
| 1344 | {
|
|---|
| 1345 | if (!lastout)
|
|---|
| 1346 | lastout = bufbeg;
|
|---|
| 1347 | for (; 0 < pending && lastout < lim; pending--)
|
|---|
| 1348 | {
|
|---|
| 1349 | char *nl = rawmemchr (lastout, eolbyte);
|
|---|
| 1350 | prline (lastout, nl + 1, SEP_CHAR_REJECTED);
|
|---|
| 1351 | }
|
|---|
| 1352 | }
|
|---|
| 1353 |
|
|---|
| 1354 | /* Output the lines between BEG and LIM. Deal with context. */
|
|---|
| 1355 | static void
|
|---|
| 1356 | prtext (char *beg, char *lim)
|
|---|
| 1357 | {
|
|---|
| 1358 | static bool used; /* Avoid printing SEP_STR_GROUP before any output. */
|
|---|
| 1359 | char eol = eolbyte;
|
|---|
| 1360 |
|
|---|
| 1361 | if (!out_quiet && pending > 0)
|
|---|
| 1362 | prpending (beg);
|
|---|
| 1363 |
|
|---|
| 1364 | char *p = beg;
|
|---|
| 1365 |
|
|---|
| 1366 | if (!out_quiet)
|
|---|
| 1367 | {
|
|---|
| 1368 | /* Deal with leading context. */
|
|---|
| 1369 | char const *bp = lastout ? lastout : bufbeg;
|
|---|
| 1370 | intmax_t i;
|
|---|
| 1371 | for (i = 0; i < out_before; ++i)
|
|---|
| 1372 | if (p > bp)
|
|---|
| 1373 | do
|
|---|
| 1374 | --p;
|
|---|
| 1375 | while (p[-1] != eol);
|
|---|
| 1376 |
|
|---|
| 1377 | /* Print the group separator unless the output is adjacent to
|
|---|
| 1378 | the previous output in the file. */
|
|---|
| 1379 | if ((0 <= out_before || 0 <= out_after) && used
|
|---|
| 1380 | && p != lastout && group_separator)
|
|---|
| 1381 | {
|
|---|
| 1382 | pr_sgr_start_if (sep_color);
|
|---|
| 1383 | fputs_errno (group_separator);
|
|---|
| 1384 | pr_sgr_end_if (sep_color);
|
|---|
| 1385 | putchar_errno ('\n');
|
|---|
| 1386 | }
|
|---|
| 1387 |
|
|---|
| 1388 | while (p < beg)
|
|---|
| 1389 | {
|
|---|
| 1390 | char *nl = rawmemchr (p, eol);
|
|---|
| 1391 | nl++;
|
|---|
| 1392 | prline (p, nl, SEP_CHAR_REJECTED);
|
|---|
| 1393 | p = nl;
|
|---|
| 1394 | }
|
|---|
| 1395 | }
|
|---|
| 1396 |
|
|---|
| 1397 | intmax_t n;
|
|---|
| 1398 | if (out_invert)
|
|---|
| 1399 | {
|
|---|
| 1400 | /* One or more lines are output. */
|
|---|
| 1401 | for (n = 0; p < lim && n < outleft; n++)
|
|---|
| 1402 | {
|
|---|
| 1403 | char *nl = rawmemchr (p, eol);
|
|---|
| 1404 | nl++;
|
|---|
| 1405 | if (!out_quiet)
|
|---|
| 1406 | prline (p, nl, SEP_CHAR_SELECTED);
|
|---|
| 1407 | p = nl;
|
|---|
| 1408 | }
|
|---|
| 1409 | }
|
|---|
| 1410 | else
|
|---|
| 1411 | {
|
|---|
| 1412 | /* Just one line is output. */
|
|---|
| 1413 | if (!out_quiet)
|
|---|
| 1414 | prline (beg, lim, SEP_CHAR_SELECTED);
|
|---|
| 1415 | n = 1;
|
|---|
| 1416 | p = lim;
|
|---|
| 1417 | }
|
|---|
| 1418 |
|
|---|
| 1419 | after_last_match = bufoffset - (buflim - p);
|
|---|
| 1420 | pending = out_quiet ? 0 : MAX (0, out_after);
|
|---|
| 1421 | used = true;
|
|---|
| 1422 | outleft -= n;
|
|---|
| 1423 | }
|
|---|
| 1424 |
|
|---|
/* Replace all NUL bytes in the buffer that starts at P and ends at
   LIM with EOL, unless EOL is itself NUL.  This avoids running out
   of memory when binary input contains a long sequence of zeros,
   which would otherwise be considered to be part of a long line.
   *LIM should be EOL; it is temporarily overwritten during the
   scan.  */
static void
zap_nuls (char *p, char *lim, char eol)
{
  if (!eol)
    return;

  for (;;)
    {
      /* Temporarily terminate the buffer at LIM so that strlen finds
         the next NUL without running past the end.  */
      *lim = '\0';
      char *nul = p + strlen (p);
      *lim = eol;

      if (nul == lim)
        return;

      /* Overwrite the whole run of NULs.  The restored *LIM, being
         nonzero, stops the run at the buffer end.  */
      do
        *nul++ = eol;
      while (*nul == '\0');

      p = nul;
    }
}
|
|---|
| 1445 |
|
|---|
| 1446 | /* Scan the specified portion of the buffer, matching lines (or
|
|---|
| 1447 | between matching lines if OUT_INVERT is true). Return a count of
|
|---|
| 1448 | lines printed. Replace all NUL bytes with NUL_ZAPPER as we go. */
|
|---|
| 1449 | static intmax_t
|
|---|
| 1450 | grepbuf (char *beg, char const *lim)
|
|---|
| 1451 | {
|
|---|
| 1452 | intmax_t outleft0 = outleft;
|
|---|
| 1453 | char *endp;
|
|---|
| 1454 |
|
|---|
| 1455 | for (char *p = beg; p < lim; p = endp)
|
|---|
| 1456 | {
|
|---|
| 1457 | size_t match_size;
|
|---|
| 1458 | size_t match_offset = execute (compiled_pattern, p, lim - p,
|
|---|
| 1459 | &match_size, NULL);
|
|---|
| 1460 | if (match_offset == (size_t) -1)
|
|---|
| 1461 | {
|
|---|
| 1462 | if (!out_invert)
|
|---|
| 1463 | break;
|
|---|
| 1464 | match_offset = lim - p;
|
|---|
| 1465 | match_size = 0;
|
|---|
| 1466 | }
|
|---|
| 1467 | char *b = p + match_offset;
|
|---|
| 1468 | endp = b + match_size;
|
|---|
| 1469 | /* Avoid matching the empty line at the end of the buffer. */
|
|---|
| 1470 | if (!out_invert && b == lim)
|
|---|
| 1471 | break;
|
|---|
| 1472 | if (!out_invert || p < b)
|
|---|
| 1473 | {
|
|---|
| 1474 | char *prbeg = out_invert ? p : b;
|
|---|
| 1475 | char *prend = out_invert ? b : endp;
|
|---|
| 1476 | prtext (prbeg, prend);
|
|---|
| 1477 | if (!outleft || done_on_match)
|
|---|
| 1478 | {
|
|---|
| 1479 | if (exit_on_match)
|
|---|
| 1480 | exit (errseen ? exit_failure : EXIT_SUCCESS);
|
|---|
| 1481 | break;
|
|---|
| 1482 | }
|
|---|
| 1483 | }
|
|---|
| 1484 | }
|
|---|
| 1485 |
|
|---|
| 1486 | return outleft0 - outleft;
|
|---|
| 1487 | }
|
|---|
| 1488 |
|
|---|
| 1489 | /* Search a given (non-directory) file. Return a count of lines printed.
|
|---|
| 1490 | Set *INEOF to true if end-of-file reached. */
|
|---|
| 1491 | static intmax_t
|
|---|
| 1492 | grep (int fd, struct stat const *st, bool *ineof)
|
|---|
| 1493 | {
|
|---|
| 1494 | intmax_t nlines, i;
|
|---|
| 1495 | size_t residue, save;
|
|---|
| 1496 | char oldc;
|
|---|
| 1497 | char *beg;
|
|---|
| 1498 | char *lim;
|
|---|
| 1499 | char eol = eolbyte;
|
|---|
| 1500 | char nul_zapper = '\0';
|
|---|
| 1501 | bool done_on_match_0 = done_on_match;
|
|---|
| 1502 | bool out_quiet_0 = out_quiet;
|
|---|
| 1503 |
|
|---|
| 1504 | /* The value of NLINES when nulls were first deduced in the input;
|
|---|
| 1505 | this is not necessarily the same as the number of matching lines
|
|---|
| 1506 | before the first null. -1 if no input nulls have been deduced. */
|
|---|
| 1507 | intmax_t nlines_first_null = -1;
|
|---|
| 1508 |
|
|---|
| 1509 | if (! reset (fd, st))
|
|---|
| 1510 | return 0;
|
|---|
| 1511 |
|
|---|
| 1512 | totalcc = 0;
|
|---|
| 1513 | lastout = 0;
|
|---|
| 1514 | totalnl = 0;
|
|---|
| 1515 | outleft = max_count;
|
|---|
| 1516 | after_last_match = 0;
|
|---|
| 1517 | pending = 0;
|
|---|
| 1518 | skip_nuls = skip_empty_lines && !eol;
|
|---|
| 1519 | encoding_error_output = false;
|
|---|
| 1520 |
|
|---|
| 1521 | nlines = 0;
|
|---|
| 1522 | residue = 0;
|
|---|
| 1523 | save = 0;
|
|---|
| 1524 |
|
|---|
| 1525 | if (! fillbuf (save, st))
|
|---|
| 1526 | {
|
|---|
| 1527 | suppressible_error (errno);
|
|---|
| 1528 | return 0;
|
|---|
| 1529 | }
|
|---|
| 1530 |
|
|---|
| 1531 | offset_width = 0;
|
|---|
| 1532 | if (align_tabs)
|
|---|
| 1533 | {
|
|---|
| 1534 | /* Width is log of maximum number. Line numbers are origin-1. */
|
|---|
| 1535 | uintmax_t num = usable_st_size (st) ? st->st_size : UINTMAX_MAX;
|
|---|
| 1536 | num += out_line && num < UINTMAX_MAX;
|
|---|
| 1537 | do
|
|---|
| 1538 | offset_width++;
|
|---|
| 1539 | while ((num /= 10) != 0);
|
|---|
| 1540 | }
|
|---|
| 1541 |
|
|---|
| 1542 | for (bool firsttime = true; ; firsttime = false)
|
|---|
| 1543 | {
|
|---|
| 1544 | if (nlines_first_null < 0 && eol && binary_files != TEXT_BINARY_FILES
|
|---|
| 1545 | && (buf_has_nulls (bufbeg, buflim - bufbeg)
|
|---|
| 1546 | || (firsttime && file_must_have_nulls (buflim - bufbeg, fd, st))))
|
|---|
| 1547 | {
|
|---|
| 1548 | if (binary_files == WITHOUT_MATCH_BINARY_FILES)
|
|---|
| 1549 | return 0;
|
|---|
| 1550 | if (!count_matches)
|
|---|
| 1551 | done_on_match = out_quiet = true;
|
|---|
| 1552 | nlines_first_null = nlines;
|
|---|
| 1553 | nul_zapper = eol;
|
|---|
| 1554 | skip_nuls = skip_empty_lines;
|
|---|
| 1555 | }
|
|---|
| 1556 |
|
|---|
| 1557 | lastnl = bufbeg;
|
|---|
| 1558 | if (lastout)
|
|---|
| 1559 | lastout = bufbeg;
|
|---|
| 1560 |
|
|---|
| 1561 | beg = bufbeg + save;
|
|---|
| 1562 |
|
|---|
| 1563 | /* no more data to scan (eof) except for maybe a residue -> break */
|
|---|
| 1564 | if (beg == buflim)
|
|---|
| 1565 | {
|
|---|
| 1566 | *ineof = true;
|
|---|
| 1567 | break;
|
|---|
| 1568 | }
|
|---|
| 1569 |
|
|---|
| 1570 | zap_nuls (beg, buflim, nul_zapper);
|
|---|
| 1571 |
|
|---|
| 1572 | /* Determine new residue (the length of an incomplete line at the end of
|
|---|
| 1573 | the buffer, 0 means there is no incomplete last line). */
|
|---|
| 1574 | oldc = beg[-1];
|
|---|
| 1575 | beg[-1] = eol;
|
|---|
| 1576 | /* FIXME: use rawmemrchr if/when it exists, since we have ensured
|
|---|
| 1577 | that this use of memrchr is guaranteed never to return NULL. */
|
|---|
| 1578 | lim = memrchr (beg - 1, eol, buflim - beg + 1);
|
|---|
| 1579 | ++lim;
|
|---|
| 1580 | beg[-1] = oldc;
|
|---|
| 1581 | if (lim == beg)
|
|---|
| 1582 | lim = beg - residue;
|
|---|
| 1583 | beg -= residue;
|
|---|
| 1584 | residue = buflim - lim;
|
|---|
| 1585 |
|
|---|
| 1586 | if (beg < lim)
|
|---|
| 1587 | {
|
|---|
| 1588 | if (outleft)
|
|---|
| 1589 | nlines += grepbuf (beg, lim);
|
|---|
| 1590 | if (pending)
|
|---|
| 1591 | prpending (lim);
|
|---|
| 1592 | if ((!outleft && !pending)
|
|---|
| 1593 | || (done_on_match && MAX (0, nlines_first_null) < nlines))
|
|---|
| 1594 | goto finish_grep;
|
|---|
| 1595 | }
|
|---|
| 1596 |
|
|---|
| 1597 | /* The last OUT_BEFORE lines at the end of the buffer will be needed as
|
|---|
| 1598 | leading context if there is a matching line at the begin of the
|
|---|
| 1599 | next data. Make beg point to their begin. */
|
|---|
| 1600 | i = 0;
|
|---|
| 1601 | beg = lim;
|
|---|
| 1602 | while (i < out_before && beg > bufbeg && beg != lastout)
|
|---|
| 1603 | {
|
|---|
| 1604 | ++i;
|
|---|
| 1605 | do
|
|---|
| 1606 | --beg;
|
|---|
| 1607 | while (beg[-1] != eol);
|
|---|
| 1608 | }
|
|---|
| 1609 |
|
|---|
| 1610 | /* Detect whether leading context is adjacent to previous output. */
|
|---|
| 1611 | if (beg != lastout)
|
|---|
| 1612 | lastout = 0;
|
|---|
| 1613 |
|
|---|
| 1614 | /* Handle some details and read more data to scan. */
|
|---|
| 1615 | save = residue + lim - beg;
|
|---|
| 1616 | if (out_byte)
|
|---|
| 1617 | totalcc = add_count (totalcc, buflim - bufbeg - save);
|
|---|
| 1618 | if (out_line)
|
|---|
| 1619 | nlscan (beg);
|
|---|
| 1620 | if (! fillbuf (save, st))
|
|---|
| 1621 | {
|
|---|
| 1622 | suppressible_error (errno);
|
|---|
| 1623 | goto finish_grep;
|
|---|
| 1624 | }
|
|---|
| 1625 | }
|
|---|
| 1626 | if (residue)
|
|---|
| 1627 | {
|
|---|
| 1628 | *buflim++ = eol;
|
|---|
| 1629 | if (outleft)
|
|---|
| 1630 | nlines += grepbuf (bufbeg + save - residue, buflim);
|
|---|
| 1631 | if (pending)
|
|---|
| 1632 | prpending (buflim);
|
|---|
| 1633 | }
|
|---|
| 1634 |
|
|---|
| 1635 | finish_grep:
|
|---|
| 1636 | done_on_match = done_on_match_0;
|
|---|
| 1637 | out_quiet = out_quiet_0;
|
|---|
| 1638 | if (binary_files == BINARY_BINARY_FILES && ! (out_quiet | suppress_errors)
|
|---|
| 1639 | && (encoding_error_output
|
|---|
| 1640 | || (0 <= nlines_first_null && nlines_first_null < nlines)))
|
|---|
| 1641 | error (0, 0, _("%s: binary file matches"), input_filename ());
|
|---|
| 1642 | return nlines;
|
|---|
| 1643 | }
|
|---|
| 1644 |
|
|---|
| 1645 | static bool
|
|---|
| 1646 | grepdirent (FTS *fts, FTSENT *ent, bool command_line)
|
|---|
| 1647 | {
|
|---|
| 1648 | bool follow;
|
|---|
| 1649 | command_line &= ent->fts_level == FTS_ROOTLEVEL;
|
|---|
| 1650 |
|
|---|
| 1651 | if (ent->fts_info == FTS_DP)
|
|---|
| 1652 | return true;
|
|---|
| 1653 |
|
|---|
| 1654 | if (!command_line
|
|---|
| 1655 | && skipped_file (ent->fts_name, false,
|
|---|
| 1656 | (ent->fts_info == FTS_D || ent->fts_info == FTS_DC
|
|---|
| 1657 | || ent->fts_info == FTS_DNR)))
|
|---|
| 1658 | {
|
|---|
| 1659 | fts_set (fts, ent, FTS_SKIP);
|
|---|
| 1660 | return true;
|
|---|
| 1661 | }
|
|---|
| 1662 |
|
|---|
| 1663 | filename = ent->fts_path;
|
|---|
| 1664 | if (omit_dot_slash && filename[1])
|
|---|
| 1665 | filename += 2;
|
|---|
| 1666 | follow = (fts->fts_options & FTS_LOGICAL
|
|---|
| 1667 | || (fts->fts_options & FTS_COMFOLLOW && command_line));
|
|---|
| 1668 |
|
|---|
| 1669 | switch (ent->fts_info)
|
|---|
| 1670 | {
|
|---|
| 1671 | case FTS_D:
|
|---|
| 1672 | if (directories == RECURSE_DIRECTORIES)
|
|---|
| 1673 | return true;
|
|---|
| 1674 | fts_set (fts, ent, FTS_SKIP);
|
|---|
| 1675 | break;
|
|---|
| 1676 |
|
|---|
| 1677 | case FTS_DC:
|
|---|
| 1678 | if (!suppress_errors)
|
|---|
| 1679 | error (0, 0, _("%s: warning: recursive directory loop"), filename);
|
|---|
| 1680 | return true;
|
|---|
| 1681 |
|
|---|
| 1682 | case FTS_DNR:
|
|---|
| 1683 | case FTS_ERR:
|
|---|
| 1684 | case FTS_NS:
|
|---|
| 1685 | suppressible_error (ent->fts_errno);
|
|---|
| 1686 | return true;
|
|---|
| 1687 |
|
|---|
| 1688 | case FTS_DEFAULT:
|
|---|
| 1689 | case FTS_NSOK:
|
|---|
| 1690 | if (skip_devices (command_line))
|
|---|
| 1691 | {
|
|---|
| 1692 | struct stat *st = ent->fts_statp;
|
|---|
| 1693 | struct stat st1;
|
|---|
| 1694 | if (! st->st_mode)
|
|---|
| 1695 | {
|
|---|
| 1696 | /* The file type is not already known. Get the file status
|
|---|
| 1697 | before opening, since opening might have side effects
|
|---|
| 1698 | on a device. */
|
|---|
| 1699 | int flag = follow ? 0 : AT_SYMLINK_NOFOLLOW;
|
|---|
| 1700 | if (fstatat (fts->fts_cwd_fd, ent->fts_accpath, &st1, flag) != 0)
|
|---|
| 1701 | {
|
|---|
| 1702 | suppressible_error (errno);
|
|---|
| 1703 | return true;
|
|---|
| 1704 | }
|
|---|
| 1705 | st = &st1;
|
|---|
| 1706 | }
|
|---|
| 1707 | if (is_device_mode (st->st_mode))
|
|---|
| 1708 | return true;
|
|---|
| 1709 | }
|
|---|
| 1710 | break;
|
|---|
| 1711 |
|
|---|
| 1712 | case FTS_F:
|
|---|
| 1713 | case FTS_SLNONE:
|
|---|
| 1714 | break;
|
|---|
| 1715 |
|
|---|
| 1716 | case FTS_SL:
|
|---|
| 1717 | case FTS_W:
|
|---|
| 1718 | return true;
|
|---|
| 1719 |
|
|---|
| 1720 | default:
|
|---|
| 1721 | abort ();
|
|---|
| 1722 | }
|
|---|
| 1723 |
|
|---|
| 1724 | return grepfile (fts->fts_cwd_fd, ent->fts_accpath, follow, command_line);
|
|---|
| 1725 | }
|
|---|
| 1726 |
|
|---|
/* Return true if ERR is an errno value that
   'open ("symlink", ... O_NOFOLLOW ...)' can fail with.  POSIX
   specifies ELOOP, but FreeBSD uses EMLINK and NetBSD uses EFTYPE.  */
static bool
open_symlink_nofollow_error (int err)
{
  bool symlink_refused = err == ELOOP || err == EMLINK;
#ifdef EFTYPE
  symlink_refused = symlink_refused || err == EFTYPE;
#endif
  return symlink_refused;
}
|
|---|
| 1740 |
|
|---|
| 1741 | static bool
|
|---|
| 1742 | grepfile (int dirdesc, char const *name, bool follow, bool command_line)
|
|---|
| 1743 | {
|
|---|
| 1744 | int oflag = (O_RDONLY | O_NOCTTY
|
|---|
| 1745 | | (IGNORE_DUPLICATE_BRANCH_WARNING
|
|---|
| 1746 | (binary ? O_BINARY : 0))
|
|---|
| 1747 | | (follow ? 0 : O_NOFOLLOW)
|
|---|
| 1748 | | (skip_devices (command_line) ? O_NONBLOCK : 0));
|
|---|
| 1749 | int desc = openat_safer (dirdesc, name, oflag);
|
|---|
| 1750 | if (desc < 0)
|
|---|
| 1751 | {
|
|---|
| 1752 | if (follow || ! open_symlink_nofollow_error (errno))
|
|---|
| 1753 | suppressible_error (errno);
|
|---|
| 1754 | return true;
|
|---|
| 1755 | }
|
|---|
| 1756 | return grepdesc (desc, command_line);
|
|---|
| 1757 | }
|
|---|
| 1758 |
|
|---|
| 1759 | /* Read all data from FD, with status ST. Return true if successful,
|
|---|
| 1760 | false (setting errno) otherwise. */
|
|---|
| 1761 | static bool
|
|---|
| 1762 | drain_input (int fd, struct stat const *st)
|
|---|
| 1763 | {
|
|---|
| 1764 | ssize_t nbytes;
|
|---|
| 1765 | if (S_ISFIFO (st->st_mode) && dev_null_output)
|
|---|
| 1766 | {
|
|---|
| 1767 | #ifdef SPLICE_F_MOVE
|
|---|
| 1768 | /* Should be faster, since it need not copy data to user space. */
|
|---|
| 1769 | nbytes = splice (fd, NULL, STDOUT_FILENO, NULL,
|
|---|
| 1770 | INITIAL_BUFSIZE, SPLICE_F_MOVE);
|
|---|
| 1771 | if (0 <= nbytes || errno != EINVAL)
|
|---|
| 1772 | {
|
|---|
| 1773 | while (0 < nbytes)
|
|---|
| 1774 | nbytes = splice (fd, NULL, STDOUT_FILENO, NULL,
|
|---|
| 1775 | INITIAL_BUFSIZE, SPLICE_F_MOVE);
|
|---|
| 1776 | return nbytes == 0;
|
|---|
| 1777 | }
|
|---|
| 1778 | #endif
|
|---|
| 1779 | }
|
|---|
| 1780 | while ((nbytes = safe_read (fd, buffer, bufalloc)))
|
|---|
| 1781 | if (nbytes == SAFE_READ_ERROR)
|
|---|
| 1782 | return false;
|
|---|
| 1783 | return true;
|
|---|
| 1784 | }
|
|---|
| 1785 |
|
|---|
| 1786 | /* Finish reading from FD, with status ST and where end-of-file has
|
|---|
| 1787 | been seen if INEOF. Typically this is a no-op, but when reading
|
|---|
| 1788 | from standard input this may adjust the file offset or drain a
|
|---|
| 1789 | pipe. */
|
|---|
| 1790 |
|
|---|
| 1791 | static void
|
|---|
| 1792 | finalize_input (int fd, struct stat const *st, bool ineof)
|
|---|
| 1793 | {
|
|---|
| 1794 | if (fd == STDIN_FILENO
|
|---|
| 1795 | && (outleft
|
|---|
| 1796 | ? (!ineof
|
|---|
| 1797 | && (seek_failed
|
|---|
| 1798 | || (lseek (fd, 0, SEEK_END) < 0
|
|---|
| 1799 | /* Linux proc file system has EINVAL (Bug#25180). */
|
|---|
| 1800 | && errno != EINVAL))
|
|---|
| 1801 | && ! drain_input (fd, st))
|
|---|
| 1802 | : (bufoffset != after_last_match && !seek_failed
|
|---|
| 1803 | && lseek (fd, after_last_match, SEEK_SET) < 0)))
|
|---|
| 1804 | suppressible_error (errno);
|
|---|
| 1805 | }
|
|---|
| 1806 |
|
|---|
| 1807 | static bool
|
|---|
| 1808 | grepdesc (int desc, bool command_line)
|
|---|
| 1809 | {
|
|---|
| 1810 | intmax_t count;
|
|---|
| 1811 | bool status = true;
|
|---|
| 1812 | bool ineof = false;
|
|---|
| 1813 | struct stat st;
|
|---|
| 1814 |
|
|---|
| 1815 | /* Get the file status, possibly for the second time. This catches
|
|---|
| 1816 | a race condition if the directory entry changes after the
|
|---|
| 1817 | directory entry is read and before the file is opened. For
|
|---|
| 1818 | example, normally DESC is a directory only at the top level, but
|
|---|
| 1819 | there is an exception if some other process substitutes a
|
|---|
| 1820 | directory for a non-directory while 'grep' is running. */
|
|---|
| 1821 | if (fstat (desc, &st) != 0)
|
|---|
| 1822 | {
|
|---|
| 1823 | suppressible_error (errno);
|
|---|
| 1824 | goto closeout;
|
|---|
| 1825 | }
|
|---|
| 1826 |
|
|---|
| 1827 | if (desc != STDIN_FILENO && skip_devices (command_line)
|
|---|
| 1828 | && is_device_mode (st.st_mode))
|
|---|
| 1829 | goto closeout;
|
|---|
| 1830 |
|
|---|
| 1831 | if (desc != STDIN_FILENO && command_line
|
|---|
| 1832 | && skipped_file (filename, true, S_ISDIR (st.st_mode) != 0))
|
|---|
| 1833 | goto closeout;
|
|---|
| 1834 |
|
|---|
| 1835 | /* Don't output file names if invoked as 'grep -r PATTERN NONDIRECTORY'. */
|
|---|
| 1836 | if (out_file < 0)
|
|---|
| 1837 | out_file = !!S_ISDIR (st.st_mode);
|
|---|
| 1838 |
|
|---|
| 1839 | if (desc != STDIN_FILENO
|
|---|
| 1840 | && directories == RECURSE_DIRECTORIES && S_ISDIR (st.st_mode))
|
|---|
| 1841 | {
|
|---|
| 1842 | /* Traverse the directory starting with its full name, because
|
|---|
| 1843 | unfortunately fts provides no way to traverse the directory
|
|---|
| 1844 | starting from its file descriptor. */
|
|---|
| 1845 |
|
|---|
| 1846 | FTS *fts;
|
|---|
| 1847 | FTSENT *ent;
|
|---|
| 1848 | int opts = fts_options & ~(command_line ? 0 : FTS_COMFOLLOW);
|
|---|
| 1849 | char *fts_arg[2];
|
|---|
| 1850 |
|
|---|
| 1851 | /* Close DESC now, to conserve file descriptors if the race
|
|---|
| 1852 | condition occurs many times in a deep recursion. */
|
|---|
| 1853 | if (close (desc) != 0)
|
|---|
| 1854 | suppressible_error (errno);
|
|---|
| 1855 |
|
|---|
| 1856 | fts_arg[0] = (char *) filename;
|
|---|
| 1857 | fts_arg[1] = NULL;
|
|---|
| 1858 | fts = fts_open (fts_arg, opts, NULL);
|
|---|
| 1859 |
|
|---|
| 1860 | if (!fts)
|
|---|
| 1861 | xalloc_die ();
|
|---|
| 1862 | while ((ent = fts_read (fts)))
|
|---|
| 1863 | status &= grepdirent (fts, ent, command_line);
|
|---|
| 1864 | if (errno)
|
|---|
| 1865 | suppressible_error (errno);
|
|---|
| 1866 | if (fts_close (fts) != 0)
|
|---|
| 1867 | suppressible_error (errno);
|
|---|
| 1868 | return status;
|
|---|
| 1869 | }
|
|---|
| 1870 | if (desc != STDIN_FILENO
|
|---|
| 1871 | && ((directories == SKIP_DIRECTORIES && S_ISDIR (st.st_mode))
|
|---|
| 1872 | || ((devices == SKIP_DEVICES
|
|---|
| 1873 | || (devices == READ_COMMAND_LINE_DEVICES && !command_line))
|
|---|
| 1874 | && is_device_mode (st.st_mode))))
|
|---|
| 1875 | goto closeout;
|
|---|
| 1876 |
|
|---|
| 1877 | /* If there is a regular file on stdout and the current file refers
|
|---|
| 1878 | to the same i-node, we have to report the problem and skip it.
|
|---|
| 1879 | Otherwise when matching lines from some other input reach the
|
|---|
| 1880 | disk before we open this file, we can end up reading and matching
|
|---|
| 1881 | those lines and appending them to the file from which we're reading.
|
|---|
| 1882 | Then we'd have what appears to be an infinite loop that'd terminate
|
|---|
| 1883 | only upon filling the output file system or reaching a quota.
|
|---|
| 1884 | However, there is no risk of an infinite loop if grep is generating
|
|---|
| 1885 | no output, i.e., with --silent, --quiet, -q.
|
|---|
| 1886 | Similarly, with any of these:
|
|---|
| 1887 | --max-count=N (-m) (for N >= 2)
|
|---|
| 1888 | --files-with-matches (-l)
|
|---|
| 1889 | --files-without-match (-L)
|
|---|
| 1890 | there is no risk of trouble.
|
|---|
| 1891 | For --max-count=1, grep stops after printing the first match,
|
|---|
| 1892 | so there is no risk of malfunction. But even --max-count=2, with
|
|---|
| 1893 | input==output, while there is no risk of infloop, there is a race
|
|---|
| 1894 | condition that could result in "alternate" output. */
|
|---|
| 1895 | if (!out_quiet && list_files == LISTFILES_NONE && 1 < max_count
|
|---|
| 1896 | && S_ISREG (st.st_mode) && SAME_INODE (st, out_stat))
|
|---|
| 1897 | {
|
|---|
| 1898 | if (! suppress_errors)
|
|---|
| 1899 | error (0, 0, _("%s: input file is also the output"), input_filename ());
|
|---|
| 1900 | errseen = true;
|
|---|
| 1901 | goto closeout;
|
|---|
| 1902 | }
|
|---|
| 1903 |
|
|---|
| 1904 | count = grep (desc, &st, &ineof);
|
|---|
| 1905 | if (count_matches)
|
|---|
| 1906 | {
|
|---|
| 1907 | if (out_file)
|
|---|
| 1908 | {
|
|---|
| 1909 | print_filename ();
|
|---|
| 1910 | if (filename_mask)
|
|---|
| 1911 | print_sep (SEP_CHAR_SELECTED);
|
|---|
| 1912 | else
|
|---|
| 1913 | putchar_errno (0);
|
|---|
| 1914 | }
|
|---|
| 1915 | printf_errno ("%" PRIdMAX "\n", count);
|
|---|
| 1916 | if (line_buffered)
|
|---|
| 1917 | fflush_errno ();
|
|---|
| 1918 | }
|
|---|
| 1919 |
|
|---|
| 1920 | status = !count;
|
|---|
| 1921 |
|
|---|
| 1922 | if (list_files == LISTFILES_NONE)
|
|---|
| 1923 | finalize_input (desc, &st, ineof);
|
|---|
| 1924 | else if (list_files == (status ? LISTFILES_NONMATCHING : LISTFILES_MATCHING))
|
|---|
| 1925 | {
|
|---|
| 1926 | print_filename ();
|
|---|
| 1927 | putchar_errno ('\n' & filename_mask);
|
|---|
| 1928 | if (line_buffered)
|
|---|
| 1929 | fflush_errno ();
|
|---|
| 1930 | }
|
|---|
| 1931 |
|
|---|
| 1932 | closeout:
|
|---|
| 1933 | if (desc != STDIN_FILENO && close (desc) != 0)
|
|---|
| 1934 | suppressible_error (errno);
|
|---|
| 1935 | return status;
|
|---|
| 1936 | }
|
|---|
| 1937 |
|
|---|
| 1938 | static bool
|
|---|
| 1939 | grep_command_line_arg (char const *arg)
|
|---|
| 1940 | {
|
|---|
| 1941 | if (STREQ (arg, "-"))
|
|---|
| 1942 | {
|
|---|
| 1943 | filename = label;
|
|---|
| 1944 | if (binary)
|
|---|
| 1945 | xset_binary_mode (STDIN_FILENO, O_BINARY);
|
|---|
| 1946 | return grepdesc (STDIN_FILENO, true);
|
|---|
| 1947 | }
|
|---|
| 1948 | else
|
|---|
| 1949 | {
|
|---|
| 1950 | filename = arg;
|
|---|
| 1951 | return grepfile (AT_FDCWD, arg, true, true);
|
|---|
| 1952 | }
|
|---|
| 1953 | }
|
|---|
| 1954 |
|
|---|
| 1955 | _Noreturn void usage (int); /* Print usage information, then exit with STATUS: a short hint on stderr if STATUS is nonzero, the full help text on stdout otherwise. */
|
|---|
| 1956 | void
|
|---|
| 1957 | usage (int status)
|
|---|
| 1958 | {
|
|---|
| 1959 | if (status != 0) /* Nonzero STATUS: only a brief synopsis and a pointer to --help, on stderr. */
|
|---|
| 1960 | {
|
|---|
| 1961 | fprintf (stderr, _("Usage: %s [OPTION]... PATTERNS [FILE]...\n"),
|
|---|
| 1962 | getprogname ());
|
|---|
| 1963 | fprintf (stderr, _("Try '%s --help' for more information.\n"),
|
|---|
| 1964 | getprogname ());
|
|---|
| 1965 | }
|
|---|
| 1966 | else /* Zero STATUS: the complete help text, on stdout, emitted as several separate translatable strings. */
|
|---|
| 1967 | {
|
|---|
| 1968 | printf (_("Usage: %s [OPTION]... PATTERNS [FILE]...\n"), getprogname ());
|
|---|
| 1969 | printf (_("Search for PATTERNS in each FILE.\n"));
|
|---|
| 1970 | printf (_("\
|
|---|
| 1971 | Example: %s -i 'hello world' menu.h main.c\n\
|
|---|
| 1972 | PATTERNS can contain multiple patterns separated by newlines.\n\
|
|---|
| 1973 | \n\
|
|---|
| 1974 | Pattern selection and interpretation:\n"), getprogname ());
|
|---|
| 1975 | printf (_("\
|
|---|
| 1976 | -E, --extended-regexp PATTERNS are extended regular expressions\n\
|
|---|
| 1977 | -F, --fixed-strings PATTERNS are strings\n\
|
|---|
| 1978 | -G, --basic-regexp PATTERNS are basic regular expressions\n\
|
|---|
| 1979 | -P, --perl-regexp PATTERNS are Perl regular expressions\n"));
|
|---|
| 1980 | /* -X is deliberately undocumented. */
|
|---|
| 1981 | printf (_("\
|
|---|
| 1982 | -e, --regexp=PATTERNS use PATTERNS for matching\n\
|
|---|
| 1983 | -f, --file=FILE take PATTERNS from FILE\n\
|
|---|
| 1984 | -i, --ignore-case ignore case distinctions in patterns and data\n\
|
|---|
| 1985 | --no-ignore-case do not ignore case distinctions (default)\n\
|
|---|
| 1986 | -w, --word-regexp match only whole words\n\
|
|---|
| 1987 | -x, --line-regexp match only whole lines\n\
|
|---|
| 1988 | -z, --null-data a data line ends in 0 byte, not newline\n"));
|
|---|
| 1989 | printf (_("\
|
|---|
| 1990 | \n\
|
|---|
| 1991 | Miscellaneous:\n\
|
|---|
| 1992 | -s, --no-messages suppress error messages\n\
|
|---|
| 1993 | -v, --invert-match select non-matching lines\n\
|
|---|
| 1994 | -V, --version display version information and exit\n\
|
|---|
| 1995 | --help display this help text and exit\n"));
|
|---|
| 1996 | printf (_("\
|
|---|
| 1997 | \n\
|
|---|
| 1998 | Output control:\n\
|
|---|
| 1999 | -m, --max-count=NUM stop after NUM selected lines\n\
|
|---|
| 2000 | -b, --byte-offset print the byte offset with output lines\n\
|
|---|
| 2001 | -n, --line-number print line number with output lines\n\
|
|---|
| 2002 | --line-buffered flush output on every line\n\
|
|---|
| 2003 | -H, --with-filename print file name with output lines\n\
|
|---|
| 2004 | -h, --no-filename suppress the file name prefix on output\n\
|
|---|
| 2005 | --label=LABEL use LABEL as the standard input file name prefix\n\
|
|---|
| 2006 | "));
|
|---|
| 2007 | printf (_("\
|
|---|
| 2008 | -o, --only-matching show only nonempty parts of lines that match\n\
|
|---|
| 2009 | -q, --quiet, --silent suppress all normal output\n\
|
|---|
| 2010 | --binary-files=TYPE assume that binary files are TYPE;\n\
|
|---|
| 2011 | TYPE is 'binary', 'text', or 'without-match'\n\
|
|---|
| 2012 | -a, --text equivalent to --binary-files=text\n\
|
|---|
| 2013 | "));
|
|---|
| 2014 | printf (_("\
|
|---|
| 2015 | -I equivalent to --binary-files=without-match\n\
|
|---|
| 2016 | -d, --directories=ACTION how to handle directories;\n\
|
|---|
| 2017 | ACTION is 'read', 'recurse', or 'skip'\n\
|
|---|
| 2018 | -D, --devices=ACTION how to handle devices, FIFOs and sockets;\n\
|
|---|
| 2019 | ACTION is 'read' or 'skip'\n\
|
|---|
| 2020 | -r, --recursive like --directories=recurse\n\
|
|---|
| 2021 | -R, --dereference-recursive likewise, but follow all symlinks\n\
|
|---|
| 2022 | "));
|
|---|
| 2023 | printf (_("\
|
|---|
| 2024 | --include=GLOB search only files that match GLOB (a file pattern)"
|
|---|
| 2025 | "\n\
|
|---|
| 2026 | --exclude=GLOB skip files that match GLOB\n\
|
|---|
| 2027 | --exclude-from=FILE skip files that match any file pattern from FILE\n\
|
|---|
| 2028 | --exclude-dir=GLOB skip directories that match GLOB\n\
|
|---|
| 2029 | "));
|
|---|
| 2030 | printf (_("\
|
|---|
| 2031 | -L, --files-without-match print only names of FILEs with no selected lines\n\
|
|---|
| 2032 | -l, --files-with-matches print only names of FILEs with selected lines\n\
|
|---|
| 2033 | -c, --count print only a count of selected lines per FILE\n\
|
|---|
| 2034 | -T, --initial-tab make tabs line up (if needed)\n\
|
|---|
| 2035 | -Z, --null print 0 byte after FILE name\n"));
|
|---|
| 2036 | printf (_("\
|
|---|
| 2037 | \n\
|
|---|
| 2038 | Context control:\n\
|
|---|
| 2039 | -B, --before-context=NUM print NUM lines of leading context\n\
|
|---|
| 2040 | -A, --after-context=NUM print NUM lines of trailing context\n\
|
|---|
| 2041 | -C, --context=NUM print NUM lines of output context\n\
|
|---|
| 2042 | "));
|
|---|
| 2043 | printf (_("\
|
|---|
| 2044 | -NUM same as --context=NUM\n\
|
|---|
| 2045 | --group-separator=SEP print SEP on line between matches with context\n\
|
|---|
| 2046 | --no-group-separator do not print separator for matches with context\n\
|
|---|
| 2047 | --color[=WHEN],\n\
|
|---|
| 2048 | --colour[=WHEN] use markers to highlight the matching strings;\n\
|
|---|
| 2049 | WHEN is 'always', 'never', or 'auto'\n\
|
|---|
| 2050 | -U, --binary do not strip CR characters at EOL (MSDOS/Windows)\n\
|
|---|
| 2051 | \n"));
|
|---|
| 2052 | printf (_("\
|
|---|
| 2053 | When FILE is '-', read standard input. With no FILE, read '.' if\n\
|
|---|
| 2054 | recursive, '-' otherwise. With fewer than two FILEs, assume -h.\n\
|
|---|
| 2055 | Exit status is 0 if any line is selected, 1 otherwise;\n\
|
|---|
| 2056 | if any error occurs and -q is not given, the exit status is 2.\n"));
|
|---|
| 2057 | emit_bug_reporting_address (); /* NOTE(review): presumably prints where to report bugs -- defined elsewhere, confirm. */
|
|---|
| 2058 | }
|
|---|
| 2059 | exit (status); /* Both branches end here, so the function never returns. */
|
|---|
| 2060 | }
|
|---|
| 2061 |
|
|---|
| 2062 | /* Pattern compilers and matchers. */
|
|---|
| 2063 |
|
|---|
| 2064 | static struct
|
|---|
| 2065 | {
|
|---|
| 2066 | char name[12];
|
|---|
| 2067 | int syntax; /* used if compile == GEAcompile */
|
|---|
| 2068 | compile_fp_t compile;
|
|---|
| 2069 | execute_fp_t execute;
|
|---|
| 2070 | } const matchers[] = {
|
|---|
| 2071 | { "grep", RE_SYNTAX_GREP, GEAcompile, EGexecute },
|
|---|
| 2072 | { "egrep", RE_SYNTAX_EGREP, GEAcompile, EGexecute },
|
|---|
| 2073 | { "fgrep", 0, Fcompile, Fexecute, },
|
|---|
| 2074 | { "awk", RE_SYNTAX_AWK, GEAcompile, EGexecute },
|
|---|
| 2075 | { "gawk", RE_SYNTAX_GNU_AWK, GEAcompile, EGexecute },
|
|---|
| 2076 | { "posixawk", RE_SYNTAX_POSIX_AWK, GEAcompile, EGexecute },
|
|---|
| 2077 | #if HAVE_LIBPCRE
|
|---|
| 2078 | { "perl", 0, Pcompile, Pexecute, },
|
|---|
| 2079 | #endif
|
|---|
| 2080 | };
|
|---|
| 2081 | /* Keep these in sync with the 'matchers' table. */
|
|---|
| 2082 | enum { E_MATCHER_INDEX = 1, F_MATCHER_INDEX = 2, G_MATCHER_INDEX = 0 };
|
|---|
| 2083 |
|
|---|
| 2084 | /* Return the index of the matcher corresponding to M if available.
|
|---|
| 2085 | MATCHER is the index of the previous matcher, or -1 if none.
|
|---|
| 2086 | Exit in case of conflicts or if M is not available. */
|
|---|
| 2087 | static int
|
|---|
| 2088 | setmatcher (char const *m, int matcher)
|
|---|
| 2089 | {
|
|---|
| 2090 | for (int i = 0; i < sizeof matchers / sizeof *matchers; i++)
|
|---|
| 2091 | if (STREQ (m, matchers[i].name))
|
|---|
| 2092 | {
|
|---|
| 2093 | if (0 <= matcher && matcher != i)
|
|---|
| 2094 | die (EXIT_TROUBLE, 0, _("conflicting matchers specified"));
|
|---|
| 2095 | return i;
|
|---|
| 2096 | }
|
|---|
| 2097 |
|
|---|
| 2098 | #if !HAVE_LIBPCRE
|
|---|
| 2099 | if (STREQ (m, "perl"))
|
|---|
| 2100 | die (EXIT_TROUBLE, 0,
|
|---|
| 2101 | _("Perl matching not supported in a --disable-perl-regexp build"));
|
|---|
| 2102 | #endif
|
|---|
| 2103 | die (EXIT_TROUBLE, 0, _("invalid matcher %s"), m);
|
|---|
| 2104 | }
|
|---|
| 2105 |
|
|---|
| 2106 | /* Get the next non-digit option from ARGC and ARGV.
|
|---|
| 2107 | Return -1 if there are no more options.
|
|---|
| 2108 | Process any digit options that were encountered on the way,
|
|---|
| 2109 | and store the resulting integer into *DEFAULT_CONTEXT. */
|
|---|
| 2110 | static int
|
|---|
| 2111 | get_nondigit_option (int argc, char *const *argv, intmax_t *default_context)
|
|---|
| 2112 | {
|
|---|
| 2113 | static int prev_digit_optind = -1;
|
|---|
| 2114 | int this_digit_optind;
|
|---|
| 2115 | bool was_digit;
|
|---|
| 2116 | char buf[INT_BUFSIZE_BOUND (intmax_t) + 4];
|
|---|
| 2117 | char *p = buf;
|
|---|
| 2118 | int opt;
|
|---|
| 2119 |
|
|---|
| 2120 | was_digit = false;
|
|---|
| 2121 | this_digit_optind = optind;
|
|---|
| 2122 | while (true)
|
|---|
| 2123 | {
|
|---|
| 2124 | opt = getopt_long (argc, (char **) argv, short_options,
|
|---|
| 2125 | long_options, NULL);
|
|---|
| 2126 | if (! c_isdigit (opt))
|
|---|
| 2127 | break;
|
|---|
| 2128 |
|
|---|
| 2129 | if (prev_digit_optind != this_digit_optind || !was_digit)
|
|---|
| 2130 | {
|
|---|
| 2131 | /* Reset to start another context length argument. */
|
|---|
| 2132 | p = buf;
|
|---|
| 2133 | }
|
|---|
| 2134 | else
|
|---|
| 2135 | {
|
|---|
| 2136 | /* Suppress trivial leading zeros, to avoid incorrect
|
|---|
| 2137 | diagnostic on strings like 00000000000. */
|
|---|
| 2138 | p -= buf[0] == '0';
|
|---|
| 2139 | }
|
|---|
| 2140 |
|
|---|
| 2141 | if (p == buf + sizeof buf - 4)
|
|---|
| 2142 | {
|
|---|
| 2143 | /* Too many digits. Append "..." to make context_length_arg
|
|---|
| 2144 | complain about "X...", where X contains the digits seen
|
|---|
| 2145 | so far. */
|
|---|
| 2146 | strcpy (p, "...");
|
|---|
| 2147 | p += 3;
|
|---|
| 2148 | break;
|
|---|
| 2149 | }
|
|---|
| 2150 | *p++ = opt;
|
|---|
| 2151 |
|
|---|
| 2152 | was_digit = true;
|
|---|
| 2153 | prev_digit_optind = this_digit_optind;
|
|---|
| 2154 | this_digit_optind = optind;
|
|---|
| 2155 | }
|
|---|
| 2156 | if (p != buf)
|
|---|
| 2157 | {
|
|---|
| 2158 | *p = '\0';
|
|---|
| 2159 | context_length_arg (buf, default_context);
|
|---|
| 2160 | }
|
|---|
| 2161 |
|
|---|
| 2162 | return opt;
|
|---|
| 2163 | }
|
|---|
| 2164 |
|
|---|
| 2165 | /* Parse GREP_COLORS. The default would look like:
|
|---|
| 2166 | GREP_COLORS='ms=01;31:mc=01;31:sl=:cx=:fn=35:ln=32:bn=32:se=36'
|
|---|
| 2167 | with boolean capabilities (ne and rv) unset (i.e., omitted).
|
|---|
| 2168 | No character escaping is needed or supported. */
|
|---|
| 2169 | static void
|
|---|
| 2170 | parse_grep_colors (void)
|
|---|
| 2171 | {
|
|---|
| 2172 | const char *p;
|
|---|
| 2173 | char *q;
|
|---|
| 2174 | char *name;
|
|---|
| 2175 | char *val;
|
|---|
| 2176 |
|
|---|
| 2177 | p = getenv ("GREP_COLORS"); /* Plural! */
|
|---|
| 2178 | if (p == NULL || *p == '\0')
|
|---|
| 2179 | return;
|
|---|
| 2180 |
|
|---|
| 2181 | /* Work off a writable copy. */
|
|---|
| 2182 | q = xstrdup (p);
|
|---|
| 2183 |
|
|---|
| 2184 | name = q;
|
|---|
| 2185 | val = NULL;
|
|---|
| 2186 | /* From now on, be well-formed or you're gone. */
|
|---|
| 2187 | for (;;)
|
|---|
| 2188 | if (*q == ':' || *q == '\0')
|
|---|
| 2189 | {
|
|---|
| 2190 | char c = *q;
|
|---|
| 2191 | struct color_cap const *cap;
|
|---|
| 2192 |
|
|---|
| 2193 | *q++ = '\0'; /* Terminate name or val. */
|
|---|
| 2194 | /* Empty name without val (empty cap)
|
|---|
| 2195 | * won't match and will be ignored. */
|
|---|
| 2196 | for (cap = color_dict; cap->name; cap++)
|
|---|
| 2197 | if (STREQ (cap->name, name))
|
|---|
| 2198 | break;
|
|---|
| 2199 | /* If name unknown, go on for forward compatibility. */
|
|---|
| 2200 | if (cap->var && val)
|
|---|
| 2201 | *(cap->var) = val;
|
|---|
| 2202 | if (cap->fct)
|
|---|
| 2203 | cap->fct ();
|
|---|
| 2204 | if (c == '\0')
|
|---|
| 2205 | return;
|
|---|
| 2206 | name = q;
|
|---|
| 2207 | val = NULL;
|
|---|
| 2208 | }
|
|---|
| 2209 | else if (*q == '=')
|
|---|
| 2210 | {
|
|---|
| 2211 | if (q == name || val)
|
|---|
| 2212 | return;
|
|---|
| 2213 | *q++ = '\0'; /* Terminate name. */
|
|---|
| 2214 | val = q; /* Can be the empty string. */
|
|---|
| 2215 | }
|
|---|
| 2216 | else if (val == NULL)
|
|---|
| 2217 | q++; /* Accumulate name. */
|
|---|
| 2218 | else if (*q == ';' || c_isdigit (*q))
|
|---|
| 2219 | q++; /* Accumulate val. Protect the terminal from being sent crap. */
|
|---|
| 2220 | else
|
|---|
| 2221 | return;
|
|---|
| 2222 | }
|
|---|
| 2223 |
|
|---|
/* Return true if the PATLEN bytes starting at PAT include a sequence
   for which mb_clen reports one of the two failure lengths,
   (size_t) -1 or (size_t) -2; return false if every character scans
   cleanly.  */
static bool
contains_encoding_error (char const *pat, size_t patlen)
{
  mbstate_t state = { 0 };
  size_t pos = 0;

  while (pos < patlen)
    {
      size_t nbytes = mb_clen (pat + pos, patlen - pos, &state);
      if (nbytes == (size_t) -1 || nbytes == (size_t) -2)
        return true;
      pos += nbytes;
    }

  return false;
}
| 2239 |
|
|---|
| 2240 | /* When ignoring case and (-E or -F or -G), then for each single-byte
|
|---|
| 2241 | character I, ok_fold[I] is 1 if every case folded counterpart of I
|
|---|
| 2242 | is also single-byte, and is -1 otherwise. */
|
|---|
| 2243 | static signed char ok_fold[NCHAR];
|
|---|
| 2244 | static void
|
|---|
| 2245 | setup_ok_fold (void)
|
|---|
| 2246 | {
|
|---|
| 2247 | for (int i = 0; i < NCHAR; i++)
|
|---|
| 2248 | {
|
|---|
| 2249 | wint_t wi = localeinfo.sbctowc[i];
|
|---|
| 2250 | if (wi == WEOF)
|
|---|
| 2251 | continue;
|
|---|
| 2252 |
|
|---|
| 2253 | int ok = 1;
|
|---|
| 2254 | wchar_t folded[CASE_FOLDED_BUFSIZE];
|
|---|
| 2255 | for (int n = case_folded_counterparts (wi, folded); 0 <= --n; )
|
|---|
| 2256 | {
|
|---|
| 2257 | char buf[MB_LEN_MAX];
|
|---|
| 2258 | mbstate_t s = { 0 };
|
|---|
| 2259 | if (wcrtomb (buf, folded[n], &s) != 1)
|
|---|
| 2260 | {
|
|---|
| 2261 | ok = -1;
|
|---|
| 2262 | break;
|
|---|
| 2263 | }
|
|---|
| 2264 | }
|
|---|
| 2265 | ok_fold[i] = ok;
|
|---|
| 2266 | }
|
|---|
| 2267 | }
|
|---|
| 2268 |
|
|---|
| 2269 | /* Return the number of bytes in the initial character of PAT, of size
|
|---|
| 2270 | PATLEN, if Fcompile can handle that character. Return -1 if
|
|---|
| 2271 | Fcompile cannot handle it. MBS is the multibyte conversion state.
|
|---|
| 2272 | PATLEN must be nonzero. */
|
|---|
| 2273 |
|
|---|
| 2274 | static int
|
|---|
| 2275 | fgrep_icase_charlen (char const *pat, size_t patlen, mbstate_t *mbs)
|
|---|
| 2276 | {
|
|---|
| 2277 | unsigned char pat0 = pat[0];
|
|---|
| 2278 |
|
|---|
| 2279 | /* If PAT starts with a single-byte character, Fcompile works if
|
|---|
| 2280 | every case folded counterpart is also single-byte. */
|
|---|
| 2281 | if (localeinfo.sbctowc[pat0] != WEOF)
|
|---|
| 2282 | return ok_fold[pat0];
|
|---|
| 2283 |
|
|---|
| 2284 | wchar_t wc;
|
|---|
| 2285 | size_t wn = mbrtowc (&wc, pat, patlen, mbs);
|
|---|
| 2286 |
|
|---|
| 2287 | /* If PAT starts with an encoding error, Fcompile does not work. */
|
|---|
| 2288 | if (MB_LEN_MAX < wn)
|
|---|
| 2289 | return -1;
|
|---|
| 2290 |
|
|---|
| 2291 | /* PAT starts with a multibyte character. Fcompile works if the
|
|---|
| 2292 | character has no case folded counterparts and toupper translates
|
|---|
| 2293 | none of its encoding's bytes. */
|
|---|
| 2294 | wchar_t folded[CASE_FOLDED_BUFSIZE];
|
|---|
| 2295 | if (case_folded_counterparts (wc, folded))
|
|---|
| 2296 | return -1;
|
|---|
| 2297 | for (int i = wn; 0 < --i; )
|
|---|
| 2298 | {
|
|---|
| 2299 | unsigned char c = pat[i];
|
|---|
| 2300 | if (toupper (c) != c)
|
|---|
| 2301 | return -1;
|
|---|
| 2302 | }
|
|---|
| 2303 | return wn;
|
|---|
| 2304 | }
|
|---|
| 2305 |
|
|---|
/* Return true if Fcompile can process the -F patterns PAT (PATLEN
   bytes) while ignoring case: that is, if fgrep_icase_charlen accepts
   every character, meaning single-byte characters whose case-folded
   counterparts are all single-byte, and multibyte characters that are
   not subject to case folding.  */

static bool
fgrep_icase_available (char const *pat, size_t patlen)
{
  mbstate_t state = { 0 };
  size_t offset = 0;

  while (offset < patlen)
    {
      int step = fgrep_icase_charlen (pat + offset, patlen - offset, &state);
      if (step < 0)
        return false;
      offset += step;
    }

  return true;
}
| 2326 |
|
|---|
/* Rewrite the fgrep-style pattern *KEYS_P, of size *LEN_P, as an
   equivalent grep-style (basic regular expression) pattern by putting
   a backslash in front of each '$', '*', '.', '[', '\\' and '^'.
   The old buffer is freed; *KEYS_P is set to a newly allocated buffer
   and *LEN_P to the new length.  A '\n' is stored just past the
   returned length.  */

void
fgrep_to_grep_pattern (char **keys_p, size_t *len_p)
{
  char *src = *keys_p;
  size_t remaining = *len_p;
  mbstate_t state = { 0 };

  /* Every input byte can at worst turn into two output bytes; the
     extra element leaves room for the trailing newline.  */
  char *escaped = xnmalloc (remaining + 1, 2);
  char *dst = escaped;

  while (remaining != 0)
    {
      size_t n = mb_clen (src, remaining, &state);
      bool one_byte = n == 1;

      if (n == (size_t) -1)
        {
          /* Encoding error: reset the conversion state and treat the
             offending byte as a character on its own.  */
          memset (&state, 0, sizeof state);
          n = 1;
          one_byte = true;
        }
      else if (n == (size_t) -2)
        {
          /* Incomplete character at the end: take all remaining bytes
             verbatim.  */
          n = remaining;
        }

      if (one_byte)
        {
          char ch = *src;
          if (ch == '$' || ch == '*' || ch == '.'
              || ch == '[' || ch == '\\' || ch == '^')
            *dst++ = '\\';
          *dst++ = ch;
        }
      else
        dst = mempcpy (dst, src, n);

      src += n;
      remaining -= n;
    }

  *dst = '\n';
  free (*keys_p);
  *keys_p = escaped;
  *len_p = dst - escaped;
}
| 2371 |
|
|---|
| 2372 | /* If it is easy, convert the MATCHER-style patterns KEYS (of size
|
|---|
| 2373 | *LEN_P) to -F style, update *LEN_P to a possibly-smaller value, and
|
|---|
| 2374 | return F_MATCHER_INDEX. If not, leave KEYS and *LEN_P alone and
|
|---|
| 2375 | return MATCHER. This function is conservative and sometimes misses
|
|---|
| 2376 | conversions, e.g., it does not convert the -E pattern "(a|a|[aa])"
|
|---|
| 2377 | to the -F pattern "a". */
|
|---|
| 2378 |
|
|---|
| 2379 | static int
|
|---|
| 2380 | try_fgrep_pattern (int matcher, char *keys, size_t *len_p)
|
|---|
| 2381 | {
|
|---|
| 2382 | int result = matcher;
|
|---|
| 2383 | size_t len = *len_p;
|
|---|
| 2384 | char *new_keys = xmalloc (len + 1);
|
|---|
| 2385 | char *p = new_keys;
|
|---|
| 2386 | char const *q = keys;
|
|---|
| 2387 | mbstate_t mb_state = { 0 };
|
|---|
| 2388 |
|
|---|
| 2389 | while (len != 0)
|
|---|
| 2390 | {
|
|---|
| 2391 | switch (*q)
|
|---|
| 2392 | {
|
|---|
| 2393 | case '$': case '*': case '.': case '[': case '^':
|
|---|
| 2394 | goto fail;
|
|---|
| 2395 |
|
|---|
| 2396 | case '(': case '+': case '?': case '{': case '|':
|
|---|
| 2397 | /* There is no "case ')'" here, as "grep -E ')'" acts like
|
|---|
| 2398 | "grep -E '\)'". */
|
|---|
| 2399 | if (matcher != G_MATCHER_INDEX)
|
|---|
| 2400 | goto fail;
|
|---|
| 2401 | break;
|
|---|
| 2402 |
|
|---|
| 2403 | case '\\':
|
|---|
| 2404 | if (1 < len)
|
|---|
| 2405 | switch (q[1])
|
|---|
| 2406 | {
|
|---|
| 2407 | case '\n':
|
|---|
| 2408 | case 'B': case 'S': case 'W': case'\'': case '<':
|
|---|
| 2409 | case 'b': case 's': case 'w': case '`': case '>':
|
|---|
| 2410 | case '1': case '2': case '3': case '4':
|
|---|
| 2411 | case '5': case '6': case '7': case '8': case '9':
|
|---|
| 2412 | goto fail;
|
|---|
| 2413 |
|
|---|
| 2414 | case '(': case '+': case '?': case '{': case '|':
|
|---|
| 2415 | /* Pass '\)' to GEAcompile so it can complain. Otherwise,
|
|---|
| 2416 | "grep '\)'" would act like "grep ')'" while "grep '.*\)'
|
|---|
| 2417 | would be an error. */
|
|---|
| 2418 | case ')':
|
|---|
| 2419 | if (matcher == G_MATCHER_INDEX)
|
|---|
| 2420 | goto fail;
|
|---|
| 2421 | FALLTHROUGH;
|
|---|
| 2422 | default:
|
|---|
| 2423 | q++, len--;
|
|---|
| 2424 | break;
|
|---|
| 2425 | }
|
|---|
| 2426 | break;
|
|---|
| 2427 | }
|
|---|
| 2428 |
|
|---|
| 2429 | {
|
|---|
| 2430 | size_t n;
|
|---|
| 2431 | if (match_icase)
|
|---|
| 2432 | {
|
|---|
| 2433 | int ni = fgrep_icase_charlen (q, len, &mb_state);
|
|---|
| 2434 | if (ni < 0)
|
|---|
| 2435 | goto fail;
|
|---|
| 2436 | n = ni;
|
|---|
| 2437 | }
|
|---|
| 2438 | else
|
|---|
| 2439 | {
|
|---|
| 2440 | n = mb_clen (q, len, &mb_state);
|
|---|
| 2441 | if (MB_LEN_MAX < n)
|
|---|
| 2442 | goto fail;
|
|---|
| 2443 | }
|
|---|
| 2444 |
|
|---|
| 2445 | p = mempcpy (p, q, n);
|
|---|
| 2446 | q += n;
|
|---|
| 2447 | len -= n;
|
|---|
| 2448 | }
|
|---|
| 2449 | }
|
|---|
| 2450 |
|
|---|
| 2451 | if (*len_p != p - new_keys)
|
|---|
| 2452 | {
|
|---|
| 2453 | *len_p = p - new_keys;
|
|---|
| 2454 | char *keys_end = mempcpy (keys, new_keys, p - new_keys);
|
|---|
| 2455 | *keys_end = '\n';
|
|---|
| 2456 | }
|
|---|
| 2457 | result = F_MATCHER_INDEX;
|
|---|
| 2458 |
|
|---|
| 2459 | fail:
|
|---|
| 2460 | free (new_keys);
|
|---|
| 2461 | return result;
|
|---|
| 2462 | }
|
|---|
| 2463 |
|
|---|
| 2464 | int
|
|---|
| 2465 | main (int argc, char **argv)
|
|---|
| 2466 | {
|
|---|
| 2467 | char *keys = NULL;
|
|---|
| 2468 | size_t keycc = 0, keyalloc = 0;
|
|---|
| 2469 | int matcher = -1;
|
|---|
| 2470 | int opt;
|
|---|
| 2471 | int prev_optind, last_recursive;
|
|---|
| 2472 | int fread_errno;
|
|---|
| 2473 | intmax_t default_context;
|
|---|
| 2474 | FILE *fp;
|
|---|
| 2475 | exit_failure = EXIT_TROUBLE;
|
|---|
| 2476 | initialize_main (&argc, &argv);
|
|---|
| 2477 |
|
|---|
| 2478 | /* Which command-line options have been specified for filename output.
|
|---|
| 2479 | -1 for -h, 1 for -H, 0 for neither. */
|
|---|
| 2480 | int filename_option = 0;
|
|---|
| 2481 |
|
|---|
| 2482 | eolbyte = '\n';
|
|---|
| 2483 | filename_mask = ~0;
|
|---|
| 2484 |
|
|---|
| 2485 | max_count = INTMAX_MAX;
|
|---|
| 2486 |
|
|---|
| 2487 | /* The value -1 means to use DEFAULT_CONTEXT. */
|
|---|
| 2488 | out_after = out_before = -1;
|
|---|
| 2489 | /* Default before/after context: changed by -C/-NUM options */
|
|---|
| 2490 | default_context = -1;
|
|---|
| 2491 | /* Changed by -o option */
|
|---|
| 2492 | only_matching = false;
|
|---|
| 2493 |
|
|---|
| 2494 | /* Internationalization. */
|
|---|
| 2495 | #if defined HAVE_SETLOCALE
|
|---|
| 2496 | setlocale (LC_ALL, "");
|
|---|
| 2497 | #endif
|
|---|
| 2498 | #if defined ENABLE_NLS
|
|---|
| 2499 | bindtextdomain (PACKAGE, LOCALEDIR);
|
|---|
| 2500 | textdomain (PACKAGE);
|
|---|
| 2501 | #endif
|
|---|
| 2502 |
|
|---|
| 2503 | init_localeinfo (&localeinfo);
|
|---|
| 2504 |
|
|---|
| 2505 | atexit (clean_up_stdout);
|
|---|
| 2506 | c_stack_action (NULL);
|
|---|
| 2507 |
|
|---|
| 2508 | last_recursive = 0;
|
|---|
| 2509 |
|
|---|
| 2510 | pattern_table = hash_initialize (0, 0, hash_pattern, compare_patterns, 0);
|
|---|
| 2511 | if (!pattern_table)
|
|---|
| 2512 | xalloc_die ();
|
|---|
| 2513 |
|
|---|
| 2514 | while (prev_optind = optind,
|
|---|
| 2515 | (opt = get_nondigit_option (argc, argv, &default_context)) != -1)
|
|---|
| 2516 | switch (opt)
|
|---|
| 2517 | {
|
|---|
| 2518 | case 'A':
|
|---|
| 2519 | context_length_arg (optarg, &out_after);
|
|---|
| 2520 | break;
|
|---|
| 2521 |
|
|---|
| 2522 | case 'B':
|
|---|
| 2523 | context_length_arg (optarg, &out_before);
|
|---|
| 2524 | break;
|
|---|
| 2525 |
|
|---|
| 2526 | case 'C':
|
|---|
| 2527 | /* Set output match context, but let any explicit leading or
|
|---|
| 2528 | trailing amount specified with -A or -B stand. */
|
|---|
| 2529 | context_length_arg (optarg, &default_context);
|
|---|
| 2530 | break;
|
|---|
| 2531 |
|
|---|
| 2532 | case 'D':
|
|---|
| 2533 | if (STREQ (optarg, "read"))
|
|---|
| 2534 | devices = READ_DEVICES;
|
|---|
| 2535 | else if (STREQ (optarg, "skip"))
|
|---|
| 2536 | devices = SKIP_DEVICES;
|
|---|
| 2537 | else
|
|---|
| 2538 | die (EXIT_TROUBLE, 0, _("unknown devices method"));
|
|---|
| 2539 | break;
|
|---|
| 2540 |
|
|---|
| 2541 | case 'E':
|
|---|
| 2542 | matcher = setmatcher ("egrep", matcher);
|
|---|
| 2543 | break;
|
|---|
| 2544 |
|
|---|
| 2545 | case 'F':
|
|---|
| 2546 | matcher = setmatcher ("fgrep", matcher);
|
|---|
| 2547 | break;
|
|---|
| 2548 |
|
|---|
| 2549 | case 'P':
|
|---|
| 2550 | matcher = setmatcher ("perl", matcher);
|
|---|
| 2551 | break;
|
|---|
| 2552 |
|
|---|
| 2553 | case 'G':
|
|---|
| 2554 | matcher = setmatcher ("grep", matcher);
|
|---|
| 2555 | break;
|
|---|
| 2556 |
|
|---|
| 2557 | case 'X': /* undocumented on purpose */
|
|---|
| 2558 | matcher = setmatcher (optarg, matcher);
|
|---|
| 2559 | break;
|
|---|
| 2560 |
|
|---|
| 2561 | case 'H':
|
|---|
| 2562 | filename_option = 1;
|
|---|
| 2563 | break;
|
|---|
| 2564 |
|
|---|
| 2565 | case 'I':
|
|---|
| 2566 | binary_files = WITHOUT_MATCH_BINARY_FILES;
|
|---|
| 2567 | break;
|
|---|
| 2568 |
|
|---|
| 2569 | case 'T':
|
|---|
| 2570 | align_tabs = true;
|
|---|
| 2571 | break;
|
|---|
| 2572 |
|
|---|
| 2573 | case 'U':
|
|---|
| 2574 | if (O_BINARY)
|
|---|
| 2575 | binary = true;
|
|---|
| 2576 | break;
|
|---|
| 2577 |
|
|---|
| 2578 | case 'u':
|
|---|
| 2579 | /* Obsolete option; it had no effect; FIXME: remove in 2023 */
|
|---|
| 2580 | error (0, 0, _("warning: --unix-byte-offsets (-u) is obsolete"));
|
|---|
| 2581 | break;
|
|---|
| 2582 |
|
|---|
| 2583 | case 'V':
|
|---|
| 2584 | show_version = true;
|
|---|
| 2585 | break;
|
|---|
| 2586 |
|
|---|
| 2587 | case 'a':
|
|---|
| 2588 | binary_files = TEXT_BINARY_FILES;
|
|---|
| 2589 | break;
|
|---|
| 2590 |
|
|---|
| 2591 | case 'b':
|
|---|
| 2592 | out_byte = true;
|
|---|
| 2593 | break;
|
|---|
| 2594 |
|
|---|
| 2595 | case 'c':
|
|---|
| 2596 | count_matches = true;
|
|---|
| 2597 | break;
|
|---|
| 2598 |
|
|---|
| 2599 | case 'd':
|
|---|
| 2600 | directories = XARGMATCH ("--directories", optarg,
|
|---|
| 2601 | directories_args, directories_types);
|
|---|
| 2602 | if (directories == RECURSE_DIRECTORIES)
|
|---|
| 2603 | last_recursive = prev_optind;
|
|---|
| 2604 | break;
|
|---|
| 2605 |
|
|---|
| 2606 | case 'e':
|
|---|
| 2607 | {
|
|---|
| 2608 | ptrdiff_t cc = strlen (optarg);
|
|---|
| 2609 | if (keyalloc < keycc + cc + 1)
|
|---|
| 2610 | {
|
|---|
| 2611 | keyalloc = keycc + cc + 1;
|
|---|
| 2612 | pattern_array = keys = x2realloc (keys, &keyalloc);
|
|---|
| 2613 | }
|
|---|
| 2614 | char *keyend = mempcpy (keys + keycc, optarg, cc);
|
|---|
| 2615 | *keyend = '\n';
|
|---|
| 2616 | keycc = update_patterns (keys, keycc, keycc + cc + 1, "");
|
|---|
| 2617 | }
|
|---|
| 2618 | break;
|
|---|
| 2619 |
|
|---|
| 2620 | case 'f':
|
|---|
| 2621 | {
|
|---|
| 2622 | if (STREQ (optarg, "-"))
|
|---|
| 2623 | {
|
|---|
| 2624 | if (binary)
|
|---|
| 2625 | xset_binary_mode (STDIN_FILENO, O_BINARY);
|
|---|
| 2626 | fp = stdin;
|
|---|
| 2627 | }
|
|---|
| 2628 | else
|
|---|
| 2629 | {
|
|---|
| 2630 | fp = fopen (optarg, binary ? "rb" : "r");
|
|---|
| 2631 | if (!fp)
|
|---|
| 2632 | die (EXIT_TROUBLE, errno, "%s", optarg);
|
|---|
| 2633 | }
|
|---|
| 2634 | ptrdiff_t newkeycc = keycc, cc;
|
|---|
| 2635 | for (;; newkeycc += cc)
|
|---|
| 2636 | {
|
|---|
| 2637 | if (keyalloc <= newkeycc + 1)
|
|---|
| 2638 | pattern_array = keys = x2realloc (keys, &keyalloc);
|
|---|
| 2639 | cc = fread (keys + newkeycc, 1, keyalloc - (newkeycc + 1), fp);
|
|---|
| 2640 | if (cc == 0)
|
|---|
| 2641 | break;
|
|---|
| 2642 | }
|
|---|
| 2643 | fread_errno = errno;
|
|---|
| 2644 | if (ferror (fp))
|
|---|
| 2645 | die (EXIT_TROUBLE, fread_errno, "%s", optarg);
|
|---|
| 2646 | if (fp != stdin)
|
|---|
| 2647 | fclose (fp);
|
|---|
| 2648 | /* Append final newline if file ended in non-newline. */
|
|---|
| 2649 | if (newkeycc != keycc && keys[newkeycc - 1] != '\n')
|
|---|
| 2650 | keys[newkeycc++] = '\n';
|
|---|
| 2651 | keycc = update_patterns (keys, keycc, newkeycc, optarg);
|
|---|
| 2652 | }
|
|---|
| 2653 | break;
|
|---|
| 2654 |
|
|---|
| 2655 | case 'h':
|
|---|
| 2656 | filename_option = -1;
|
|---|
| 2657 | break;
|
|---|
| 2658 |
|
|---|
| 2659 | case 'i':
|
|---|
| 2660 | case 'y': /* For old-timers . . . */
|
|---|
| 2661 | match_icase = true;
|
|---|
| 2662 | break;
|
|---|
| 2663 |
|
|---|
| 2664 | case NO_IGNORE_CASE_OPTION:
|
|---|
| 2665 | match_icase = false;
|
|---|
| 2666 | break;
|
|---|
| 2667 |
|
|---|
| 2668 | case 'L':
|
|---|
| 2669 | /* Like -l, except list files that don't contain matches.
|
|---|
| 2670 | Inspired by the same option in Hume's gre. */
|
|---|
| 2671 | list_files = LISTFILES_NONMATCHING;
|
|---|
| 2672 | break;
|
|---|
| 2673 |
|
|---|
| 2674 | case 'l':
|
|---|
| 2675 | list_files = LISTFILES_MATCHING;
|
|---|
| 2676 | break;
|
|---|
| 2677 |
|
|---|
| 2678 | case 'm':
|
|---|
| 2679 | switch (xstrtoimax (optarg, 0, 10, &max_count, ""))
|
|---|
| 2680 | {
|
|---|
| 2681 | case LONGINT_OK:
|
|---|
| 2682 | case LONGINT_OVERFLOW:
|
|---|
| 2683 | break;
|
|---|
| 2684 |
|
|---|
| 2685 | default:
|
|---|
| 2686 | die (EXIT_TROUBLE, 0, _("invalid max count"));
|
|---|
| 2687 | }
|
|---|
| 2688 | break;
|
|---|
| 2689 |
|
|---|
| 2690 | case 'n':
|
|---|
| 2691 | out_line = true;
|
|---|
| 2692 | break;
|
|---|
| 2693 |
|
|---|
| 2694 | case 'o':
|
|---|
| 2695 | only_matching = true;
|
|---|
| 2696 | break;
|
|---|
| 2697 |
|
|---|
| 2698 | case 'q':
|
|---|
| 2699 | exit_on_match = true;
|
|---|
| 2700 | exit_failure = 0;
|
|---|
| 2701 | break;
|
|---|
| 2702 |
|
|---|
| 2703 | case 'R':
|
|---|
| 2704 | fts_options = basic_fts_options | FTS_LOGICAL;
|
|---|
| 2705 | FALLTHROUGH;
|
|---|
| 2706 | case 'r':
|
|---|
| 2707 | directories = RECURSE_DIRECTORIES;
|
|---|
| 2708 | last_recursive = prev_optind;
|
|---|
| 2709 | break;
|
|---|
| 2710 |
|
|---|
| 2711 | case 's':
|
|---|
| 2712 | suppress_errors = true;
|
|---|
| 2713 | break;
|
|---|
| 2714 |
|
|---|
| 2715 | case 'v':
|
|---|
| 2716 | out_invert = true;
|
|---|
| 2717 | break;
|
|---|
| 2718 |
|
|---|
| 2719 | case 'w':
|
|---|
| 2720 | wordinit ();
|
|---|
| 2721 | match_words = true;
|
|---|
| 2722 | break;
|
|---|
| 2723 |
|
|---|
| 2724 | case 'x':
|
|---|
| 2725 | match_lines = true;
|
|---|
| 2726 | break;
|
|---|
| 2727 |
|
|---|
| 2728 | case 'Z':
|
|---|
| 2729 | filename_mask = 0;
|
|---|
| 2730 | break;
|
|---|
| 2731 |
|
|---|
| 2732 | case 'z':
|
|---|
| 2733 | eolbyte = '\0';
|
|---|
| 2734 | break;
|
|---|
| 2735 |
|
|---|
| 2736 | case BINARY_FILES_OPTION:
|
|---|
| 2737 | if (STREQ (optarg, "binary"))
|
|---|
| 2738 | binary_files = BINARY_BINARY_FILES;
|
|---|
| 2739 | else if (STREQ (optarg, "text"))
|
|---|
| 2740 | binary_files = TEXT_BINARY_FILES;
|
|---|
| 2741 | else if (STREQ (optarg, "without-match"))
|
|---|
| 2742 | binary_files = WITHOUT_MATCH_BINARY_FILES;
|
|---|
| 2743 | else
|
|---|
| 2744 | die (EXIT_TROUBLE, 0, _("unknown binary-files type"));
|
|---|
| 2745 | break;
|
|---|
| 2746 |
|
|---|
| 2747 | case COLOR_OPTION:
|
|---|
| 2748 | if (optarg)
|
|---|
| 2749 | {
|
|---|
| 2750 | if (!c_strcasecmp (optarg, "always")
|
|---|
| 2751 | || !c_strcasecmp (optarg, "yes")
|
|---|
| 2752 | || !c_strcasecmp (optarg, "force"))
|
|---|
| 2753 | color_option = 1;
|
|---|
| 2754 | else if (!c_strcasecmp (optarg, "never")
|
|---|
| 2755 | || !c_strcasecmp (optarg, "no")
|
|---|
| 2756 | || !c_strcasecmp (optarg, "none"))
|
|---|
| 2757 | color_option = 0;
|
|---|
| 2758 | else if (!c_strcasecmp (optarg, "auto")
|
|---|
| 2759 | || !c_strcasecmp (optarg, "tty")
|
|---|
| 2760 | || !c_strcasecmp (optarg, "if-tty"))
|
|---|
| 2761 | color_option = 2;
|
|---|
| 2762 | else
|
|---|
| 2763 | show_help = 1;
|
|---|
| 2764 | }
|
|---|
| 2765 | else
|
|---|
| 2766 | color_option = 2;
|
|---|
| 2767 | break;
|
|---|
| 2768 |
|
|---|
| 2769 | case EXCLUDE_OPTION:
|
|---|
| 2770 | case INCLUDE_OPTION:
|
|---|
| 2771 | for (int cmd = 0; cmd < 2; cmd++)
|
|---|
| 2772 | {
|
|---|
| 2773 | if (!excluded_patterns[cmd])
|
|---|
| 2774 | excluded_patterns[cmd] = new_exclude ();
|
|---|
| 2775 | add_exclude (excluded_patterns[cmd], optarg,
|
|---|
| 2776 | ((opt == INCLUDE_OPTION ? EXCLUDE_INCLUDE : 0)
|
|---|
| 2777 | | exclude_options (cmd)));
|
|---|
| 2778 | }
|
|---|
| 2779 | break;
|
|---|
| 2780 | case EXCLUDE_FROM_OPTION:
|
|---|
| 2781 | for (int cmd = 0; cmd < 2; cmd++)
|
|---|
| 2782 | {
|
|---|
| 2783 | if (!excluded_patterns[cmd])
|
|---|
| 2784 | excluded_patterns[cmd] = new_exclude ();
|
|---|
| 2785 | if (add_exclude_file (add_exclude, excluded_patterns[cmd],
|
|---|
| 2786 | optarg, exclude_options (cmd), '\n')
|
|---|
| 2787 | != 0)
|
|---|
| 2788 | die (EXIT_TROUBLE, errno, "%s", optarg);
|
|---|
| 2789 | }
|
|---|
| 2790 | break;
|
|---|
| 2791 |
|
|---|
| 2792 | case EXCLUDE_DIRECTORY_OPTION:
|
|---|
| 2793 | strip_trailing_slashes (optarg);
|
|---|
| 2794 | for (int cmd = 0; cmd < 2; cmd++)
|
|---|
| 2795 | {
|
|---|
| 2796 | if (!excluded_directory_patterns[cmd])
|
|---|
| 2797 | excluded_directory_patterns[cmd] = new_exclude ();
|
|---|
| 2798 | add_exclude (excluded_directory_patterns[cmd], optarg,
|
|---|
| 2799 | exclude_options (cmd));
|
|---|
| 2800 | }
|
|---|
| 2801 | break;
|
|---|
| 2802 |
|
|---|
| 2803 | case GROUP_SEPARATOR_OPTION:
|
|---|
| 2804 | group_separator = optarg;
|
|---|
| 2805 | break;
|
|---|
| 2806 |
|
|---|
| 2807 | case LINE_BUFFERED_OPTION:
|
|---|
| 2808 | line_buffered = true;
|
|---|
| 2809 | break;
|
|---|
| 2810 |
|
|---|
| 2811 | case LABEL_OPTION:
|
|---|
| 2812 | label = optarg;
|
|---|
| 2813 | break;
|
|---|
| 2814 |
|
|---|
| 2815 | case 0:
|
|---|
| 2816 | /* long options */
|
|---|
| 2817 | break;
|
|---|
| 2818 |
|
|---|
| 2819 | default:
|
|---|
| 2820 | usage (EXIT_TROUBLE);
|
|---|
| 2821 | break;
|
|---|
| 2822 |
|
|---|
| 2823 | }
|
|---|
| 2824 |
|
|---|
| 2825 | if (show_version)
|
|---|
| 2826 | {
|
|---|
| 2827 | version_etc (stdout, getprogname (), PACKAGE_NAME, VERSION,
|
|---|
| 2828 | (char *) NULL);
|
|---|
| 2829 | puts (_("Written by Mike Haertel and others; see\n"
|
|---|
| 2830 | "<https://git.sv.gnu.org/cgit/grep.git/tree/AUTHORS>."));
|
|---|
| 2831 | return EXIT_SUCCESS;
|
|---|
| 2832 | }
|
|---|
| 2833 |
|
|---|
| 2834 | if (show_help)
|
|---|
| 2835 | usage (EXIT_SUCCESS);
|
|---|
| 2836 |
|
|---|
| 2837 | if (keys)
|
|---|
| 2838 | {
|
|---|
| 2839 | if (keycc == 0)
|
|---|
| 2840 | {
|
|---|
| 2841 | /* No keys were specified (e.g. -f /dev/null). Match nothing. */
|
|---|
| 2842 | out_invert ^= true;
|
|---|
| 2843 | match_lines = match_words = false;
|
|---|
| 2844 | keys[keycc++] = '\n';
|
|---|
| 2845 | }
|
|---|
| 2846 | }
|
|---|
| 2847 | else if (optind < argc)
|
|---|
| 2848 | {
|
|---|
| 2849 | /* Make a copy so that it can be reallocated or freed later. */
|
|---|
| 2850 | pattern_array = keys = xstrdup (argv[optind++]);
|
|---|
| 2851 | ptrdiff_t patlen = strlen (keys);
|
|---|
| 2852 | keys[patlen] = '\n';
|
|---|
| 2853 | keycc = update_patterns (keys, 0, patlen + 1, "");
|
|---|
| 2854 | }
|
|---|
| 2855 | else
|
|---|
| 2856 | usage (EXIT_TROUBLE);
|
|---|
| 2857 |
|
|---|
| 2858 | /* Strip trailing newline from keys. */
|
|---|
| 2859 | keycc--;
|
|---|
| 2860 |
|
|---|
| 2861 | hash_free (pattern_table);
|
|---|
| 2862 |
|
|---|
| 2863 | bool possibly_tty = false;
|
|---|
| 2864 | struct stat tmp_stat;
|
|---|
| 2865 | if (! exit_on_match && fstat (STDOUT_FILENO, &tmp_stat) == 0)
|
|---|
| 2866 | {
|
|---|
| 2867 | if (S_ISREG (tmp_stat.st_mode))
|
|---|
| 2868 | out_stat = tmp_stat;
|
|---|
| 2869 | else if (S_ISCHR (tmp_stat.st_mode))
|
|---|
| 2870 | {
|
|---|
| 2871 | struct stat null_stat;
|
|---|
| 2872 | if (stat ("/dev/null", &null_stat) == 0
|
|---|
| 2873 | && SAME_INODE (tmp_stat, null_stat))
|
|---|
| 2874 | dev_null_output = true;
|
|---|
| 2875 | else
|
|---|
| 2876 | possibly_tty = true;
|
|---|
| 2877 | }
|
|---|
| 2878 | }
|
|---|
| 2879 |
|
|---|
| 2880 | /* POSIX says -c, -l and -q are mutually exclusive. In this
|
|---|
| 2881 | implementation, -q overrides -l and -L, which in turn override -c. */
|
|---|
| 2882 | if (exit_on_match | dev_null_output)
|
|---|
| 2883 | list_files = LISTFILES_NONE;
|
|---|
| 2884 | if ((exit_on_match | dev_null_output) || list_files != LISTFILES_NONE)
|
|---|
| 2885 | {
|
|---|
| 2886 | count_matches = false;
|
|---|
| 2887 | done_on_match = true;
|
|---|
| 2888 | }
|
|---|
| 2889 | out_quiet = count_matches | done_on_match;
|
|---|
| 2890 |
|
|---|
| 2891 | if (out_after < 0)
|
|---|
| 2892 | out_after = default_context;
|
|---|
| 2893 | if (out_before < 0)
|
|---|
| 2894 | out_before = default_context;
|
|---|
| 2895 |
|
|---|
| 2896 | /* If it is easy to see that matching cannot succeed (e.g., 'grep -f
|
|---|
| 2897 | /dev/null'), fail without reading the input. */
|
|---|
| 2898 | if ((max_count == 0
|
|---|
| 2899 | || (keycc == 0 && out_invert && !match_lines && !match_words))
|
|---|
| 2900 | && list_files != LISTFILES_NONMATCHING)
|
|---|
| 2901 | return EXIT_FAILURE;
|
|---|
| 2902 |
|
|---|
| 2903 | if (color_option == 2)
|
|---|
| 2904 | color_option = possibly_tty && should_colorize () && isatty (STDOUT_FILENO);
|
|---|
| 2905 | init_colorize ();
|
|---|
| 2906 |
|
|---|
| 2907 | if (color_option)
|
|---|
| 2908 | {
|
|---|
| 2909 | /* Legacy. */
|
|---|
| 2910 | char *userval = getenv ("GREP_COLOR");
|
|---|
| 2911 | if (userval != NULL && *userval != '\0')
|
|---|
| 2912 | selected_match_color = context_match_color = userval;
|
|---|
| 2913 |
|
|---|
| 2914 | /* New GREP_COLORS has priority. */
|
|---|
| 2915 | parse_grep_colors ();
|
|---|
| 2916 | }
|
|---|
| 2917 |
|
|---|
| 2918 | initialize_unibyte_mask ();
|
|---|
| 2919 |
|
|---|
| 2920 | if (matcher < 0)
|
|---|
| 2921 | matcher = G_MATCHER_INDEX;
|
|---|
| 2922 |
|
|---|
| 2923 | if (matcher == F_MATCHER_INDEX
|
|---|
| 2924 | || matcher == E_MATCHER_INDEX || matcher == G_MATCHER_INDEX)
|
|---|
| 2925 | {
|
|---|
| 2926 | if (match_icase)
|
|---|
| 2927 | setup_ok_fold ();
|
|---|
| 2928 |
|
|---|
| 2929 | /* In a single-byte locale, switch from -F to -G if it is a single
|
|---|
| 2930 | pattern that matches words, where -G is typically faster. In a
|
|---|
| 2931 | multibyte locale, switch if the patterns have an encoding error
|
|---|
| 2932 | (where -F does not work) or if -i and the patterns will not work
|
|---|
| 2933 | for -iF. */
|
|---|
| 2934 | if (matcher == F_MATCHER_INDEX)
|
|---|
| 2935 | {
|
|---|
| 2936 | if (! localeinfo.multibyte
|
|---|
| 2937 | ? n_patterns == 1 && match_words
|
|---|
| 2938 | : (contains_encoding_error (keys, keycc)
|
|---|
| 2939 | || (match_icase && !fgrep_icase_available (keys, keycc))))
|
|---|
| 2940 | {
|
|---|
| 2941 | fgrep_to_grep_pattern (&pattern_array, &keycc);
|
|---|
| 2942 | keys = pattern_array;
|
|---|
| 2943 | matcher = G_MATCHER_INDEX;
|
|---|
| 2944 | }
|
|---|
| 2945 | }
|
|---|
| 2946 | /* With two or more patterns, if -F works then switch from either -E
|
|---|
| 2947 | or -G, as -F is probably faster then. */
|
|---|
| 2948 | else if (1 < n_patterns)
|
|---|
| 2949 | matcher = try_fgrep_pattern (matcher, keys, &keycc);
|
|---|
| 2950 | }
|
|---|
| 2951 |
|
|---|
| 2952 | execute = matchers[matcher].execute;
|
|---|
| 2953 | compiled_pattern =
|
|---|
| 2954 | matchers[matcher].compile (keys, keycc, matchers[matcher].syntax,
|
|---|
| 2955 | only_matching | color_option);
|
|---|
| 2956 | /* We need one byte prior and one after. */
|
|---|
| 2957 | char eolbytes[3] = { 0, eolbyte, 0 };
|
|---|
| 2958 | size_t match_size;
|
|---|
| 2959 | skip_empty_lines = ((execute (compiled_pattern, eolbytes + 1, 1,
|
|---|
| 2960 | &match_size, NULL) == 0)
|
|---|
| 2961 | == out_invert);
|
|---|
| 2962 |
|
|---|
| 2963 | int num_operands = argc - optind;
|
|---|
| 2964 | out_file = (filename_option == 0 && num_operands <= 1
|
|---|
| 2965 | ? - (directories == RECURSE_DIRECTORIES)
|
|---|
| 2966 | : 0 <= filename_option);
|
|---|
| 2967 |
|
|---|
| 2968 | if (binary)
|
|---|
| 2969 | xset_binary_mode (STDOUT_FILENO, O_BINARY);
|
|---|
| 2970 |
|
|---|
| 2971 | /* Prefer sysconf for page size, as getpagesize typically returns int. */
|
|---|
| 2972 | #ifdef _SC_PAGESIZE
|
|---|
| 2973 | long psize = sysconf (_SC_PAGESIZE);
|
|---|
| 2974 | #else
|
|---|
| 2975 | long psize = getpagesize ();
|
|---|
| 2976 | #endif
|
|---|
| 2977 | if (! (0 < psize && psize <= (SIZE_MAX - sizeof (uword)) / 2))
|
|---|
| 2978 | abort ();
|
|---|
| 2979 | pagesize = psize;
|
|---|
| 2980 | bufalloc = ALIGN_TO (INITIAL_BUFSIZE, pagesize) + pagesize + sizeof (uword);
|
|---|
| 2981 | buffer = xmalloc (bufalloc);
|
|---|
| 2982 |
|
|---|
| 2983 | if (fts_options & FTS_LOGICAL && devices == READ_COMMAND_LINE_DEVICES)
|
|---|
| 2984 | devices = READ_DEVICES;
|
|---|
| 2985 |
|
|---|
| 2986 | char *const *files;
|
|---|
| 2987 | if (0 < num_operands)
|
|---|
| 2988 | {
|
|---|
| 2989 | files = argv + optind;
|
|---|
| 2990 | }
|
|---|
| 2991 | else if (directories == RECURSE_DIRECTORIES && 0 < last_recursive)
|
|---|
| 2992 | {
|
|---|
| 2993 | static char *const cwd_only[] = { (char *) ".", NULL };
|
|---|
| 2994 | files = cwd_only;
|
|---|
| 2995 | omit_dot_slash = true;
|
|---|
| 2996 | }
|
|---|
| 2997 | else
|
|---|
| 2998 | {
|
|---|
| 2999 | static char *const stdin_only[] = { (char *) "-", NULL };
|
|---|
| 3000 | files = stdin_only;
|
|---|
| 3001 | }
|
|---|
| 3002 |
|
|---|
| 3003 | bool status = true;
|
|---|
| 3004 | do
|
|---|
| 3005 | status &= grep_command_line_arg (*files++);
|
|---|
| 3006 | while (*files != NULL);
|
|---|
| 3007 |
|
|---|
| 3008 | /* We register via atexit to test stdout. */
|
|---|
| 3009 | return errseen ? EXIT_TROUBLE : status;
|
|---|
| 3010 | }
|
|---|