source: trunk/diffutils/src/diff.c@ 2561

Last change on this file since 2561 was 2556, checked in by bird, 20 years ago

diffutils 2.8.1

File size: 36.4 KB
Line 
1/* diff - compare files line by line
2
3 Copyright (C) 1988, 1989, 1992, 1993, 1994, 1996, 1998, 2001, 2002
4 Free Software Foundation, Inc.
5
6 This file is part of GNU DIFF.
7
8 GNU DIFF is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
12
13 GNU DIFF is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
16 See the GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GNU DIFF; see the file COPYING.
20 If not, write to the Free Software Foundation,
21 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22
23#define GDIFF_MAIN
24#include "diff.h"
25#include <c-stack.h>
26#include <dirname.h>
27#include <error.h>
28#include <exclude.h>
29#include <exitfail.h>
30#include <fnmatch.h>
31#include <freesoft.h>
32#include <getopt.h>
33#include <hard-locale.h>
34#include <prepargs.h>
35#include <quotesys.h>
36#include <regex.h>
37#include <setmode.h>
38#include <xalloc.h>
39
/* Untranslated authorship notice; main passes it through _() when
   printing the -v/--version output.  */
static char const authorship_msgid[] =
  N_("Written by Paul Eggert, Mike Haertel, David Hayes,\n\
Richard Stallman, and Len Tower.");

/* Copyright line printed verbatim (not translated) by -v/--version.  */
static char const copyright_string[] =
  "Copyright (C) 2002 Free Software Foundation, Inc.";

/* Smallest gutter, in columns, allowed between the two halves of
   side-by-side output; may be overridden at build time.  */
#ifndef GUTTER_WIDTH_MINIMUM
# define GUTTER_WIDTH_MINIMUM 3
#endif
50
/* Accumulator for the regexps given by repeated -F or -I options.
   add_regexp appends each pattern to `regexps', separated by "\|";
   summarize_regexp_list later compiles the combined text into `buf'.  */
struct regexp_list
{
  char *regexps;        /* chars representing disjunction of the regexps */
  size_t len;           /* chars used in `regexps' */
  size_t size;          /* size malloc'ed for `regexps'; 0 if not malloc'ed */
  bool multiple_regexps;/* Does `regexps' represent a disjunction? */
  struct re_pattern_buffer *buf; /* compiled pattern buffer that
                                    re_compile_pattern fills in */
};
59
/* Forward declarations of the file-local functions defined below.  */
static int compare_files (struct comparison const *, char const *, char const *);
static void add_regexp (struct regexp_list *, char const *);
static void summarize_regexp_list (struct regexp_list *);
static void specify_style (enum output_style);
static void specify_value (char const **, char const *, char const *);
static void try_help (char const *, char const *) __attribute__((noreturn));
static void check_stdout (void);
static void usage (void);

/* If comparing directories, compare their common subdirectories
   recursively (-r).  */
static bool recursive;

/* In context diffs, show previous lines that match these regexps
   (-F, and the built-in pattern added by -p).  */
static struct regexp_list function_regexp_list;

/* Ignore changes affecting only lines that match these regexps (-I).  */
static struct regexp_list ignore_regexp_list;

#if HAVE_SETMODE_DOS
/* Use binary I/O when reading and writing data (--binary).
   On POSIX hosts, this has no effect.  */
static bool binary;
#endif

/* When comparing directories, if a file appears only in one
   directory, treat it as present but empty in the other (-N).
   Then `patch' would create the file with appropriate contents.  */
static bool new_file;

/* When comparing directories, if a file appears only in the second
   directory of the two, treat it as present but empty in the other
   (--unidirectional-new-file).
   Then `patch' would create the file with appropriate contents.  */
static bool unidirectional_new_file;

/* Report files compared that are the same (-s).
   Normally nothing is output when that happens.  */
static bool report_identical_files;
99
100
101/* Return a string containing the command options with which diff was invoked.
102 Spaces appear between what were separate ARGV-elements.
103 There is a space at the beginning but none at the end.
104 If there were no options, the result is an empty string.
105
106 Arguments: OPTIONVEC, a vector containing separate ARGV-elements, and COUNT,
107 the length of that vector. */
108
/* Build one freshly allocated string holding every element of
   OPTIONVEC (COUNT elements), each preceded by a single space and
   quoted by quote_system_arg.  With COUNT == 0 the result is the
   empty string.  The caller owns the returned buffer.  */
static char *
option_list (char **optionvec, int count)
{
  size_t needed = 1;            /* the terminating NUL */
  char *buffer;
  char *cursor;
  int k;

  /* Sizing pass: presumably quote_system_arg with a null destination
     only reports the quoted length -- its result is used as such.  */
  for (k = 0; k < count; k++)
    needed += 1 + quote_system_arg ((char *) 0, optionvec[k]);

  buffer = xmalloc (needed);
  cursor = buffer;

  /* Filling pass: separator, then the quoted argument.  */
  for (k = 0; k < count; k++)
    {
      *cursor = ' ';
      cursor++;
      cursor += quote_system_arg (cursor, optionvec[k]);
    }

  *cursor = 0;
  return buffer;
}
131
132
133/* Return an option value suitable for add_exclude. */
134
135static int
136exclude_options (void)
137{
138 return EXCLUDE_WILDCARDS | (ignore_file_name_case ? FNM_CASEFOLD : 0);
139}
140
141
/* Short option letters accepted by getopt_long; a trailing `:' marks
   a letter that takes an argument.  The digits implement the
   obsolete -NUM context option handled in main.  */
static char const shortopts[] =
"0123456789abBcC:dD:eEfF:hHiI:lL:nNpPqrsS:tTuU:vwW:x:X:y";
144
/* Values for long options that do not have single-letter equivalents.
   They start above CHAR_MAX so they cannot collide with any short
   option character returned by getopt_long.  */
enum
{
  BINARY_OPTION = CHAR_MAX + 1,
  FROM_FILE_OPTION,
  HELP_OPTION,
  HORIZON_LINES_OPTION,
  IGNORE_FILE_NAME_CASE_OPTION,
  INHIBIT_HUNK_MERGE_OPTION,
  LEFT_COLUMN_OPTION,
  LINE_FORMAT_OPTION,
  NO_IGNORE_FILE_NAME_CASE_OPTION,
  NORMAL_OPTION,
  SDIFF_MERGE_ASSIST_OPTION,
  STRIP_TRAILING_CR_OPTION,
  SUPPRESS_COMMON_LINES_OPTION,
  TO_FILE_OPTION,

  /* These options must be in sequence: main subtracts
     UNCHANGED_LINE_FORMAT_OPTION to index line_format[] and
     line_format_option[].  */
  UNCHANGED_LINE_FORMAT_OPTION,
  OLD_LINE_FORMAT_OPTION,
  NEW_LINE_FORMAT_OPTION,

  /* These options must be in sequence: main subtracts
     UNCHANGED_GROUP_FORMAT_OPTION to index group_format[] and
     group_format_option[].  */
  UNCHANGED_GROUP_FORMAT_OPTION,
  OLD_GROUP_FORMAT_OPTION,
  NEW_GROUP_FORMAT_OPTION,
  CHANGED_GROUP_FORMAT_OPTION
};
174
/* Option spellings used in conflict diagnostics for the group-format
   options, in the same order as the *_GROUP_FORMAT_OPTION values.
   Each row is sized to hold the longest name.  */
static char const group_format_option[][sizeof "--unchanged-group-format"] =
  {
    "--unchanged-group-format",
    "--old-group-format",
    "--new-group-format",
    "--changed-group-format"
  };
182
/* Option spellings used in conflict diagnostics for the line-format
   options, in the same order as the *_LINE_FORMAT_OPTION values.
   Each row is sized to hold the longest name.  */
static char const line_format_option[][sizeof "--unchanged-line-format"] =
  {
    "--unchanged-line-format",
    "--old-line-format",
    "--new-line-format"
  };
189
/* Long option table for getopt_long.  Fields are: name, has_arg
   (0 = none, 1 = required, 2 = optional), flag (always 0 here, so
   getopt_long returns the fourth field), and the value returned.
   The all-zero entry terminates the table.  */
static struct option const longopts[] =
{
  {"binary", 0, 0, BINARY_OPTION},
  {"brief", 0, 0, 'q'},
  {"changed-group-format", 1, 0, CHANGED_GROUP_FORMAT_OPTION},
  {"context", 2, 0, 'C'},
  {"ed", 0, 0, 'e'},
  {"exclude", 1, 0, 'x'},
  {"exclude-from", 1, 0, 'X'},
  {"expand-tabs", 0, 0, 't'},
  {"forward-ed", 0, 0, 'f'},
  {"from-file", 1, 0, FROM_FILE_OPTION},
  {"help", 0, 0, HELP_OPTION},
  {"horizon-lines", 1, 0, HORIZON_LINES_OPTION},
  {"ifdef", 1, 0, 'D'},
  {"ignore-all-space", 0, 0, 'w'},
  {"ignore-blank-lines", 0, 0, 'B'},
  {"ignore-case", 0, 0, 'i'},
  {"ignore-file-name-case", 0, 0, IGNORE_FILE_NAME_CASE_OPTION},
  {"ignore-matching-lines", 1, 0, 'I'},
  {"ignore-space-change", 0, 0, 'b'},
  {"ignore-tab-expansion", 0, 0, 'E'},
  {"inhibit-hunk-merge", 0, 0, INHIBIT_HUNK_MERGE_OPTION},
  {"initial-tab", 0, 0, 'T'},
  {"label", 1, 0, 'L'},
  {"left-column", 0, 0, LEFT_COLUMN_OPTION},
  {"line-format", 1, 0, LINE_FORMAT_OPTION},
  {"minimal", 0, 0, 'd'},
  {"new-file", 0, 0, 'N'},
  {"new-group-format", 1, 0, NEW_GROUP_FORMAT_OPTION},
  {"new-line-format", 1, 0, NEW_LINE_FORMAT_OPTION},
  {"no-ignore-file-name-case", 0, 0, NO_IGNORE_FILE_NAME_CASE_OPTION},
  {"normal", 0, 0, NORMAL_OPTION},
  {"old-group-format", 1, 0, OLD_GROUP_FORMAT_OPTION},
  {"old-line-format", 1, 0, OLD_LINE_FORMAT_OPTION},
  {"paginate", 0, 0, 'l'},
  {"rcs", 0, 0, 'n'},
  {"recursive", 0, 0, 'r'},
  {"report-identical-files", 0, 0, 's'},
  {"sdiff-merge-assist", 0, 0, SDIFF_MERGE_ASSIST_OPTION},
  {"show-c-function", 0, 0, 'p'},
  {"show-function-line", 1, 0, 'F'},
  {"side-by-side", 0, 0, 'y'},
  {"speed-large-files", 0, 0, 'H'},
  {"starting-file", 1, 0, 'S'},
  {"strip-trailing-cr", 0, 0, STRIP_TRAILING_CR_OPTION},
  {"suppress-common-lines", 0, 0, SUPPRESS_COMMON_LINES_OPTION},
  {"text", 0, 0, 'a'},
  {"to-file", 1, 0, TO_FILE_OPTION},
  {"unchanged-group-format", 1, 0, UNCHANGED_GROUP_FORMAT_OPTION},
  {"unchanged-line-format", 1, 0, UNCHANGED_LINE_FORMAT_OPTION},
  {"unidirectional-new-file", 0, 0, 'P'},
  {"unified", 2, 0, 'U'},
  {"version", 0, 0, 'v'},
  {"width", 1, 0, 'W'},
  {0, 0, 0, 0}
};
247
/* Program entry point.  Decode the command-line options into the
   global settings, derive the settings that depend on several options
   (output style, time format, context, side-by-side geometry, default
   formats), then compare the file operands with compare_files.

   With --from-file or --to-file every operand is compared against
   that file and the exit status is the largest status seen;
   otherwise exactly two operands are required.  */
int
main (int argc, char **argv)
{
  int exit_status = EXIT_SUCCESS;
  int c;
  int i;
  int prev = -1;                /* option seen on the previous iteration */
  lin ocontext = -1;            /* value of obsolete -NUM option; -1 if none */
  bool explicit_context = 0;    /* was -C or -U given?  */
  int width = 0;                /* -W value; 0 if not given */
  bool show_c_function = 0;     /* was -p given?  */
  char const *from_file = 0;
  char const *to_file = 0;
  uintmax_t numval;
  char *numend;

  /* Do our initializations.  */
  exit_failure = 2;
  initialize_main (&argc, &argv);
  program_name = argv[0];
  setlocale (LC_ALL, "");
  bindtextdomain (PACKAGE, LOCALEDIR);
  textdomain (PACKAGE);
  c_stack_action (c_stack_die);
  function_regexp_list.buf = &function_regexp;
  ignore_regexp_list.buf = &ignore_regexp;
  re_set_syntax (RE_SYNTAX_GREP | RE_NO_POSIX_BACKTRACKING);
  excluded = new_exclude ();

  /* Decode the options.  */

  while ((c = getopt_long (argc, argv, shortopts, longopts, 0)) != -1)
    {
      switch (c)
        {
        case 0:
          break;

        /* Obsolete -NUM context option.  Consecutive digit options
           are accumulated into one number, saturating at LIN_MAX;
           a digit that does not follow another digit starts over.  */
        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
          if (! ISDIGIT (prev))
            ocontext = c - '0';
          else if (LIN_MAX / 10 < ocontext
                   || ((ocontext = 10 * ocontext + c - '0') < 0))
            ocontext = LIN_MAX;
          break;

        case 'a':
          text = 1;
          break;

        case 'b':
          /* Never lower a stronger white-space setting already chosen.  */
          if (ignore_white_space < IGNORE_SPACE_CHANGE)
            ignore_white_space = IGNORE_SPACE_CHANGE;
          break;

        case 'B':
          ignore_blank_lines = 1;
          break;

        case 'C':               /* +context[=lines] */
        case 'U':               /* +unified[=lines] */
          {
            if (optarg)
              {
                numval = strtoumax (optarg, &numend, 10);
                if (*numend)
                  try_help ("invalid context length `%s'", optarg);
                if (LIN_MAX < numval)
                  numval = LIN_MAX;
              }
            else
              numval = 3;

            specify_style (c == 'U' ? OUTPUT_UNIFIED : OUTPUT_CONTEXT);
            /* Repeated options keep the largest context requested.  */
            if (context < numval)
              context = numval;
            explicit_context = 1;
          }
          break;

        case 'c':
          specify_style (OUTPUT_CONTEXT);
          if (context < 3)
            context = 3;
          break;

        case 'd':
          minimal = 1;
          break;

        case 'D':
          specify_style (OUTPUT_IFDEF);
          {
            /* Each `%c' below is printed with the value 0, so the
               single sprintf yields several consecutive NUL-terminated
               strings, one per group_format entry; the loop then walks
               B across them.  The allocation subtracts the characters
               that the conversions remove from the template.  */
            static char const C_ifdef_group_formats[] =
              "%%=%c#ifndef %s\n%%<#endif /* ! %s */\n%c#ifdef %s\n%%>#endif /* %s */\n%c#ifndef %s\n%%<#else /* %s */\n%%>#endif /* %s */\n";
            char *b = xmalloc (sizeof C_ifdef_group_formats
                               + 7 * strlen (optarg) - 14 /* 7*"%s" */
                               - 8 /* 5*"%%" + 3*"%c" */);
            sprintf (b, C_ifdef_group_formats,
                     0,
                     optarg, optarg, 0,
                     optarg, optarg, 0,
                     optarg, optarg, optarg);
            for (i = 0; i < sizeof group_format / sizeof *group_format; i++)
              {
                specify_value (&group_format[i], b, "-D");
                b += strlen (b) + 1;
              }
          }
          break;

        case 'e':
          specify_style (OUTPUT_ED);
          break;

        case 'E':
          if (ignore_white_space < IGNORE_TAB_EXPANSION)
            ignore_white_space = IGNORE_TAB_EXPANSION;
          break;

        case 'f':
          specify_style (OUTPUT_FORWARD_ED);
          break;

        case 'F':
          add_regexp (&function_regexp_list, optarg);
          break;

        case 'h':
          /* Split the files into chunks for faster processing.
             Usually does not change the result.

             This currently has no effect.  */
          break;

        case 'H':
          speed_large_files = 1;
          break;

        case 'i':
          ignore_case = 1;
          break;

        case 'I':
          add_regexp (&ignore_regexp_list, optarg);
          break;

        case 'l':
          if (!pr_program[0])
            try_help ("pagination not supported on this host", 0);
          paginate = 1;
#ifdef SIGCHLD
          /* Pagination requires forking and waiting, and
             System V fork+wait does not work if SIGCHLD is ignored.  */
          signal (SIGCHLD, SIG_DFL);
#endif
          break;

        case 'L':
          /* The first -L labels the first file, the second -L the
             second file; a third is an error.  */
          if (!file_label[0])
            file_label[0] = optarg;
          else if (!file_label[1])
            file_label[1] = optarg;
          else
            fatal ("too many file label options");
          break;

        case 'n':
          specify_style (OUTPUT_RCS);
          break;

        case 'N':
          new_file = 1;
          break;

        case 'p':
          show_c_function = 1;
          add_regexp (&function_regexp_list, "^[[:alpha:]$_]");
          break;

        case 'P':
          unidirectional_new_file = 1;
          break;

        case 'q':
          brief = 1;
          break;

        case 'r':
          recursive = 1;
          break;

        case 's':
          report_identical_files = 1;
          break;

        case 'S':
          specify_value (&starting_file, optarg, "-S");
          break;

        case 't':
          expand_tabs = 1;
          break;

        case 'T':
          initial_tab = 1;
          break;

        case 'u':
          specify_style (OUTPUT_UNIFIED);
          if (context < 3)
            context = 3;
          break;

        case 'v':
          printf ("diff %s\n%s\n\n%s\n\n%s\n",
                  version_string, copyright_string,
                  _(free_software_msgid), _(authorship_msgid));
          check_stdout ();
          return EXIT_SUCCESS;

        case 'w':
          ignore_white_space = IGNORE_ALL_SPACE;
          break;

        case 'x':
          add_exclude (excluded, optarg, exclude_options ());
          break;

        case 'X':
          if (add_exclude_file (add_exclude, excluded, optarg,
                                exclude_options (), '\n'))
            pfatal_with_name (optarg);
          break;

        case 'y':
          specify_style (OUTPUT_SDIFF);
          break;

        case 'W':
          numval = strtoumax (optarg, &numend, 10);
          if (! (0 < numval && numval <= INT_MAX) || *numend)
            try_help ("invalid width `%s'", optarg);
          /* Repeating the same width is accepted; a different one is
             a conflict.  */
          if (width != numval)
            {
              if (width)
                fatal ("conflicting width options");
              width = numval;
            }
          break;

        case BINARY_OPTION:
#if HAVE_SETMODE_DOS
          binary = 1;
          set_binary_mode (STDOUT_FILENO, 1);
#endif
          break;

        case FROM_FILE_OPTION:
          specify_value (&from_file, optarg, "--from-file");
          break;

        case HELP_OPTION:
          usage ();
          check_stdout ();
          return EXIT_SUCCESS;

        case HORIZON_LINES_OPTION:
          numval = strtoumax (optarg, &numend, 10);
          if (*numend)
            try_help ("invalid horizon length `%s'", optarg);
          horizon_lines = MAX (horizon_lines, MIN (numval, LIN_MAX));
          break;

        case IGNORE_FILE_NAME_CASE_OPTION:
          ignore_file_name_case = 1;
          break;

        case INHIBIT_HUNK_MERGE_OPTION:
          /* This option is obsolete, but accept it for backward
             compatibility.  */
          break;

        case LEFT_COLUMN_OPTION:
          left_column = 1;
          break;

        case LINE_FORMAT_OPTION:
          /* --line-format sets every per-type line format at once.  */
          specify_style (OUTPUT_IFDEF);
          for (i = 0; i < sizeof line_format / sizeof *line_format; i++)
            specify_value (&line_format[i], optarg, "--line-format");
          break;

        case NO_IGNORE_FILE_NAME_CASE_OPTION:
          ignore_file_name_case = 0;
          break;

        case NORMAL_OPTION:
          specify_style (OUTPUT_NORMAL);
          break;

        case SDIFF_MERGE_ASSIST_OPTION:
          specify_style (OUTPUT_SDIFF);
          sdiff_merge_assist = 1;
          break;

        case STRIP_TRAILING_CR_OPTION:
          strip_trailing_cr = 1;
          break;

        case SUPPRESS_COMMON_LINES_OPTION:
          suppress_common_lines = 1;
          break;

        case TO_FILE_OPTION:
          specify_value (&to_file, optarg, "--to-file");
          break;

        case UNCHANGED_LINE_FORMAT_OPTION:
        case OLD_LINE_FORMAT_OPTION:
        case NEW_LINE_FORMAT_OPTION:
          specify_style (OUTPUT_IFDEF);
          /* Turn the option value into an index into line_format[].  */
          c -= UNCHANGED_LINE_FORMAT_OPTION;
          specify_value (&line_format[c], optarg, line_format_option[c]);
          break;

        case UNCHANGED_GROUP_FORMAT_OPTION:
        case OLD_GROUP_FORMAT_OPTION:
        case NEW_GROUP_FORMAT_OPTION:
        case CHANGED_GROUP_FORMAT_OPTION:
          specify_style (OUTPUT_IFDEF);
          /* Turn the option value into an index into group_format[].  */
          c -= UNCHANGED_GROUP_FORMAT_OPTION;
          specify_value (&group_format[c], optarg, group_format_option[c]);
          break;

        default:
          try_help (0, 0);
        }
      prev = c;
    }

  /* With no explicit style, -p implies a context diff; otherwise the
     normal format is used.  */
  if (output_style == OUTPUT_UNSPECIFIED)
    {
      if (show_c_function)
        {
          specify_style (OUTPUT_CONTEXT);
          if (ocontext < 0)
            context = 3;
        }
      else
        specify_style (OUTPUT_NORMAL);
    }

  if (output_style != OUTPUT_CONTEXT || hard_locale (LC_TIME))
    time_format = "%Y-%m-%d %H:%M:%S.%N %z";
  else
    {
      /* See POSIX 1003.1-2001 for this format.  */
      time_format = "%a %b %e %T %Y";
    }

  /* Reconcile the obsolete -NUM option with the context chosen by the
     other options.  When posix2_version reports 200112 or later the
     obsolete form is rejected with a diagnostic instead.  */
  if (0 <= ocontext)
    {
      bool modern_usage = 200112 <= posix2_version ();

      if ((output_style == OUTPUT_CONTEXT
           || output_style == OUTPUT_UNIFIED)
          && (context < ocontext
              || (ocontext < context && ! explicit_context)))
        {
          if (modern_usage)
            {
              error (0, 0,
                     _("`-%ld' option is obsolete; use `-%c %ld'"),
                     (long) ocontext,
                     output_style == OUTPUT_CONTEXT ? 'C' : 'U',
                     (long) ocontext);
              try_help (0, 0);
            }
          context = ocontext;
        }
      else
        {
          if (modern_usage)
            {
              error (0, 0, _("`-%ld' option is obsolete; omit it"),
                     (long) ocontext);
              try_help (0, 0);
            }
        }
    }

  {
    /*
     *  We maximize first the half line width, and then the gutter width,
     *  according to the following constraints:
     *  1.  Two half lines plus a gutter must fit in a line.
     *  2.  If the half line width is nonzero:
     *      a.  The gutter width is at least GUTTER_WIDTH_MINIMUM.
     *      b.  If tabs are not expanded to spaces,
     *          a half line plus a gutter is an integral number of tabs,
     *          so that tabs in the right column line up.
     */
    unsigned int t = expand_tabs ? 1 : TAB_WIDTH;
    int w = width ? width : 130;        /* 130 columns unless -W was given */
    int off = (w + t + GUTTER_WIDTH_MINIMUM) / (2 * t) * t;
    sdiff_half_width = MAX (0, MIN (off - GUTTER_WIDTH_MINIMUM, w - off)),
    sdiff_column2_offset = sdiff_half_width ? off : w;
  }

  /* Make the horizon at least as large as the context, so that
     shift_boundaries has more freedom to shift the first and last hunks.  */
  if (horizon_lines < context)
    horizon_lines = context;

  summarize_regexp_list (&function_regexp_list);
  summarize_regexp_list (&ignore_regexp_list);

  /* Fill in every format the user left unspecified.  An explicit
     changed-group format also serves as the default for the old and
     new groups; otherwise the changed format is the old format
     followed by the new one.  */
  if (output_style == OUTPUT_IFDEF)
    {
      for (i = 0; i < sizeof line_format / sizeof *line_format; i++)
        if (!line_format[i])
          line_format[i] = "%l\n";
      if (!group_format[OLD])
        group_format[OLD]
          = group_format[CHANGED] ? group_format[CHANGED] : "%<";
      if (!group_format[NEW])
        group_format[NEW]
          = group_format[CHANGED] ? group_format[CHANGED] : "%>";
      if (!group_format[UNCHANGED])
        group_format[UNCHANGED] = "%=";
      if (!group_format[CHANGED])
        group_format[CHANGED] = concat (group_format[OLD],
                                        group_format[NEW], "");
    }

  /* Identical inputs produce no output unless unchanged text would be
     printed: by the IFDEF formats, or by side-by-side output that does
     not suppress common lines.  */
  no_diff_means_no_output =
    (output_style == OUTPUT_IFDEF ?
      (!*group_format[UNCHANGED]
       || (strcmp (group_format[UNCHANGED], "%=") == 0
           && !*line_format[UNCHANGED]))
     : (output_style != OUTPUT_SDIFF) | suppress_common_lines);

  /* Set only for --brief when no option is in effect that ignores
     some kind of difference in file contents.  */
  files_can_be_treated_as_binary =
    (brief
     & ~ (ignore_blank_lines | ignore_case | strip_trailing_cr
          | (ignore_regexp_list.regexps || ignore_white_space)));

  /* Remember the option arguments (everything before the operands).  */
  switch_string = option_list (argv + 1, optind - 1);

  if (from_file)
    {
      if (to_file)
        fatal ("--from-file and --to-file both specified");
      else
        for (; optind < argc; optind++)
          {
            int status = compare_files ((struct comparison *) 0,
                                        from_file, argv[optind]);
            if (exit_status < status)
              exit_status = status;
          }
    }
  else
    {
      if (to_file)
        for (; optind < argc; optind++)
          {
            int status = compare_files ((struct comparison *) 0,
                                        argv[optind], to_file);
            if (exit_status < status)
              exit_status = status;
          }
      else
        {
          /* Without --from-file or --to-file exactly two operands are
             required.  */
          if (argc - optind != 2)
            {
              if (argc - optind < 2)
                try_help ("missing operand after `%s'", argv[argc - 1]);
              else
                try_help ("extra operand `%s'", argv[optind + 2]);
            }

          exit_status = compare_files ((struct comparison *) 0,
                                       argv[optind], argv[optind + 1]);
        }
    }

  /* Print any messages that were saved up for last.  */
  print_message_queue ();

  check_stdout ();
  exit (exit_status);
  return exit_status;
}
752
753/* Append to REGLIST the regexp PATTERN. */
754
755static void
756add_regexp (struct regexp_list *reglist, char const *pattern)
757{
758 size_t patlen = strlen (pattern);
759 char const *m = re_compile_pattern (pattern, patlen, reglist->buf);
760
761 if (m != 0)
762 error (0, 0, "%s: %s", pattern, m);
763 else
764 {
765 char *regexps = reglist->regexps;
766 size_t len = reglist->len;
767 bool multiple_regexps = reglist->multiple_regexps = regexps != 0;
768 size_t newlen = reglist->len = len + 2 * multiple_regexps + patlen;
769 size_t size = reglist->size;
770
771 if (size <= newlen)
772 {
773 if (!size)
774 size = 1;
775
776 do size *= 2;
777 while (size <= newlen);
778
779 reglist->size = size;
780 reglist->regexps = regexps = xrealloc (regexps, size);
781 }
782 if (multiple_regexps)
783 {
784 regexps[len++] = '\\';
785 regexps[len++] = '|';
786 }
787 memcpy (regexps + len, pattern, patlen + 1);
788 }
789}
790
791/* Ensure that REGLIST represents the disjunction of its regexps.
792 This is done here, rather than earlier, to avoid O(N^2) behavior. */
793
794static void
795summarize_regexp_list (struct regexp_list *reglist)
796{
797 if (reglist->regexps)
798 {
799 /* At least one regexp was specified. Allocate a fastmap for it. */
800 reglist->buf->fastmap = xmalloc (1 << CHAR_BIT);
801 if (reglist->multiple_regexps)
802 {
803 /* Compile the disjunction of the regexps.
804 (If just one regexp was specified, it is already compiled.) */
805 char const *m = re_compile_pattern (reglist->regexps, reglist->len,
806 reglist->buf);
807 if (m != 0)
808 error (EXIT_TROUBLE, 0, "%s: %s", reglist->regexps, m);
809 }
810 }
811}
812
813static void
814try_help (char const *reason_msgid, char const *operand)
815{
816 if (reason_msgid)
817 error (0, 0, _(reason_msgid), operand);
818 error (EXIT_TROUBLE, 0, _("Try `%s --help' for more information."),
819 program_name);
820 abort ();
821}
822
/* Verify that everything written to standard output got there.
   A pending stream error is reported through fatal; otherwise stdout
   is closed, and a failing close is reported through
   pfatal_with_name.  */
static void
check_stdout (void)
{
  if (ferror (stdout))
    {
      fatal ("write failed");
      return;
    }

  if (fclose (stdout) != 0)
    {
      pfatal_with_name (_("standard output"));
    }
}
831
/* Help text printed by usage, one untranslated msgid per entry.
   An empty string makes usage emit a blank line; an entry may hold
   several lines separated by newlines; the null pointer ends the
   list.  */
static char const * const option_help_msgid[] = {
  N_("Compare files line by line."),
  "",
  N_("-i --ignore-case Ignore case differences in file contents."),
  N_("--ignore-file-name-case Ignore case when comparing file names."),
  N_("--no-ignore-file-name-case Consider case when comparing file names."),
  N_("-E --ignore-tab-expansion Ignore changes due to tab expansion."),
  N_("-b --ignore-space-change Ignore changes in the amount of white space."),
  N_("-w --ignore-all-space Ignore all white space."),
  N_("-B --ignore-blank-lines Ignore changes whose lines are all blank."),
  N_("-I RE --ignore-matching-lines=RE Ignore changes whose lines all match RE."),
  N_("--strip-trailing-cr Strip trailing carriage return on input."),
#if HAVE_SETMODE_DOS
  N_("--binary Read and write data in binary mode."),
#endif
  N_("-a --text Treat all files as text."),
  "",
  N_("-c -C NUM --context[=NUM] Output NUM (default 3) lines of copied context.\n\
-u -U NUM --unified[=NUM] Output NUM (default 3) lines of unified context.\n\
 --label LABEL Use LABEL instead of file name.\n\
 -p --show-c-function Show which C function each change is in.\n\
 -F RE --show-function-line=RE Show the most recent line matching RE."),
  N_("-q --brief Output only whether files differ."),
  N_("-e --ed Output an ed script."),
  N_("--normal Output a normal diff."),
  N_("-n --rcs Output an RCS format diff."),
  N_("-y --side-by-side Output in two columns.\n\
 -W NUM --width=NUM Output at most NUM (default 130) print columns.\n\
 --left-column Output only the left column of common lines.\n\
 --suppress-common-lines Do not output common lines."),
  N_("-D NAME --ifdef=NAME Output merged file to show `#ifdef NAME' diffs."),
  N_("--GTYPE-group-format=GFMT Similar, but format GTYPE input groups with GFMT."),
  N_("--line-format=LFMT Similar, but format all input lines with LFMT."),
  N_("--LTYPE-line-format=LFMT Similar, but format LTYPE input lines with LFMT."),
  N_(" LTYPE is `old', `new', or `unchanged'. GTYPE is LTYPE or `changed'."),
  N_(" GFMT may contain:\n\
 %< lines from FILE1\n\
 %> lines from FILE2\n\
 %= lines common to FILE1 and FILE2\n\
 %[-][WIDTH][.[PREC]]{doxX}LETTER printf-style spec for LETTER\n\
 LETTERs are as follows for new group, lower case for old group:\n\
 F first line number\n\
 L last line number\n\
 N number of lines = L-F+1\n\
 E F-1\n\
 M L+1"),
  N_(" LFMT may contain:\n\
 %L contents of line\n\
 %l contents of line, excluding any trailing newline\n\
 %[-][WIDTH][.[PREC]]{doxX}n printf-style spec for input line number"),
  N_(" Either GFMT or LFMT may contain:\n\
 %% %\n\
 %c'C' the single character C\n\
 %c'\\OOO' the character with octal code OOO"),
  "",
  N_("-l --paginate Pass the output through `pr' to paginate it."),
  N_("-t --expand-tabs Expand tabs to spaces in output."),
  N_("-T --initial-tab Make tabs line up by prepending a tab."),
  "",
  N_("-r --recursive Recursively compare any subdirectories found."),
  N_("-N --new-file Treat absent files as empty."),
  N_("--unidirectional-new-file Treat absent first files as empty."),
  N_("-s --report-identical-files Report when two files are the same."),
  N_("-x PAT --exclude=PAT Exclude files that match PAT."),
  N_("-X FILE --exclude-from=FILE Exclude files that match any pattern in FILE."),
  N_("-S FILE --starting-file=FILE Start with FILE when comparing directories."),
  N_("--from-file=FILE1 Compare FILE1 to all operands. FILE1 can be a directory."),
  N_("--to-file=FILE2 Compare all operands to FILE2. FILE2 can be a directory."),
  "",
  N_("--horizon-lines=NUM Keep NUM lines of the common prefix and suffix."),
  N_("-d --minimal Try hard to find a smaller set of changes."),
  N_("--speed-large-files Assume large files and many scattered small changes."),
  "",
  N_("-v --version Output version info."),
  N_("--help Output this help."),
  "",
  N_("FILES are `FILE1 FILE2' or `DIR1 DIR2' or `DIR FILE...' or `FILE... DIR'."),
  N_("If --from-file or --to-file is given, there are no restrictions on FILES."),
  N_("If a FILE is `-', read standard input."),
  "",
  N_("Report bugs to <bug-gnu-utils@gnu.org>."),
  0
};
915
916static void
917usage (void)
918{
919 char const * const *p;
920
921 printf (_("Usage: %s [OPTION]... FILES\n"), program_name);
922
923 for (p = option_help_msgid; *p; p++)
924 {
925 if (!**p)
926 putchar ('\n');
927 else
928 {
929 char const *msg = _(*p);
930 char const *nl;
931 while ((nl = strchr (msg, '\n')))
932 {
933 int msglen = nl + 1 - msg;
934 printf (" %.*s", msglen, msg);
935 msg = nl + 1;
936 }
937
938 printf (" %s\n" + 2 * (*msg != ' ' && *msg != '-'), msg);
939 }
940 }
941}
942
943/* Set VAR to VALUE, reporting an OPTION error if this is a
944 conflict. */
/* Store VALUE in *VAR.  If *VAR already holds a different string,
   diagnose a conflicting OPTION value and give up via try_help.
   Repeating the same value is accepted silently.  */
static void
specify_value (char const **var, char const *value, char const *option)
{
  char const *previous = *var;

  if (previous != 0)
    {
      if (strcmp (previous, value) != 0)
        {
          error (0, 0, _("conflicting %s option value `%s'"), option, value);
          try_help (0, 0);
        }
    }

  *var = value;
}
955
956/* Set the output style to STYLE, diagnosing conflicts. */
957static void
958specify_style (enum output_style style)
959{
960 if (output_style != style)
961 {
962 if (output_style != OUTPUT_UNSPECIFIED)
963 try_help ("conflicting output style options", 0);
964 output_style = style;
965 }
966}
967
968
/* Return a translated description of the kind of file described by
   *ST, for use in diagnostics.  File types whose test macro is not
   defined on this host are skipped; anything unrecognized is a
   "weird file".  */
static char const *
filetype (struct stat const *st)
{
  /* See POSIX 1003.1-2001 for these formats.

     To keep diagnostics grammatical in English, the returned string
     must start with a consonant.  */

  if (S_ISREG (st->st_mode))
    {
      if (st->st_size == 0)
        return _("regular empty file");
      return _("regular file");
    }

  if (S_ISDIR (st->st_mode))
    return _("directory");

#ifdef S_ISBLK
  if (S_ISBLK (st->st_mode))
    return _("block special file");
#endif

#ifdef S_ISCHR
  if (S_ISCHR (st->st_mode))
    return _("character special file");
#endif

#ifdef S_ISFIFO
  if (S_ISFIFO (st->st_mode))
    return _("fifo");
#endif

  /* S_ISLNK is impossible with `fstat' and `stat'.  */

#ifdef S_ISSOCK
  if (S_ISSOCK (st->st_mode))
    return _("socket");
#endif

#ifdef S_TYPEISMQ
  if (S_TYPEISMQ (st))
    return _("message queue");
#endif

#ifdef S_TYPEISSEM
  if (S_TYPEISSEM (st))
    return _("semaphore");
#endif

#ifdef S_TYPEISSHM
  if (S_TYPEISSHM (st))
    return _("shared memory object");
#endif

#ifdef S_TYPEISTMO
  if (S_TYPEISTMO (st))
    return _("typed memory object");
#endif

  return _("weird file");
}
1010
1011
1012/* Set the last-modified time of *ST to be the current time. */
1013
/* Overwrite the modification time in *ST with the current time.
   Where the stat structure carries a sub-second field (ST_MTIM_NSEC
   defined), clock_gettime is tried first and gettimeofday second,
   each only if the build provides it; if neither is available or
   succeeds, time () supplies whole seconds.  */
static void
set_mtime_to_now (struct stat *st)
{
#ifdef ST_MTIM_NSEC

# if HAVE_CLOCK_GETTIME && defined CLOCK_REALTIME
  /* Fills both seconds and nanoseconds in one call.  */
  if (clock_gettime (CLOCK_REALTIME, &st->st_mtim) == 0)
    return;
# endif

# if HAVE_GETTIMEOFDAY
  {
    struct timeval now;

    if (gettimeofday (&now, NULL) == 0)
      {
        st->st_mtime = now.tv_sec;
        /* Microseconds to nanoseconds.  */
        st->st_mtim.ST_MTIM_NSEC = now.tv_usec * 1000;
        return;
      }
  }
# endif

#endif /* ST_MTIM_NSEC */

  /* Last resort: one-second resolution.  */
  time (&st->st_mtime);
}
1040
1041
1042/* Compare two files (or dirs) with parent comparison PARENT
1043 and names NAME0 and NAME1.
1044 (If PARENT is 0, then the first name is just NAME0, etc.)
1045 This is self-contained; it opens the files and closes them.
1046
1047 Value is EXIT_SUCCESS if files are the same, EXIT_FAILURE if
1048 different, EXIT_TROUBLE if there is a problem opening them. */
1049
1050static int
1051compare_files (struct comparison const *parent,
1052 char const *name0,
1053 char const *name1)
1054{
1055 struct comparison cmp;
1056#define DIR_P(f) (S_ISDIR (cmp.file[f].stat.st_mode) != 0)
1057 register int f;
1058 int status = EXIT_SUCCESS;
1059 bool same_files;
1060 char *free0, *free1;
1061
1062 /* If this is directory comparison, perhaps we have a file
1063 that exists only in one of the directories.
1064 If so, just print a message to that effect. */
1065
1066 if (! ((name0 && name1)
1067 || (unidirectional_new_file && name1)
1068 || new_file))
1069 {
1070 char const *name = name0 == 0 ? name1 : name0;
1071 char const *dir = parent->file[name0 == 0].name;
1072
1073 /* See POSIX 1003.1-2001 for this format. */
1074 message ("Only in %s: %s\n", dir, name);
1075
1076 /* Return EXIT_FAILURE so that diff_dirs will return
1077 EXIT_FAILURE ("some files differ"). */
1078 return EXIT_FAILURE;
1079 }
1080
1081 memset (cmp.file, 0, sizeof cmp.file);
1082 cmp.parent = parent;
1083
1084 /* cmp.file[f].desc markers */
1085#define NONEXISTENT (-1) /* nonexistent file */
1086#define UNOPENED (-2) /* unopened file (e.g. directory) */
1087#define ERRNO_ENCODE(errno) (-3 - (errno)) /* encoded errno value */
1088
1089#define ERRNO_DECODE(desc) (-3 - (desc)) /* inverse of ERRNO_ENCODE */
1090
1091 cmp.file[0].desc = name0 == 0 ? NONEXISTENT : UNOPENED;
1092 cmp.file[1].desc = name1 == 0 ? NONEXISTENT : UNOPENED;
1093
1094 /* Now record the full name of each file, including nonexistent ones. */
1095
1096 if (name0 == 0)
1097 name0 = name1;
1098 if (name1 == 0)
1099 name1 = name0;
1100
1101 if (!parent)
1102 {
1103 free0 = 0;
1104 free1 = 0;
1105 cmp.file[0].name = name0;
1106 cmp.file[1].name = name1;
1107 }
1108 else
1109 {
1110 cmp.file[0].name = free0
1111 = dir_file_pathname (parent->file[0].name, name0);
1112 cmp.file[1].name = free1
1113 = dir_file_pathname (parent->file[1].name, name1);
1114 }
1115
1116 /* Stat the files. */
1117
1118 for (f = 0; f < 2; f++)
1119 {
1120 if (cmp.file[f].desc != NONEXISTENT)
1121 {
1122 if (f && file_name_cmp (cmp.file[f].name, cmp.file[0].name) == 0)
1123 {
1124 cmp.file[f].desc = cmp.file[0].desc;
1125 cmp.file[f].stat = cmp.file[0].stat;
1126 }
1127 else if (strcmp (cmp.file[f].name, "-") == 0)
1128 {
1129 cmp.file[f].desc = STDIN_FILENO;
1130 if (fstat (STDIN_FILENO, &cmp.file[f].stat) != 0)
1131 cmp.file[f].desc = ERRNO_ENCODE (errno);
1132 else
1133 {
1134 if (S_ISREG (cmp.file[f].stat.st_mode))
1135 {
1136 off_t pos = lseek (STDIN_FILENO, (off_t) 0, SEEK_CUR);
1137 if (pos < 0)
1138 cmp.file[f].desc = ERRNO_ENCODE (errno);
1139 else
1140 cmp.file[f].stat.st_size =
1141 MAX (0, cmp.file[f].stat.st_size - pos);
1142 }
1143
1144 /* POSIX 1003.1-2001 requires current time for
1145 stdin. */
1146 set_mtime_to_now (&cmp.file[f].stat);
1147 }
1148 }
1149 else if (stat (cmp.file[f].name, &cmp.file[f].stat) != 0)
1150 cmp.file[f].desc = ERRNO_ENCODE (errno);
1151 }
1152 }
1153
1154 /* Mark files as nonexistent at the top level as needed for -N and
1155 --unidirectional-new-file. */
1156 if (! parent)
1157 {
1158 if ((new_file | unidirectional_new_file)
1159 && cmp.file[0].desc == ERRNO_ENCODE (ENOENT)
1160 && cmp.file[1].desc == UNOPENED)
1161 cmp.file[0].desc = NONEXISTENT;
1162
1163 if (new_file
1164 && cmp.file[0].desc == UNOPENED
1165 && cmp.file[1].desc == ERRNO_ENCODE (ENOENT))
1166 cmp.file[1].desc = NONEXISTENT;
1167 }
1168
1169 for (f = 0; f < 2; f++)
1170 if (cmp.file[f].desc == NONEXISTENT)
1171 cmp.file[f].stat.st_mode = cmp.file[1 - f].stat.st_mode;
1172
1173 for (f = 0; f < 2; f++)
1174 {
1175 int e = ERRNO_DECODE (cmp.file[f].desc);
1176 if (0 <= e)
1177 {
1178 errno = e;
1179 perror_with_name (cmp.file[f].name);
1180 status = EXIT_TROUBLE;
1181 }
1182 }
1183
1184 if (status == EXIT_SUCCESS && ! parent && DIR_P (0) != DIR_P (1))
1185 {
1186 /* If one is a directory, and it was specified in the command line,
1187 use the file in that dir with the other file's basename. */
1188
1189 int fnm_arg = DIR_P (0);
1190 int dir_arg = 1 - fnm_arg;
1191 char const *fnm = cmp.file[fnm_arg].name;
1192 char const *dir = cmp.file[dir_arg].name;
1193 char const *filename = cmp.file[dir_arg].name = free0
1194 = dir_file_pathname (dir, base_name (fnm));
1195
1196 if (strcmp (fnm, "-") == 0)
1197 fatal ("cannot compare `-' to a directory");
1198
1199 if (stat (filename, &cmp.file[dir_arg].stat) != 0)
1200 {
1201 perror_with_name (filename);
1202 status = EXIT_TROUBLE;
1203 }
1204 }
1205
1206 if (status != EXIT_SUCCESS)
1207 {
1208 /* One of the files should exist but does not. */
1209 }
1210 else if ((same_files
1211 = (cmp.file[0].desc != NONEXISTENT
1212 && cmp.file[1].desc != NONEXISTENT
1213 && 0 < same_file (&cmp.file[0].stat, &cmp.file[1].stat)
1214 && same_file_attributes (&cmp.file[0].stat,
1215 &cmp.file[1].stat)))
1216 && no_diff_means_no_output)
1217 {
1218 /* The two named files are actually the same physical file.
1219 We know they are identical without actually reading them. */
1220 }
1221 else if (DIR_P (0) & DIR_P (1))
1222 {
1223 if (output_style == OUTPUT_IFDEF)
1224 fatal ("-D option not supported with directories");
1225
1226 /* If both are directories, compare the files in them. */
1227
1228 if (parent && !recursive)
1229 {
1230 /* But don't compare dir contents one level down
1231 unless -r was specified.
1232 See POSIX 1003.1-2001 for this format. */
1233 message ("Common subdirectories: %s and %s\n",
1234 cmp.file[0].name, cmp.file[1].name);
1235 }
1236 else
1237 status = diff_dirs (&cmp, compare_files);
1238 }
1239 else if ((DIR_P (0) | DIR_P (1))
1240 || (parent
1241 && (! S_ISREG (cmp.file[0].stat.st_mode)
1242 || ! S_ISREG (cmp.file[1].stat.st_mode))))
1243 {
1244 if (cmp.file[0].desc == NONEXISTENT || cmp.file[1].desc == NONEXISTENT)
1245 {
1246 /* We have a subdirectory that exists only in one directory. */
1247
1248 if ((DIR_P (0) | DIR_P (1))
1249 && recursive
1250 && (new_file
1251 || (unidirectional_new_file
1252 && cmp.file[0].desc == NONEXISTENT)))
1253 status = diff_dirs (&cmp, compare_files);
1254 else
1255 {
1256 char const *dir
1257 = parent->file[cmp.file[0].desc == NONEXISTENT].name;
1258
1259 /* See POSIX 1003.1-2001 for this format. */
1260 message ("Only in %s: %s\n", dir, name0);
1261
1262 status = EXIT_FAILURE;
1263 }
1264 }
1265 else
1266 {
1267 /* We have two files that are not to be compared. */
1268
1269 /* See POSIX 1003.1-2001 for this format. */
1270 message5 ("File %s is a %s while file %s is a %s\n",
1271 file_label[0] ? file_label[0] : cmp.file[0].name,
1272 filetype (&cmp.file[0].stat),
1273 file_label[1] ? file_label[1] : cmp.file[1].name,
1274 filetype (&cmp.file[1].stat));
1275
1276 /* This is a difference. */
1277 status = EXIT_FAILURE;
1278 }
1279 }
1280 else if (files_can_be_treated_as_binary
1281 && cmp.file[0].stat.st_size != cmp.file[1].stat.st_size
1282 && (cmp.file[0].desc == NONEXISTENT
1283 || S_ISREG (cmp.file[0].stat.st_mode))
1284 && (cmp.file[1].desc == NONEXISTENT
1285 || S_ISREG (cmp.file[1].stat.st_mode)))
1286 {
1287 message ("Files %s and %s differ\n",
1288 file_label[0] ? file_label[0] : cmp.file[0].name,
1289 file_label[1] ? file_label[1] : cmp.file[1].name);
1290 status = EXIT_FAILURE;
1291 }
1292 else
1293 {
1294 /* Both exist and neither is a directory. */
1295
1296 /* Open the files and record their descriptors. */
1297
1298 if (cmp.file[0].desc == UNOPENED)
1299 if ((cmp.file[0].desc = open (cmp.file[0].name, O_RDONLY, 0)) < 0)
1300 {
1301 perror_with_name (cmp.file[0].name);
1302 status = EXIT_TROUBLE;
1303 }
1304 if (cmp.file[1].desc == UNOPENED)
1305 {
1306 if (same_files)
1307 cmp.file[1].desc = cmp.file[0].desc;
1308 else if ((cmp.file[1].desc = open (cmp.file[1].name, O_RDONLY, 0))
1309 < 0)
1310 {
1311 perror_with_name (cmp.file[1].name);
1312 status = EXIT_TROUBLE;
1313 }
1314 }
1315
1316#if HAVE_SETMODE_DOS
1317 if (binary)
1318 for (f = 0; f < 2; f++)
1319 if (0 <= cmp.file[f].desc)
1320 set_binary_mode (cmp.file[f].desc, 1);
1321#endif
1322
1323 /* Compare the files, if no error was found. */
1324
1325 if (status == EXIT_SUCCESS)
1326 status = diff_2_files (&cmp);
1327
1328 /* Close the file descriptors. */
1329
1330 if (0 <= cmp.file[0].desc && close (cmp.file[0].desc) != 0)
1331 {
1332 perror_with_name (cmp.file[0].name);
1333 status = EXIT_TROUBLE;
1334 }
1335 if (0 <= cmp.file[1].desc && cmp.file[0].desc != cmp.file[1].desc
1336 && close (cmp.file[1].desc) != 0)
1337 {
1338 perror_with_name (cmp.file[1].name);
1339 status = EXIT_TROUBLE;
1340 }
1341 }
1342
1343 /* Now the comparison has been done, if no error prevented it,
1344 and STATUS is the value this function will return. */
1345
1346 if (status == EXIT_SUCCESS)
1347 {
1348 if (report_identical_files && !DIR_P (0))
1349 message ("Files %s and %s are identical\n",
1350 file_label[0] ? file_label[0] : cmp.file[0].name,
1351 file_label[1] ? file_label[1] : cmp.file[1].name);
1352 }
1353 else
1354 {
1355 /* Flush stdout so that the user sees differences immediately.
1356 This can hurt performance, unfortunately. */
1357 if (fflush (stdout) != 0)
1358 pfatal_with_name (_("standard output"));
1359 }
1360
1361 if (free0)
1362 free (free0);
1363 if (free1)
1364 free (free1);
1365
1366 return status;
1367}
Note: See TracBrowser for help on using the repository browser.