source: vendor/diffutils/2.8.1/src/cmp.c

Last change on this file was 2556, checked in by bird, 19 years ago

diffutils 2.8.1

File size: 18.1 KB
Line 
1/* cmp - compare two files byte by byte
2
3 Copyright (C) 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1998, 2001,
4 2002 Free Software Foundation, Inc.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14 See the GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; see the file COPYING.
18 If not, write to the Free Software Foundation,
19 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
20
21
22#include "system.h"
23
24#include <stdio.h>
25#include <cmpbuf.h>
26#include <c-stack.h>
27#include <error.h>
28#include <exitfail.h>
29#include <freesoft.h>
30#include <getopt.h>
31#include <hard-locale.h>
32#include <inttostr.h>
33#include <setmode.h>
34#include <xalloc.h>
35#include <xstrtol.h>
36
37#if defined LC_MESSAGES && ENABLE_NLS
38# define hard_locale_LC_MESSAGES hard_locale (LC_MESSAGES)
39#else
40# define hard_locale_LC_MESSAGES 0
41#endif
42
43static char const authorship_msgid[] =
44 N_("Written by Torbjorn Granlund and David MacKenzie.");
45
46static char const copyright_string[] =
47 "Copyright (C) 2002 Free Software Foundation, Inc.";
48
49extern char const version_string[];
50
51static int cmp (void);
52static off_t file_position (int);
53static size_t block_compare (word const *, word const *);
54static size_t block_compare_and_count (word const *, word const *, off_t *);
55static void sprintc (char *, unsigned char);
56
57/* Name under which this program was invoked. */
58char *program_name;
59
60/* Filenames of the compared files. */
61static char const *file[2];
62
63/* File descriptors of the files. */
64static int file_desc[2];
65
66/* Status of the files. */
67static struct stat stat_buf[2];
68
69/* Read buffers for the files. */
70static word *buffer[2];
71
72/* Optimal block size for the files. */
73static size_t buf_size;
74
75/* Initial prefix to ignore for each file. */
76static off_t ignore_initial[2];
77
78/* Number of bytes to compare. */
79static uintmax_t bytes = UINTMAX_MAX;
80
81/* Output format. */
82static enum comparison_type
83 {
84 type_first_diff, /* Print the first difference. */
85 type_all_diffs, /* Print all differences. */
86 type_status /* Exit status only. */
87 } comparison_type;
88
89/* If nonzero, print values of bytes quoted like cat -t does. */
90static bool opt_print_bytes;
91
92/* Values for long options that do not have single-letter equivalents. */
93enum
94{
95 HELP_OPTION = CHAR_MAX + 1
96};
97
98static struct option const long_options[] =
99{
100 {"print-bytes", 0, 0, 'b'},
101 {"print-chars", 0, 0, 'c'}, /* obsolescent as of diffutils 2.7.3 */
102 {"ignore-initial", 1, 0, 'i'},
103 {"verbose", 0, 0, 'l'},
104 {"bytes", 1, 0, 'n'},
105 {"silent", 0, 0, 's'},
106 {"quiet", 0, 0, 's'},
107 {"version", 0, 0, 'v'},
108 {"help", 0, 0, HELP_OPTION},
109 {0, 0, 0, 0}
110};
111
112
113static void try_help (char const *, char const *) __attribute__((noreturn));
114static void
115try_help (char const *reason_msgid, char const *operand)
116{
117 if (reason_msgid)
118 error (0, 0, _(reason_msgid), operand);
119 error (EXIT_TROUBLE, 0,
120 _("Try `%s --help' for more information."), program_name);
121 abort ();
122}
123
124static char const valid_suffixes[] = "kKMGTPEZY0";
125
126/* Parse an operand *ARGPTR of --ignore-initial, updating *ARGPTR to
127 point after the operand. If DELIMITER is nonzero, the operand may
128 be followed by DELIMITER; otherwise it must be null-terminated. */
129static off_t
130parse_ignore_initial (char **argptr, char delimiter)
131{
132 uintmax_t val;
133 off_t o;
134 char const *arg = *argptr;
135 strtol_error e = xstrtoumax (arg, argptr, 0, &val, valid_suffixes);
136 if (! (e == LONGINT_OK
137 || (e == LONGINT_INVALID_SUFFIX_CHAR && **argptr == delimiter))
138 || (o = val) < 0 || o != val || val == UINTMAX_MAX)
139 try_help ("invalid --ignore-initial value `%s'", arg);
140 return o;
141}
142
143/* Specify the output format. */
144static void
145specify_comparison_type (enum comparison_type t)
146{
147 if (comparison_type)
148 try_help ("options -l and -s are incompatible", 0);
149 comparison_type = t;
150}
151
152static void
153check_stdout (void)
154{
155 if (ferror (stdout))
156 error (EXIT_TROUBLE, 0, "%s", _("write failed"));
157 else if (fclose (stdout) != 0)
158 error (EXIT_TROUBLE, errno, "%s", _("standard output"));
159}
160
161static char const * const option_help_msgid[] = {
162 N_("-b --print-bytes Print differing bytes."),
163 N_("-i SKIP --ignore-initial=SKIP Skip the first SKIP bytes of input."),
164 N_("-i SKIP1:SKIP2 --ignore-initial=SKIP1:SKIP2"),
165 N_(" Skip the first SKIP1 bytes of FILE1 and the first SKIP2 bytes of FILE2."),
166 N_("-l --verbose Output byte numbers and values of all differing bytes."),
167 N_("-n LIMIT --bytes=LIMIT Compare at most LIMIT bytes."),
168 N_("-s --quiet --silent Output nothing; yield exit status only."),
169 N_("-v --version Output version info."),
170 N_("--help Output this help."),
171 0
172};
173
174static void
175usage (void)
176{
177 char const * const *p;
178
179 printf (_("Usage: %s [OPTION]... FILE1 [FILE2 [SKIP1 [SKIP2]]]\n"),
180 program_name);
181 printf ("%s\n\n", _("Compare two files byte by byte."));
182 for (p = option_help_msgid; *p; p++)
183 printf (" %s\n", _(*p));
184 printf ("\n%s\n%s\n\n%s\n\n%s\n",
185 _("SKIP1 and SKIP2 are the number of bytes to skip in each file."),
186 _("SKIP values may be followed by the following multiplicative suffixes:\n\
187kB 1000, K 1024, MB 1,000,000, M 1,048,576,\n\
188GB 1,000,000,000, G 1,073,741,824, and so on for T, P, E, Z, Y."),
189 _("If a FILE is `-' or missing, read standard input."),
190 _("Report bugs to <bug-gnu-utils@gnu.org>."));
191}
192
193
194int
195main (int argc, char **argv)
196{
197 int c, f, exit_status;
198 size_t words_per_buffer;
199
200 exit_failure = EXIT_TROUBLE;
201 initialize_main (&argc, &argv);
202 program_name = argv[0];
203 setlocale (LC_ALL, "");
204 bindtextdomain (PACKAGE, LOCALEDIR);
205 textdomain (PACKAGE);
206 c_stack_action (c_stack_die);
207
208 /* Parse command line options. */
209
210 while ((c = getopt_long (argc, argv, "bci:ln:sv", long_options, 0))
211 != -1)
212 switch (c)
213 {
214 case 'b':
215 case 'c': /* 'c' is obsolescent as of diffutils 2.7.3 */
216 opt_print_bytes = 1;
217 break;
218
219 case 'i':
220 ignore_initial[0] = parse_ignore_initial (&optarg, ':');
221 ignore_initial[1] = (*optarg++ == ':'
222 ? parse_ignore_initial (&optarg, 0)
223 : ignore_initial[0]);
224 break;
225
226 case 'l':
227 specify_comparison_type (type_all_diffs);
228 break;
229
230 case 'n':
231 {
232 uintmax_t n;
233 if (xstrtoumax (optarg, 0, 0, &n, valid_suffixes) != LONGINT_OK)
234 try_help ("invalid --bytes value `%s'", optarg);
235 if (n < bytes)
236 bytes = n;
237 }
238 break;
239
240 case 's':
241 specify_comparison_type (type_status);
242 break;
243
244 case 'v':
245 printf ("cmp %s\n%s\n\n%s\n\n%s\n",
246 version_string, copyright_string,
247 _(free_software_msgid), _(authorship_msgid));
248 check_stdout ();
249 return EXIT_SUCCESS;
250
251 case HELP_OPTION:
252 usage ();
253 check_stdout ();
254 return EXIT_SUCCESS;
255
256 default:
257 try_help (0, 0);
258 }
259
260 if (optind == argc)
261 try_help ("missing operand after `%s'", argv[argc - 1]);
262
263 file[0] = argv[optind++];
264 file[1] = optind < argc ? argv[optind++] : "-";
265
266 for (f = 0; f < 2 && optind < argc; f++)
267 {
268 char *arg = argv[optind++];
269 ignore_initial[f] = parse_ignore_initial (&arg, 0);
270 }
271
272 if (optind < argc)
273 try_help ("extra operand `%s'", argv[optind]);
274
275 for (f = 0; f < 2; f++)
276 {
277 /* If file[1] is "-", treat it first; this avoids a misdiagnostic if
278 stdin is closed and opening file[0] yields file descriptor 0. */
279 int f1 = f ^ (strcmp (file[1], "-") == 0);
280
281 /* Two files with the same name are identical.
282 But wait until we open the file once, for proper diagnostics. */
283 if (f && file_name_cmp (file[0], file[1]) == 0)
284 return EXIT_SUCCESS;
285
286 file_desc[f1] = (strcmp (file[f1], "-") == 0
287 ? STDIN_FILENO
288 : open (file[f1], O_RDONLY, 0));
289 if (file_desc[f1] < 0 || fstat (file_desc[f1], stat_buf + f1) != 0)
290 {
291 if (file_desc[f1] < 0 && comparison_type == type_status)
292 exit (EXIT_TROUBLE);
293 else
294 error (EXIT_TROUBLE, errno, "%s", file[f1]);
295 }
296
297 set_binary_mode (file_desc[f1], 1);
298 }
299
300 /* If the files are links to the same inode and have the same file position,
301 they are identical. */
302
303 if (0 < same_file (&stat_buf[0], &stat_buf[1])
304 && same_file_attributes (&stat_buf[0], &stat_buf[1])
305 && file_position (0) == file_position (1))
306 return EXIT_SUCCESS;
307
308 /* If output is redirected to the null device, we may assume `-s'. */
309
310 if (comparison_type != type_status)
311 {
312 struct stat outstat, nullstat;
313
314 if (fstat (STDOUT_FILENO, &outstat) == 0
315 && stat (NULL_DEVICE, &nullstat) == 0
316 && 0 < same_file (&outstat, &nullstat))
317 comparison_type = type_status;
318 }
319
320 /* If only a return code is needed,
321 and if both input descriptors are associated with plain files,
322 conclude that the files differ if they have different sizes
323 and if more bytes will be compared than are in the smaller file. */
324
325 if (comparison_type == type_status
326 && S_ISREG (stat_buf[0].st_mode)
327 && S_ISREG (stat_buf[1].st_mode))
328 {
329 off_t s0 = stat_buf[0].st_size - file_position (0);
330 off_t s1 = stat_buf[1].st_size - file_position (1);
331 if (s0 < 0)
332 s0 = 0;
333 if (s1 < 0)
334 s1 = 0;
335 if (s0 != s1 && MIN (s0, s1) < bytes)
336 exit (EXIT_FAILURE);
337 }
338
339 /* Get the optimal block size of the files. */
340
341 buf_size = buffer_lcm (STAT_BLOCKSIZE (stat_buf[0]),
342 STAT_BLOCKSIZE (stat_buf[1]),
343 PTRDIFF_MAX - sizeof (word));
344
345 /* Allocate word-aligned buffers, with space for sentinels at the end. */
346
347 words_per_buffer = (buf_size + 2 * sizeof (word) - 1) / sizeof (word);
348 buffer[0] = xmalloc (2 * sizeof (word) * words_per_buffer);
349 buffer[1] = buffer[0] + words_per_buffer;
350
351 exit_status = cmp ();
352
353 for (f = 0; f < 2; f++)
354 if (close (file_desc[f]) != 0)
355 error (EXIT_TROUBLE, errno, "%s", file[f]);
356 if (exit_status != 0 && comparison_type != type_status)
357 check_stdout ();
358 exit (exit_status);
359 return exit_status;
360}
361
362
363/* Compare the two files already open on `file_desc[0]' and `file_desc[1]',
364 using `buffer[0]' and `buffer[1]'.
365 Return EXIT_SUCCESS if identical, EXIT_FAILURE if different,
366 >1 if error. */
367
368static int
369cmp (void)
370{
371 off_t line_number = 1; /* Line number (1...) of difference. */
372 off_t byte_number = 1; /* Byte number (1...) of difference. */
373 uintmax_t remaining = bytes; /* Remaining number of bytes to compare. */
374 size_t read0, read1; /* Number of bytes read from each file. */
375 size_t first_diff; /* Offset (0...) in buffers of 1st diff. */
376 size_t smaller; /* The lesser of `read0' and `read1'. */
377 word *buffer0 = buffer[0];
378 word *buffer1 = buffer[1];
379 char *buf0 = (char *) buffer0;
380 char *buf1 = (char *) buffer1;
381 int ret = EXIT_SUCCESS;
382 int f;
383 int offset_width;
384
385 if (comparison_type == type_all_diffs)
386 {
387 off_t byte_number_max = MIN (bytes, TYPE_MAXIMUM (off_t));
388
389 for (f = 0; f < 2; f++)
390 if (S_ISREG (stat_buf[f].st_mode))
391 {
392 off_t file_bytes = stat_buf[f].st_size - file_position (f);
393 if (file_bytes < byte_number_max)
394 byte_number_max = file_bytes;
395 }
396
397 for (offset_width = 1; (byte_number_max /= 10) != 0; offset_width++)
398 continue;
399 }
400
401 for (f = 0; f < 2; f++)
402 {
403 off_t ig = ignore_initial[f];
404 if (ig && file_position (f) == -1)
405 {
406 /* lseek failed; read and discard the ignored initial prefix. */
407 do
408 {
409 size_t bytes_to_read = MIN (ig, buf_size);
410 size_t r = block_read (file_desc[f], buf0, bytes_to_read);
411 if (r != bytes_to_read)
412 {
413 if (r == SIZE_MAX)
414 error (EXIT_TROUBLE, errno, "%s", file[f]);
415 break;
416 }
417 ig -= r;
418 }
419 while (ig);
420 }
421 }
422
423 do
424 {
425 size_t bytes_to_read = buf_size;
426
427 if (remaining != UINTMAX_MAX)
428 {
429 if (remaining < bytes_to_read)
430 bytes_to_read = remaining;
431 remaining -= bytes_to_read;
432 }
433
434 read0 = block_read (file_desc[0], buf0, bytes_to_read);
435 if (read0 == SIZE_MAX)
436 error (EXIT_TROUBLE, errno, "%s", file[0]);
437 read1 = block_read (file_desc[1], buf1, bytes_to_read);
438 if (read1 == SIZE_MAX)
439 error (EXIT_TROUBLE, errno, "%s", file[1]);
440
441 /* Insert sentinels for the block compare. */
442
443 buf0[read0] = ~buf1[read0];
444 buf1[read1] = ~buf0[read1];
445
446 /* If the line number should be written for differing files,
447 compare the blocks and count the number of newlines
448 simultaneously. */
449 first_diff = (comparison_type == type_first_diff
450 ? block_compare_and_count (buffer0, buffer1, &line_number)
451 : block_compare (buffer0, buffer1));
452
453 byte_number += first_diff;
454 smaller = MIN (read0, read1);
455
456 if (first_diff < smaller)
457 {
458 switch (comparison_type)
459 {
460 case type_first_diff:
461 {
462 char byte_buf[INT_BUFSIZE_BOUND (off_t)];
463 char line_buf[INT_BUFSIZE_BOUND (off_t)];
464 char const *byte_num = offtostr (byte_number, byte_buf);
465 char const *line_num = offtostr (line_number, line_buf);
466 if (!opt_print_bytes)
467 {
468 /* See POSIX 1003.1-2001 for this format. This
469 message is used only in the POSIX locale, so it
470 need not be translated. */
471 static char const char_message[] =
472 "%s %s differ: char %s, line %s\n";
473
474 /* The POSIX rationale recommends using the word
475 "byte" outside the POSIX locale. Some gettext
476 implementations translate even in the POSIX
477 locale if certain other environment variables
478 are set, so use "byte" if a translation is
479 available, or if outside the POSIX locale. */
480 static char const byte_msgid[] =
481 N_("%s %s differ: byte %s, line %s\n");
482 char const *byte_message = _(byte_msgid);
483 bool use_byte_message = (byte_message != byte_msgid
484 || hard_locale_LC_MESSAGES);
485
486 printf ((use_byte_message
487 ? byte_message
488 : "%s %s differ: char %s, line %s\n"),
489 file[0], file[1], byte_num, line_num);
490 }
491 else
492 {
493 unsigned char c0 = buf0[first_diff];
494 unsigned char c1 = buf1[first_diff];
495 char s0[5];
496 char s1[5];
497 sprintc (s0, c0);
498 sprintc (s1, c1);
499 printf (_("%s %s differ: byte %s, line %s is %3o %s %3o %s\n"),
500 file[0], file[1], byte_num, line_num,
501 c0, s0, c1, s1);
502 }
503 }
504 /* Fall through. */
505 case type_status:
506 return EXIT_FAILURE;
507
508 case type_all_diffs:
509 do
510 {
511 unsigned char c0 = buf0[first_diff];
512 unsigned char c1 = buf1[first_diff];
513 if (c0 != c1)
514 {
515 char byte_buf[INT_BUFSIZE_BOUND (off_t)];
516 char const *byte_num = offtostr (byte_number, byte_buf);
517 if (!opt_print_bytes)
518 {
519 /* See POSIX 1003.1-2001 for this format. */
520 printf ("%*s %3o %3o\n",
521 offset_width, byte_num, c0, c1);
522 }
523 else
524 {
525 char s0[5];
526 char s1[5];
527 sprintc (s0, c0);
528 sprintc (s1, c1);
529 printf ("%*s %3o %-4s %3o %s\n",
530 offset_width, byte_num, c0, s0, c1, s1);
531 }
532 }
533 byte_number++;
534 first_diff++;
535 }
536 while (first_diff < smaller);
537 ret = EXIT_FAILURE;
538 break;
539 }
540 }
541
542 if (read0 != read1)
543 {
544 if (comparison_type != type_status)
545 {
546 /* See POSIX 1003.1-2001 for this format. */
547 fprintf (stderr, _("cmp: EOF on %s\n"), file[read1 < read0]);
548 }
549
550 return EXIT_FAILURE;
551 }
552 }
553 while (read0 == buf_size);
554
555 return ret;
556}
557
558
559/* Compare two blocks of memory P0 and P1 until they differ,
560 and count the number of '\n' occurrences in the common
561 part of P0 and P1.
562 If the blocks are not guaranteed to be different, put sentinels at the ends
563 of the blocks before calling this function.
564
565 Return the offset of the first byte that differs.
566 Increment *COUNT by the count of '\n' occurrences. */
567
568static size_t
569block_compare_and_count (word const *p0, word const *p1, off_t *count)
570{
571 word l; /* One word from first buffer. */
572 word const *l0, *l1; /* Pointers into each buffer. */
573 char const *c0, *c1; /* Pointers for finding exact address. */
574 size_t cnt = 0; /* Number of '\n' occurrences. */
575 word nnnn; /* Newline, sizeof (word) times. */
576 int i;
577
578 nnnn = 0;
579 for (i = 0; i < sizeof nnnn; i++)
580 nnnn = (nnnn << CHAR_BIT) | '\n';
581
582 /* Find the rough position of the first difference by reading words,
583 not bytes. */
584
585 for (l0 = p0, l1 = p1; (l = *l0) == *l1; l0++, l1++)
586 {
587 l ^= nnnn;
588 for (i = 0; i < sizeof l; i++)
589 {
590 cnt += ! (unsigned char) l;
591 l >>= CHAR_BIT;
592 }
593 }
594
595 /* Find the exact differing position (endianness independent). */
596
597 for (c0 = (char const *) l0, c1 = (char const *) l1;
598 *c0 == *c1;
599 c0++, c1++)
600 cnt += *c0 == '\n';
601
602 *count += cnt;
603 return c0 - (char const *) p0;
604}
605
606
607/* Compare two blocks of memory P0 and P1 until they differ.
608 If the blocks are not guaranteed to be different, put sentinels at the ends
609 of the blocks before calling this function.
610
611 Return the offset of the first byte that differs. */
612
613static size_t
614block_compare (word const *p0, word const *p1)
615{
616 word const *l0, *l1;
617 char const *c0, *c1;
618
619 /* Find the rough position of the first difference by reading words,
620 not bytes. */
621
622 for (l0 = p0, l1 = p1; *l0 == *l1; l0++, l1++)
623 continue;
624
625 /* Find the exact differing position (endianness independent). */
626
627 for (c0 = (char const *) l0, c1 = (char const *) l1;
628 *c0 == *c1;
629 c0++, c1++)
630 continue;
631
632 return c0 - (char const *) p0;
633}
634
/* Store in BUF a null-terminated, visible rendering of the byte C,
   quoted like cat -t does.  A byte that ISPRINT accepts is stored
   as is.  Otherwise a byte >= 128 gets an "M-" prefix and is reduced
   by 128; after that a value below 32 is written as '^' followed by
   the value plus 64, and 127 is written as "^?".
   The longest result is "M-^" plus one character plus the null byte,
   so BUF must have room for 5 bytes.  */

static void
sprintc (char *buf, unsigned char c)
{
  char *out = buf;

  if (! ISPRINT (c))
    {
      if (c >= 128)
        {
          *out++ = 'M';
          *out++ = '-';
          c -= 128;
        }

      if (c < 32)
        {
          *out++ = '^';
          c += 64;
        }
      else if (c == 127)
        {
          *out++ = '^';
          c = '?';
        }
    }

  *out++ = c;
  *out = '\0';
}
664
665
666/* Position file F to ignore_initial[F] bytes from its initial position,
667 and yield its new position. Don't try more than once. */
668
669static off_t
670file_position (int f)
671{
672 static bool positioned[2];
673 static off_t position[2];
674
675 if (! positioned[f])
676 {
677 positioned[f] = 1;
678 position[f] = lseek (file_desc[f], ignore_initial[f], SEEK_CUR);
679 }
680 return position[f];
681}
Note: See TracBrowser for help on using the repository browser.