source: trunk/diffutils/src/cmp.c@ 3020

Last change on this file since 3020 was 2644, checked in by bird, 20 years ago

open the files in binary mode!

File size: 18.4 KB
Line 
1/* cmp - compare two files byte by byte
2
3 Copyright (C) 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1998, 2001,
4 2002 Free Software Foundation, Inc.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14 See the GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; see the file COPYING.
18 If not, write to the Free Software Foundation,
19 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
20
21
22#include "system.h"
23
24#include <stdio.h>
25#include <cmpbuf.h>
26#include <c-stack.h>
27#include <error.h>
28#include <exitfail.h>
29#include <freesoft.h>
30#include <getopt.h>
31#include <hard-locale.h>
32#include <inttostr.h>
33#include <setmode.h>
34#include <xalloc.h>
35#include <xstrtol.h>
36
/* hard_locale_LC_MESSAGES expands to hard_locale (LC_MESSAGES) when
   LC_MESSAGES is defined and ENABLE_NLS is nonzero, and to the constant 0
   in every other configuration.  */
#if !defined LC_MESSAGES || !ENABLE_NLS
# define hard_locale_LC_MESSAGES 0
#else
# define hard_locale_LC_MESSAGES hard_locale (LC_MESSAGES)
#endif
42
43static char const authorship_msgid[] =
44 N_("Written by Torbjorn Granlund and David MacKenzie.");
45
46static char const copyright_string[] =
47 "Copyright (C) 2002 Free Software Foundation, Inc.";
48
49extern char const version_string[];
50
51static int cmp (void);
52static off_t file_position (int);
53static size_t block_compare (word const *, word const *);
54static size_t block_compare_and_count (word const *, word const *, off_t *);
55static void sprintc (char *, unsigned char);
56
57/* Name under which this program was invoked. */
58char *program_name;
59
60/* Filenames of the compared files. */
61static char const *file[2];
62
63/* File descriptors of the files. */
64static int file_desc[2];
65
66/* Status of the files. */
67static struct stat stat_buf[2];
68
69/* Read buffers for the files. */
70static word *buffer[2];
71
72/* Optimal block size for the files. */
73static size_t buf_size;
74
75/* Initial prefix to ignore for each file. */
76static off_t ignore_initial[2];
77
78/* Number of bytes to compare. */
79static uintmax_t bytes = UINTMAX_MAX;
80
81/* Output format. */
82static enum comparison_type
83 {
84 type_first_diff, /* Print the first difference. */
85 type_all_diffs, /* Print all differences. */
86 type_status /* Exit status only. */
87 } comparison_type;
88
89/* If nonzero, print values of bytes quoted like cat -t does. */
90static bool opt_print_bytes;
91
/* Codes returned by getopt_long for long options without a one-letter
   form.  They start just above CHAR_MAX so that they cannot collide
   with any option character.  */
enum
{
  HELP_OPTION = CHAR_MAX + 1
};

/* Long option table handed to getopt_long.  Every entry leaves the flag
   pointer null, so getopt_long returns the value in the last column.  */
static const struct option long_options[] =
{
  {"print-bytes",    no_argument,       NULL, 'b'},
  {"print-chars",    no_argument,       NULL, 'c'}, /* obsolescent as of diffutils 2.7.3 */
  {"ignore-initial", required_argument, NULL, 'i'},
  {"verbose",        no_argument,       NULL, 'l'},
  {"bytes",          required_argument, NULL, 'n'},
  {"silent",         no_argument,       NULL, 's'},
  {"quiet",          no_argument,       NULL, 's'},
  {"version",        no_argument,       NULL, 'v'},
  {"help",           no_argument,       NULL, HELP_OPTION},
  {NULL, 0, NULL, 0}
};
111
112
113static void try_help (char const *, char const *) __attribute__((noreturn));
114static void
115try_help (char const *reason_msgid, char const *operand)
116{
117 if (reason_msgid)
118 error (0, 0, _(reason_msgid), operand);
119 error (EXIT_TROUBLE, 0,
120 _("Try `%s --help' for more information."), program_name);
121 abort ();
122}
123
/* Suffix characters passed to xstrtoumax when parsing the SKIP and
   LIMIT operands.  */
static const char valid_suffixes[] = "kKMGTPEZY0";
125
126/* Parse an operand *ARGPTR of --ignore-initial, updating *ARGPTR to
127 point after the operand. If DELIMITER is nonzero, the operand may
128 be followed by DELIMITER; otherwise it must be null-terminated. */
129static off_t
130parse_ignore_initial (char **argptr, char delimiter)
131{
132 uintmax_t val;
133 off_t o;
134 char const *arg = *argptr;
135 strtol_error e = xstrtoumax (arg, argptr, 0, &val, valid_suffixes);
136 if (! (e == LONGINT_OK
137 || (e == LONGINT_INVALID_SUFFIX_CHAR && **argptr == delimiter))
138 || (o = val) < 0 || o != val || val == UINTMAX_MAX)
139 try_help ("invalid --ignore-initial value `%s'", arg);
140 return o;
141}
142
143/* Specify the output format. */
144static void
145specify_comparison_type (enum comparison_type t)
146{
147 if (comparison_type)
148 try_help ("options -l and -s are incompatible", 0);
149 comparison_type = t;
150}
151
152static void
153check_stdout (void)
154{
155 if (ferror (stdout))
156 error (EXIT_TROUBLE, 0, "%s", _("write failed"));
157 else if (fclose (stdout) != 0)
158 error (EXIT_TROUBLE, errno, "%s", _("standard output"));
159}
160
161static char const * const option_help_msgid[] = {
162 N_("-b --print-bytes Print differing bytes."),
163 N_("-i SKIP --ignore-initial=SKIP Skip the first SKIP bytes of input."),
164 N_("-i SKIP1:SKIP2 --ignore-initial=SKIP1:SKIP2"),
165 N_(" Skip the first SKIP1 bytes of FILE1 and the first SKIP2 bytes of FILE2."),
166 N_("-l --verbose Output byte numbers and values of all differing bytes."),
167 N_("-n LIMIT --bytes=LIMIT Compare at most LIMIT bytes."),
168 N_("-s --quiet --silent Output nothing; yield exit status only."),
169 N_("-v --version Output version info."),
170 N_("--help Output this help."),
171 0
172};
173
174static void
175usage (void)
176{
177 char const * const *p;
178
179 printf (_("Usage: %s [OPTION]... FILE1 [FILE2 [SKIP1 [SKIP2]]]\n"),
180 program_name);
181 printf ("%s\n\n", _("Compare two files byte by byte."));
182 for (p = option_help_msgid; *p; p++)
183 printf (" %s\n", _(*p));
184 printf ("\n%s\n%s\n\n%s\n\n%s\n",
185 _("SKIP1 and SKIP2 are the number of bytes to skip in each file."),
186 _("SKIP values may be followed by the following multiplicative suffixes:\n\
187kB 1000, K 1024, MB 1,000,000, M 1,048,576,\n\
188GB 1,000,000,000, G 1,073,741,824, and so on for T, P, E, Z, Y."),
189 _("If a FILE is `-' or missing, read standard input."),
190 _("Report bugs to <bug-gnu-utils@gnu.org>."));
191}
192
193
194int
195main (int argc, char **argv)
196{
197 int c, f, exit_status;
198 size_t words_per_buffer;
199
200 exit_failure = EXIT_TROUBLE;
201 initialize_main (&argc, &argv);
202 program_name = argv[0];
203 setlocale (LC_ALL, "");
204 bindtextdomain (PACKAGE, LOCALEDIR);
205 textdomain (PACKAGE);
206 c_stack_action (c_stack_die);
207
208 /* Parse command line options. */
209
210 while ((c = getopt_long (argc, argv, "bci:ln:sv", long_options, 0))
211 != -1)
212 switch (c)
213 {
214 case 'b':
215 case 'c': /* 'c' is obsolescent as of diffutils 2.7.3 */
216 opt_print_bytes = 1;
217 break;
218
219 case 'i':
220 ignore_initial[0] = parse_ignore_initial (&optarg, ':');
221 ignore_initial[1] = (*optarg++ == ':'
222 ? parse_ignore_initial (&optarg, 0)
223 : ignore_initial[0]);
224 break;
225
226 case 'l':
227 specify_comparison_type (type_all_diffs);
228 break;
229
230 case 'n':
231 {
232 uintmax_t n;
233 if (xstrtoumax (optarg, 0, 0, &n, valid_suffixes) != LONGINT_OK)
234 try_help ("invalid --bytes value `%s'", optarg);
235 if (n < bytes)
236 bytes = n;
237 }
238 break;
239
240 case 's':
241 specify_comparison_type (type_status);
242 break;
243
244 case 'v':
245 printf ("cmp %s\n%s\n\n%s\n\n%s\n",
246 version_string, copyright_string,
247 _(free_software_msgid), _(authorship_msgid));
248 check_stdout ();
249 return EXIT_SUCCESS;
250
251 case HELP_OPTION:
252 usage ();
253 check_stdout ();
254 return EXIT_SUCCESS;
255
256 default:
257 try_help (0, 0);
258 }
259
260 if (optind == argc)
261 try_help ("missing operand after `%s'", argv[argc - 1]);
262
263 file[0] = argv[optind++];
264 file[1] = optind < argc ? argv[optind++] : "-";
265
266 for (f = 0; f < 2 && optind < argc; f++)
267 {
268 char *arg = argv[optind++];
269 ignore_initial[f] = parse_ignore_initial (&arg, 0);
270 }
271
272 if (optind < argc)
273 try_help ("extra operand `%s'", argv[optind]);
274
275 for (f = 0; f < 2; f++)
276 {
277 /* If file[1] is "-", treat it first; this avoids a misdiagnostic if
278 stdin is closed and opening file[0] yields file descriptor 0. */
279 int f1 = f ^ (strcmp (file[1], "-") == 0);
280
281 /* Two files with the same name are identical.
282 But wait until we open the file once, for proper diagnostics. */
283 if (f && file_name_cmp (file[0], file[1]) == 0)
284 return EXIT_SUCCESS;
285
286#ifdef O_BINARY
287 if (strcmp (file[f1], "-") == 0)
288 {
289 file_desc[f1] = STDIN_FILENO;
290 _setmode (file_desc[f1], O_BINARY);
291 }
292 else
293 file_desc[f1] = open (file[f1], O_RDONLY | O_BINARY, 0);
294#else
295 file_desc[f1] = (strcmp (file[f1], "-") == 0
296 ? STDIN_FILENO
297 : open (file[f1], O_RDONLY | O_BINARY, 0));
298#endif
299 if (file_desc[f1] < 0 || fstat (file_desc[f1], stat_buf + f1) != 0)
300 {
301 if (file_desc[f1] < 0 && comparison_type == type_status)
302 exit (EXIT_TROUBLE);
303 else
304 error (EXIT_TROUBLE, errno, "%s", file[f1]);
305 }
306
307 set_binary_mode (file_desc[f1], 1);
308 }
309
310 /* If the files are links to the same inode and have the same file position,
311 they are identical. */
312
313 if (0 < same_file (&stat_buf[0], &stat_buf[1])
314 && same_file_attributes (&stat_buf[0], &stat_buf[1])
315 && file_position (0) == file_position (1))
316 return EXIT_SUCCESS;
317
318 /* If output is redirected to the null device, we may assume `-s'. */
319
320 if (comparison_type != type_status)
321 {
322 struct stat outstat, nullstat;
323
324 if (fstat (STDOUT_FILENO, &outstat) == 0
325 && stat (NULL_DEVICE, &nullstat) == 0
326 && 0 < same_file (&outstat, &nullstat))
327 comparison_type = type_status;
328 }
329
330 /* If only a return code is needed,
331 and if both input descriptors are associated with plain files,
332 conclude that the files differ if they have different sizes
333 and if more bytes will be compared than are in the smaller file. */
334
335 if (comparison_type == type_status
336 && S_ISREG (stat_buf[0].st_mode)
337 && S_ISREG (stat_buf[1].st_mode))
338 {
339 off_t s0 = stat_buf[0].st_size - file_position (0);
340 off_t s1 = stat_buf[1].st_size - file_position (1);
341 if (s0 < 0)
342 s0 = 0;
343 if (s1 < 0)
344 s1 = 0;
345 if (s0 != s1 && MIN (s0, s1) < bytes)
346 exit (EXIT_FAILURE);
347 }
348
349 /* Get the optimal block size of the files. */
350
351 buf_size = buffer_lcm (STAT_BLOCKSIZE (stat_buf[0]),
352 STAT_BLOCKSIZE (stat_buf[1]),
353 PTRDIFF_MAX - sizeof (word));
354
355 /* Allocate word-aligned buffers, with space for sentinels at the end. */
356
357 words_per_buffer = (buf_size + 2 * sizeof (word) - 1) / sizeof (word);
358 buffer[0] = xmalloc (2 * sizeof (word) * words_per_buffer);
359 buffer[1] = buffer[0] + words_per_buffer;
360
361 exit_status = cmp ();
362
363 for (f = 0; f < 2; f++)
364 if (close (file_desc[f]) != 0)
365 error (EXIT_TROUBLE, errno, "%s", file[f]);
366 if (exit_status != 0 && comparison_type != type_status)
367 check_stdout ();
368 exit (exit_status);
369 return exit_status;
370}
371
372
373/* Compare the two files already open on `file_desc[0]' and `file_desc[1]',
374 using `buffer[0]' and `buffer[1]'.
375 Return EXIT_SUCCESS if identical, EXIT_FAILURE if different,
376 >1 if error. */
377
378static int
379cmp (void)
380{
381 off_t line_number = 1; /* Line number (1...) of difference. */
382 off_t byte_number = 1; /* Byte number (1...) of difference. */
383 uintmax_t remaining = bytes; /* Remaining number of bytes to compare. */
384 size_t read0, read1; /* Number of bytes read from each file. */
385 size_t first_diff; /* Offset (0...) in buffers of 1st diff. */
386 size_t smaller; /* The lesser of `read0' and `read1'. */
387 word *buffer0 = buffer[0];
388 word *buffer1 = buffer[1];
389 char *buf0 = (char *) buffer0;
390 char *buf1 = (char *) buffer1;
391 int ret = EXIT_SUCCESS;
392 int f;
393 int offset_width;
394
395 if (comparison_type == type_all_diffs)
396 {
397 off_t byte_number_max = MIN (bytes, TYPE_MAXIMUM (off_t));
398
399 for (f = 0; f < 2; f++)
400 if (S_ISREG (stat_buf[f].st_mode))
401 {
402 off_t file_bytes = stat_buf[f].st_size - file_position (f);
403 if (file_bytes < byte_number_max)
404 byte_number_max = file_bytes;
405 }
406
407 for (offset_width = 1; (byte_number_max /= 10) != 0; offset_width++)
408 continue;
409 }
410
411 for (f = 0; f < 2; f++)
412 {
413 off_t ig = ignore_initial[f];
414 if (ig && file_position (f) == -1)
415 {
416 /* lseek failed; read and discard the ignored initial prefix. */
417 do
418 {
419 size_t bytes_to_read = MIN (ig, buf_size);
420 size_t r = block_read (file_desc[f], buf0, bytes_to_read);
421 if (r != bytes_to_read)
422 {
423 if (r == SIZE_MAX)
424 error (EXIT_TROUBLE, errno, "%s", file[f]);
425 break;
426 }
427 ig -= r;
428 }
429 while (ig);
430 }
431 }
432
433 do
434 {
435 size_t bytes_to_read = buf_size;
436
437 if (remaining != UINTMAX_MAX)
438 {
439 if (remaining < bytes_to_read)
440 bytes_to_read = remaining;
441 remaining -= bytes_to_read;
442 }
443
444 read0 = block_read (file_desc[0], buf0, bytes_to_read);
445 if (read0 == SIZE_MAX)
446 error (EXIT_TROUBLE, errno, "%s", file[0]);
447 read1 = block_read (file_desc[1], buf1, bytes_to_read);
448 if (read1 == SIZE_MAX)
449 error (EXIT_TROUBLE, errno, "%s", file[1]);
450
451 /* Insert sentinels for the block compare. */
452
453 buf0[read0] = ~buf1[read0];
454 buf1[read1] = ~buf0[read1];
455
456 /* If the line number should be written for differing files,
457 compare the blocks and count the number of newlines
458 simultaneously. */
459 first_diff = (comparison_type == type_first_diff
460 ? block_compare_and_count (buffer0, buffer1, &line_number)
461 : block_compare (buffer0, buffer1));
462
463 byte_number += first_diff;
464 smaller = MIN (read0, read1);
465
466 if (first_diff < smaller)
467 {
468 switch (comparison_type)
469 {
470 case type_first_diff:
471 {
472 char byte_buf[INT_BUFSIZE_BOUND (off_t)];
473 char line_buf[INT_BUFSIZE_BOUND (off_t)];
474 char const *byte_num = offtostr (byte_number, byte_buf);
475 char const *line_num = offtostr (line_number, line_buf);
476 if (!opt_print_bytes)
477 {
478 /* See POSIX 1003.1-2001 for this format. This
479 message is used only in the POSIX locale, so it
480 need not be translated. */
481 static char const char_message[] =
482 "%s %s differ: char %s, line %s\n";
483
484 /* The POSIX rationale recommends using the word
485 "byte" outside the POSIX locale. Some gettext
486 implementations translate even in the POSIX
487 locale if certain other environment variables
488 are set, so use "byte" if a translation is
489 available, or if outside the POSIX locale. */
490 static char const byte_msgid[] =
491 N_("%s %s differ: byte %s, line %s\n");
492 char const *byte_message = _(byte_msgid);
493 bool use_byte_message = (byte_message != byte_msgid
494 || hard_locale_LC_MESSAGES);
495
496 printf ((use_byte_message
497 ? byte_message
498 : "%s %s differ: char %s, line %s\n"),
499 file[0], file[1], byte_num, line_num);
500 }
501 else
502 {
503 unsigned char c0 = buf0[first_diff];
504 unsigned char c1 = buf1[first_diff];
505 char s0[5];
506 char s1[5];
507 sprintc (s0, c0);
508 sprintc (s1, c1);
509 printf (_("%s %s differ: byte %s, line %s is %3o %s %3o %s\n"),
510 file[0], file[1], byte_num, line_num,
511 c0, s0, c1, s1);
512 }
513 }
514 /* Fall through. */
515 case type_status:
516 return EXIT_FAILURE;
517
518 case type_all_diffs:
519 do
520 {
521 unsigned char c0 = buf0[first_diff];
522 unsigned char c1 = buf1[first_diff];
523 if (c0 != c1)
524 {
525 char byte_buf[INT_BUFSIZE_BOUND (off_t)];
526 char const *byte_num = offtostr (byte_number, byte_buf);
527 if (!opt_print_bytes)
528 {
529 /* See POSIX 1003.1-2001 for this format. */
530 printf ("%*s %3o %3o\n",
531 offset_width, byte_num, c0, c1);
532 }
533 else
534 {
535 char s0[5];
536 char s1[5];
537 sprintc (s0, c0);
538 sprintc (s1, c1);
539 printf ("%*s %3o %-4s %3o %s\n",
540 offset_width, byte_num, c0, s0, c1, s1);
541 }
542 }
543 byte_number++;
544 first_diff++;
545 }
546 while (first_diff < smaller);
547 ret = EXIT_FAILURE;
548 break;
549 }
550 }
551
552 if (read0 != read1)
553 {
554 if (comparison_type != type_status)
555 {
556 /* See POSIX 1003.1-2001 for this format. */
557 fprintf (stderr, _("cmp: EOF on %s\n"), file[read1 < read0]);
558 }
559
560 return EXIT_FAILURE;
561 }
562 }
563 while (read0 == buf_size);
564
565 return ret;
566}
567
568
569/* Compare two blocks of memory P0 and P1 until they differ,
570 and count the number of '\n' occurrences in the common
571 part of P0 and P1.
572 If the blocks are not guaranteed to be different, put sentinels at the ends
573 of the blocks before calling this function.
574
575 Return the offset of the first byte that differs.
576 Increment *COUNT by the count of '\n' occurrences. */
577
578static size_t
579block_compare_and_count (word const *p0, word const *p1, off_t *count)
580{
581 word l; /* One word from first buffer. */
582 word const *l0, *l1; /* Pointers into each buffer. */
583 char const *c0, *c1; /* Pointers for finding exact address. */
584 size_t cnt = 0; /* Number of '\n' occurrences. */
585 word nnnn; /* Newline, sizeof (word) times. */
586 int i;
587
588 nnnn = 0;
589 for (i = 0; i < sizeof nnnn; i++)
590 nnnn = (nnnn << CHAR_BIT) | '\n';
591
592 /* Find the rough position of the first difference by reading words,
593 not bytes. */
594
595 for (l0 = p0, l1 = p1; (l = *l0) == *l1; l0++, l1++)
596 {
597 l ^= nnnn;
598 for (i = 0; i < sizeof l; i++)
599 {
600 cnt += ! (unsigned char) l;
601 l >>= CHAR_BIT;
602 }
603 }
604
605 /* Find the exact differing position (endianness independent). */
606
607 for (c0 = (char const *) l0, c1 = (char const *) l1;
608 *c0 == *c1;
609 c0++, c1++)
610 cnt += *c0 == '\n';
611
612 *count += cnt;
613 return c0 - (char const *) p0;
614}
615
616
617/* Compare two blocks of memory P0 and P1 until they differ.
618 If the blocks are not guaranteed to be different, put sentinels at the ends
619 of the blocks before calling this function.
620
621 Return the offset of the first byte that differs. */
622
623static size_t
624block_compare (word const *p0, word const *p1)
625{
626 word const *l0, *l1;
627 char const *c0, *c1;
628
629 /* Find the rough position of the first difference by reading words,
630 not bytes. */
631
632 for (l0 = p0, l1 = p1; *l0 == *l1; l0++, l1++)
633 continue;
634
635 /* Find the exact differing position (endianness independent). */
636
637 for (c0 = (char const *) l0, c1 = (char const *) l1;
638 *c0 == *c1;
639 c0++, c1++)
640 continue;
641
642 return c0 - (char const *) p0;
643}
644
/* Store in BUF a null-terminated, visible rendering of the byte C,
   quoted the way cat -t does it.

   A byte accepted by ISPRINT is stored unchanged.  Otherwise a byte of
   128 or more gets the prefix "M-" and is reduced by 128; a remaining
   value below 32 becomes '^' followed by the value plus 64, and 127
   becomes "^?".  The longest result is "M-^?", so BUF needs room for
   five chars.  */

static void
sprintc (char *buf, unsigned char c)
{
  char *out = buf;

  if (ISPRINT (c))
    {
      out[0] = c;
      out[1] = 0;
      return;
    }

  if (c >= 128)
    {
      *out++ = 'M';
      *out++ = '-';
      c -= 128;
    }

  if (c == 127)
    {
      *out++ = '^';
      c = '?';
    }
  else if (c < 32)
    {
      *out++ = '^';
      c += 64;
    }

  *out++ = c;
  *out = 0;
}
674
675
676/* Position file F to ignore_initial[F] bytes from its initial position,
677 and yield its new position. Don't try more than once. */
678
679static off_t
680file_position (int f)
681{
682 static bool positioned[2];
683 static off_t position[2];
684
685 if (! positioned[f])
686 {
687 positioned[f] = 1;
688 position[f] = lseek (file_desc[f], ignore_initial[f], SEEK_CUR);
689 }
690 return position[f];
691}
Note: See TracBrowser for help on using the repository browser.