source: trunk/essentials/sys-apps/diffutils/src/io.c

Last change on this file was 3079, checked in by bird, 18 years ago

don't mess with non-seekable files.

File size: 23.7 KB
Line 
1/* File I/O for GNU DIFF.
2
3 Copyright (C) 1988, 1989, 1992, 1993, 1994, 1995, 1998, 2001, 2002
4 Free Software Foundation, Inc.
5
6 This file is part of GNU DIFF.
7
8 GNU DIFF is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
12
13 GNU DIFF is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; see the file COPYING.
20 If not, write to the Free Software Foundation,
21 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22
23#include "diff.h"
24#include <cmpbuf.h>
25#include <regex.h>
26#include <setmode.h>
27#include <xalloc.h>
28
29/* Rotate an unsigned value to the left. */
30#define ROL(v, n) ((v) << (n) | (v) >> (sizeof (v) * CHAR_BIT - (n)))
31
32/* Given a hash value and a new character, return a new hash value. */
33#define HASH(h, c) ((c) + ROL (h, 7))
34
35
36/* The type of a hash value. */
37typedef size_t hash_value;
38verify (hash_value_is_unsigned, ! TYPE_SIGNED (hash_value));
39
40/* Lines are put into equivalence classes of lines that match in lines_differ.
41 Each equivalence class is represented by one of these structures,
42 but only while the classes are being computed.
43 Afterward, each class is represented by a number. */
44struct equivclass
45{
46 lin next; /* Next item in this bucket. */
47 hash_value hash; /* Hash of lines in this class. */
48 char const *line; /* A line that fits this class. */
49 size_t length; /* That line's length, not counting its newline. */
50};
51
52/* Hash-table: array of buckets, each being a chain of equivalence classes.
53 buckets[-1] is reserved for incomplete lines. */
54static lin *buckets;
55
56/* Number of buckets in the hash table array, not counting buckets[-1]. */
57static size_t nbuckets;
58
59/* Array in which the equivalence classes are allocated.
60 The bucket-chains go through the elements in this array.
61 The number of an equivalence class is its index in this array. */
62static struct equivclass *equivs;
63
64/* Index of first free element in the array `equivs'. */
65static lin equivs_index;
66
67/* Number of elements allocated in the array `equivs'. */
68static lin equivs_alloc;
69
70
71/* Read a block of data into a file buffer, checking for EOF and error. */
72
73void
74file_block_read (struct file_data *current, size_t size)
75{
76 if (size && ! current->eof)
77 {
78 size_t s = block_read (current->desc,
79 FILE_BUFFER (current) + current->buffered, size);
80 if (s == SIZE_MAX)
81 pfatal_with_name (current->name);
82 current->buffered += s;
83 current->eof = s < size;
84 }
85}
86
87
88/* Check for binary files and compare them for exact identity. */
89
90/* Return 1 if BUF contains a non text character.
91 SIZE is the number of characters in BUF. */
92
93#define binary_file_p(buf, size) (memchr (buf, 0, size) != 0)
94
95/* Get ready to read the current file.
96 Return nonzero if SKIP_TEST is zero,
97 and if it appears to be a binary file. */
98
99static bool
100sip (struct file_data *current, bool skip_test)
101{
102 /* If we have a nonexistent file at this stage, treat it as empty. */
103 if (current->desc < 0)
104 {
105 /* Leave room for a sentinel. */
106 current->bufsize = sizeof (word);
107 current->buffer = xmalloc (current->bufsize);
108 }
109 else
110 {
111 current->bufsize = buffer_lcm (sizeof (word),
112 STAT_BLOCKSIZE (current->stat),
113 PTRDIFF_MAX - 2 * sizeof (word));
114 current->buffer = xmalloc (current->bufsize);
115
116#ifdef __EMX__
117 if (! skip_test
118 && ! lseek (current->desc, 0, SEEK_CUR))
119#else
120 if (! skip_test)
121#endif
122 {
123 /* Check first part of file to see if it's a binary file. */
124
125 bool was_binary = set_binary_mode (current->desc, 1);
126 off_t buffered;
127 file_block_read (current, current->bufsize);
128 buffered = current->buffered;
129
130 if (! was_binary)
131 {
132 /* Revert to text mode and seek back to the beginning to
133 reread the file. Use relative seek, since file
134 descriptors like stdin might not start at offset
135 zero. */
136
137 if (lseek (current->desc, - buffered, SEEK_CUR) == -1)
138 pfatal_with_name (current->name);
139 set_binary_mode (current->desc, 0);
140 current->buffered = 0;
141 current->eof = 0;
142 }
143
144 return binary_file_p (current->buffer, buffered);
145 }
146 }
147
148 current->buffered = 0;
149 current->eof = 0;
150 return 0;
151}
152
153/* Slurp the rest of the current file completely into memory. */
154
155static void
156slurp (struct file_data *current)
157{
158 size_t cc;
159
160 if (current->desc < 0)
161 {
162 /* The file is nonexistent. */
163 return;
164 }
165
166 if (S_ISREG (current->stat.st_mode))
167 {
168 /* It's a regular file; slurp in the rest all at once. */
169
170 /* Get the size out of the stat block.
171 Allocate just enough room for appended newline plus word sentinel,
172 plus word-alignment since we want the buffer word-aligned. */
173 size_t file_size = current->stat.st_size;
174 cc = file_size + 2 * sizeof (word) - file_size % sizeof (word);
175 if (file_size != current->stat.st_size || cc < file_size
176 || PTRDIFF_MAX <= cc)
177 xalloc_die ();
178
179 if (current->bufsize < cc)
180 {
181 current->bufsize = cc;
182 current->buffer = xrealloc (current->buffer, cc);
183 }
184
185 /* Try to read at least 1 more byte than the size indicates, to
186 detect whether the file is growing. This is a nicety for
187 users who run 'diff' on files while they are changing. */
188
189 if (current->buffered <= file_size)
190 {
191 file_block_read (current, file_size + 1 - current->buffered);
192 if (current->buffered <= file_size)
193 return;
194 }
195 }
196
197 /* It's not a regular file, or it's a growing regular file; read it,
198 growing the buffer as needed. */
199
200 file_block_read (current, current->bufsize - current->buffered);
201
202 if (current->buffered)
203 {
204 while (current->buffered == current->bufsize)
205 {
206 if (PTRDIFF_MAX / 2 - sizeof (word) < current->bufsize)
207 xalloc_die ();
208 current->bufsize *= 2;
209 current->buffer = xrealloc (current->buffer, current->bufsize);
210 file_block_read (current, current->bufsize - current->buffered);
211 }
212
213 /* Allocate just enough room for appended newline plus word
214 sentinel, plus word-alignment. */
215 cc = current->buffered + 2 * sizeof (word);
216 current->bufsize = cc - cc % sizeof (word);
217 current->buffer = xrealloc (current->buffer, current->bufsize);
218 }
219}
220
221
222/* Split the file into lines, simultaneously computing the equivalence
223 class for each line. */
224
225static void
226find_and_hash_each_line (struct file_data *current)
227{
228 hash_value h;
229 unsigned char const *p = (unsigned char const *) current->prefix_end;
230 unsigned char c;
231 lin i, *bucket;
232 size_t length;
233
234 /* Cache often-used quantities in local variables to help the compiler. */
235 char const **linbuf = current->linbuf;
236 lin alloc_lines = current->alloc_lines;
237 lin line = 0;
238 lin linbuf_base = current->linbuf_base;
239 lin *cureqs = xmalloc (alloc_lines * sizeof *cureqs);
240 struct equivclass *eqs = equivs;
241 lin eqs_index = equivs_index;
242 lin eqs_alloc = equivs_alloc;
243 char const *suffix_begin = current->suffix_begin;
244 char const *bufend = FILE_BUFFER (current) + current->buffered;
245 bool diff_length_compare_anyway =
246 ignore_white_space != IGNORE_NO_WHITE_SPACE;
247 bool same_length_diff_contents_compare_anyway =
248 diff_length_compare_anyway | ignore_case;
249
250 while ((char const *) p < suffix_begin)
251 {
252 char const *ip = (char const *) p;
253
254 h = 0;
255
256 /* Hash this line until we find a newline. */
257 if (ignore_case)
258 switch (ignore_white_space)
259 {
260 case IGNORE_ALL_SPACE:
261 while ((c = *p++) != '\n')
262 if (! ISSPACE (c))
263 h = HASH (h, TOLOWER (c));
264 break;
265
266 case IGNORE_SPACE_CHANGE:
267 while ((c = *p++) != '\n')
268 {
269 if (ISSPACE (c))
270 {
271 do
272 if ((c = *p++) == '\n')
273 goto hashing_done;
274 while (ISSPACE (c));
275
276 h = HASH (h, ' ');
277 }
278
279 /* C is now the first non-space. */
280 h = HASH (h, TOLOWER (c));
281 }
282 break;
283
284 case IGNORE_TAB_EXPANSION:
285 {
286 size_t column = 0;
287 while ((c = *p++) != '\n')
288 {
289 int repetitions = 1;
290
291 switch (c)
292 {
293 case '\b':
294 column -= 0 < column;
295 break;
296
297 case '\t':
298 c = ' ';
299 repetitions = TAB_WIDTH - column % TAB_WIDTH;
300 column += repetitions;
301 break;
302
303 case '\r':
304 column = 0;
305 break;
306
307 default:
308 c = TOLOWER (c);
309 column++;
310 break;
311 }
312
313 do
314 h = HASH (h, c);
315 while (--repetitions != 0);
316 }
317 }
318 break;
319
320 default:
321 while ((c = *p++) != '\n')
322 h = HASH (h, TOLOWER (c));
323 break;
324 }
325 else
326 switch (ignore_white_space)
327 {
328 case IGNORE_ALL_SPACE:
329 while ((c = *p++) != '\n')
330 if (! ISSPACE (c))
331 h = HASH (h, c);
332 break;
333
334 case IGNORE_SPACE_CHANGE:
335 while ((c = *p++) != '\n')
336 {
337 if (ISSPACE (c))
338 {
339 do
340 if ((c = *p++) == '\n')
341 goto hashing_done;
342 while (ISSPACE (c));
343
344 h = HASH (h, ' ');
345 }
346
347 /* C is now the first non-space. */
348 h = HASH (h, c);
349 }
350 break;
351
352 case IGNORE_TAB_EXPANSION:
353 {
354 size_t column = 0;
355 while ((c = *p++) != '\n')
356 {
357 int repetitions = 1;
358
359 switch (c)
360 {
361 case '\b':
362 column -= 0 < column;
363 break;
364
365 case '\t':
366 c = ' ';
367 repetitions = TAB_WIDTH - column % TAB_WIDTH;
368 column += repetitions;
369 break;
370
371 case '\r':
372 column = 0;
373 break;
374
375 default:
376 column++;
377 break;
378 }
379
380 do
381 h = HASH (h, c);
382 while (--repetitions != 0);
383 }
384 }
385 break;
386
387 default:
388 while ((c = *p++) != '\n')
389 h = HASH (h, c);
390 break;
391 }
392
393 hashing_done:;
394
395 bucket = &buckets[h % nbuckets];
396 length = (char const *) p - ip - 1;
397
398 if ((char const *) p == bufend
399 && current->missing_newline
400 && ROBUST_OUTPUT_STYLE (output_style))
401 {
402 /* This line is incomplete. If this is significant,
403 put the line into buckets[-1]. */
404 if (ignore_white_space < IGNORE_SPACE_CHANGE)
405 bucket = &buckets[-1];
406
407 /* Omit the inserted newline when computing linbuf later. */
408 p--;
409 bufend = suffix_begin = (char const *) p;
410 }
411
412 for (i = *bucket; ; i = eqs[i].next)
413 if (!i)
414 {
415 /* Create a new equivalence class in this bucket. */
416 i = eqs_index++;
417 if (i == eqs_alloc)
418 {
419 if (PTRDIFF_MAX / (2 * sizeof *eqs) <= eqs_alloc)
420 xalloc_die ();
421 eqs_alloc *= 2;
422 eqs = xrealloc (eqs, eqs_alloc * sizeof *eqs);
423 }
424 eqs[i].next = *bucket;
425 eqs[i].hash = h;
426 eqs[i].line = ip;
427 eqs[i].length = length;
428 *bucket = i;
429 break;
430 }
431 else if (eqs[i].hash == h)
432 {
433 char const *eqline = eqs[i].line;
434
435 /* Reuse existing class if lines_differ reports the lines
436 equal. */
437 if (eqs[i].length == length)
438 {
439 /* Reuse existing equivalence class if the lines are identical.
440 This detects the common case of exact identity
441 faster than lines_differ would. */
442 if (memcmp (eqline, ip, length) == 0)
443 break;
444 if (!same_length_diff_contents_compare_anyway)
445 continue;
446 }
447 else if (!diff_length_compare_anyway)
448 continue;
449
450 if (! lines_differ (eqline, ip))
451 break;
452 }
453
454 /* Maybe increase the size of the line table. */
455 if (line == alloc_lines)
456 {
457 /* Double (alloc_lines - linbuf_base) by adding to alloc_lines. */
458 if (PTRDIFF_MAX / 3 <= alloc_lines
459 || PTRDIFF_MAX / sizeof *cureqs <= 2 * alloc_lines - linbuf_base
460 || PTRDIFF_MAX / sizeof *linbuf <= alloc_lines - linbuf_base)
461 xalloc_die ();
462 alloc_lines = 2 * alloc_lines - linbuf_base;
463 cureqs = xrealloc (cureqs, alloc_lines * sizeof *cureqs);
464 linbuf += linbuf_base;
465 linbuf = xrealloc (linbuf,
466 (alloc_lines - linbuf_base) * sizeof *linbuf);
467 linbuf -= linbuf_base;
468 }
469 linbuf[line] = ip;
470 cureqs[line] = i;
471 ++line;
472 }
473
474 current->buffered_lines = line;
475
476 for (i = 0; ; i++)
477 {
478 /* Record the line start for lines in the suffix that we care about.
479 Record one more line start than lines,
480 so that we can compute the length of any buffered line. */
481 if (line == alloc_lines)
482 {
483 /* Double (alloc_lines - linbuf_base) by adding to alloc_lines. */
484 if (PTRDIFF_MAX / 3 <= alloc_lines
485 || PTRDIFF_MAX / sizeof *cureqs <= 2 * alloc_lines - linbuf_base
486 || PTRDIFF_MAX / sizeof *linbuf <= alloc_lines - linbuf_base)
487 xalloc_die ();
488 alloc_lines = 2 * alloc_lines - linbuf_base;
489 linbuf += linbuf_base;
490 linbuf = xrealloc (linbuf,
491 (alloc_lines - linbuf_base) * sizeof *linbuf);
492 linbuf -= linbuf_base;
493 }
494 linbuf[line] = (char const *) p;
495
496 if ((char const *) p == bufend)
497 break;
498
499 if (context <= i && no_diff_means_no_output)
500 break;
501
502 line++;
503
504 while (*p++ != '\n')
505 continue;
506 }
507
508 /* Done with cache in local variables. */
509 current->linbuf = linbuf;
510 current->valid_lines = line;
511 current->alloc_lines = alloc_lines;
512 current->equivs = cureqs;
513 equivs = eqs;
514 equivs_alloc = eqs_alloc;
515 equivs_index = eqs_index;
516}
517
518
519/* Prepare the text. Make sure the text end is initialized.
520 Make sure text ends in a newline,
521 but remember that we had to add one.
522 Strip trailing CRs, if that was requested. */
523
524static void
525prepare_text (struct file_data *current)
526{
527 size_t buffered = current->buffered;
528 char *p = FILE_BUFFER (current);
529 char *dst;
530
531 if (buffered == 0 || p[buffered - 1] == '\n')
532 current->missing_newline = 0;
533 else
534 {
535 p[buffered++] = '\n';
536 current->missing_newline = 1;
537 }
538
539 if (!p)
540 return;
541
542 /* Don't use uninitialized storage when planting or using sentinels. */
543 memset (p + buffered, 0, sizeof (word));
544
545 if (strip_trailing_cr && (dst = memchr (p, '\r', buffered)))
546 {
547 char const *src = dst;
548 char const *srclim = p + buffered;
549
550 do
551 dst += ! ((*dst = *src++) == '\r' && *src == '\n');
552 while (src < srclim);
553
554 buffered -= src - dst;
555 }
556
557 current->buffered = buffered;
558}
559
560
561/* We have found N lines in a buffer of size S; guess the
562 proportionate number of lines that will be found in a buffer of
563 size T. However, do not guess a number of lines so large that the
564 resulting line table might cause overflow in size calculations. */
565static lin
566guess_lines (lin n, size_t s, size_t t)
567{
568 size_t guessed_bytes_per_line = n < 10 ? 32 : s / (n - 1);
569 lin guessed_lines = MAX (1, t / guessed_bytes_per_line);
570 return MIN (guessed_lines, PTRDIFF_MAX / (2 * sizeof (char *) + 1) - 5) + 5;
571}
572
573/* Given a vector of two file_data objects, find the identical
574 prefixes and suffixes of each object. */
575
576static void
577find_identical_ends (struct file_data filevec[])
578{
579 word *w0, *w1;
580 char *p0, *p1, *buffer0, *buffer1;
581 char const *end0, *beg0;
582 char const **linbuf0, **linbuf1;
583 lin i, lines;
584 size_t n0, n1;
585 lin alloc_lines0, alloc_lines1;
586 lin buffered_prefix, prefix_count, prefix_mask;
587 lin middle_guess, suffix_guess;
588
589 slurp (&filevec[0]);
590 prepare_text (&filevec[0]);
591 if (filevec[0].desc != filevec[1].desc)
592 {
593 slurp (&filevec[1]);
594 prepare_text (&filevec[1]);
595 }
596 else
597 {
598 filevec[1].buffer = filevec[0].buffer;
599 filevec[1].bufsize = filevec[0].bufsize;
600 filevec[1].buffered = filevec[0].buffered;
601 filevec[1].missing_newline = filevec[0].missing_newline;
602 }
603
604 /* Find identical prefix. */
605
606 w0 = filevec[0].buffer;
607 w1 = filevec[1].buffer;
608 p0 = buffer0 = (char *) w0;
609 p1 = buffer1 = (char *) w1;
610 n0 = filevec[0].buffered;
611 n1 = filevec[1].buffered;
612
613 if (p0 == p1)
614 /* The buffers are the same; sentinels won't work. */
615 p0 = p1 += n1;
616 else
617 {
618 /* Insert end sentinels, in this case characters that are guaranteed
619 to make the equality test false, and thus terminate the loop. */
620
621 if (n0 < n1)
622 p0[n0] = ~p1[n0];
623 else
624 p1[n1] = ~p0[n1];
625
626 /* Loop until first mismatch, or to the sentinel characters. */
627
628 /* Compare a word at a time for speed. */
629 while (*w0 == *w1)
630 w0++, w1++;
631
632 /* Do the last few bytes of comparison a byte at a time. */
633 p0 = (char *) w0;
634 p1 = (char *) w1;
635 while (*p0 == *p1)
636 p0++, p1++;
637
638 /* Don't mistakenly count missing newline as part of prefix. */
639 if (ROBUST_OUTPUT_STYLE (output_style)
640 && ((buffer0 + n0 - filevec[0].missing_newline < p0)
641 !=
642 (buffer1 + n1 - filevec[1].missing_newline < p1)))
643 p0--, p1--;
644 }
645
646 /* Now P0 and P1 point at the first nonmatching characters. */
647
648 /* Skip back to last line-beginning in the prefix,
649 and then discard up to HORIZON_LINES lines from the prefix. */
650 i = horizon_lines;
651 while (p0 != buffer0 && (p0[-1] != '\n' || i--))
652 p0--, p1--;
653
654 /* Record the prefix. */
655 filevec[0].prefix_end = p0;
656 filevec[1].prefix_end = p1;
657
658 /* Find identical suffix. */
659
660 /* P0 and P1 point beyond the last chars not yet compared. */
661 p0 = buffer0 + n0;
662 p1 = buffer1 + n1;
663
664 if (! ROBUST_OUTPUT_STYLE (output_style)
665 || filevec[0].missing_newline == filevec[1].missing_newline)
666 {
667 end0 = p0; /* Addr of last char in file 0. */
668
669 /* Get value of P0 at which we should stop scanning backward:
670 this is when either P0 or P1 points just past the last char
671 of the identical prefix. */
672 beg0 = filevec[0].prefix_end + (n0 < n1 ? 0 : n0 - n1);
673
674 /* Scan back until chars don't match or we reach that point. */
675 for (; p0 != beg0; p0--, p1--)
676 if (*p0 != *p1)
677 {
678 /* Point at the first char of the matching suffix. */
679 beg0 = p0;
680 break;
681 }
682
683 /* Are we at a line-beginning in both files? If not, add the rest of
684 this line to the main body. Discard up to HORIZON_LINES lines from
685 the identical suffix. Also, discard one extra line,
686 because shift_boundaries may need it. */
687 i = horizon_lines + !((buffer0 == p0 || p0[-1] == '\n')
688 &&
689 (buffer1 == p1 || p1[-1] == '\n'));
690 while (i-- && p0 != end0)
691 while (*p0++ != '\n')
692 continue;
693
694 p1 += p0 - beg0;
695 }
696
697 /* Record the suffix. */
698 filevec[0].suffix_begin = p0;
699 filevec[1].suffix_begin = p1;
700
701 /* Calculate number of lines of prefix to save.
702
703 prefix_count == 0 means save the whole prefix;
704 we need this for options like -D that output the whole file,
705 or for enormous contexts (to avoid worrying about arithmetic overflow).
706 We also need it for options like -F that output some preceding line;
707 at least we will need to find the last few lines,
708 but since we don't know how many, it's easiest to find them all.
709
710 Otherwise, prefix_count != 0. Save just prefix_count lines at start
711 of the line buffer; they'll be moved to the proper location later.
712 Handle 1 more line than the context says (because we count 1 too many),
713 rounded up to the next power of 2 to speed index computation. */
714
715 if (no_diff_means_no_output && ! function_regexp.fastmap
716 && context < LIN_MAX / 4 && context < n0)
717 {
718 middle_guess = guess_lines (0, 0, p0 - filevec[0].prefix_end);
719 suffix_guess = guess_lines (0, 0, buffer0 + n0 - p0);
720 for (prefix_count = 1; prefix_count <= context; prefix_count *= 2)
721 continue;
722 alloc_lines0 = (prefix_count + middle_guess
723 + MIN (context, suffix_guess));
724 }
725 else
726 {
727 prefix_count = 0;
728 alloc_lines0 = guess_lines (0, 0, n0);
729 }
730
731 prefix_mask = prefix_count - 1;
732 lines = 0;
733 linbuf0 = xmalloc (alloc_lines0 * sizeof *linbuf0);
734 p0 = buffer0;
735
736 /* If the prefix is needed, find the prefix lines. */
737 if (! (no_diff_means_no_output
738 && filevec[0].prefix_end == p0
739 && filevec[1].prefix_end == p1))
740 {
741 end0 = filevec[0].prefix_end;
742 while (p0 != end0)
743 {
744 lin l = lines++ & prefix_mask;
745 if (l == alloc_lines0)
746 {
747 if (PTRDIFF_MAX / (2 * sizeof *linbuf0) <= alloc_lines0)
748 xalloc_die ();
749 alloc_lines0 *= 2;
750 linbuf0 = xrealloc (linbuf0, alloc_lines0 * sizeof *linbuf0);
751 }
752 linbuf0[l] = p0;
753 while (*p0++ != '\n')
754 continue;
755 }
756 }
757 buffered_prefix = prefix_count && context < lines ? context : lines;
758
759 /* Allocate line buffer 1. */
760
761 middle_guess = guess_lines (lines, p0 - buffer0, p1 - filevec[1].prefix_end);
762 suffix_guess = guess_lines (lines, p0 - buffer0, buffer1 + n1 - p1);
763 alloc_lines1 = buffered_prefix + middle_guess + MIN (context, suffix_guess);
764 if (alloc_lines1 < buffered_prefix
765 || PTRDIFF_MAX / sizeof *linbuf1 <= alloc_lines1)
766 xalloc_die ();
767 linbuf1 = xmalloc (alloc_lines1 * sizeof *linbuf1);
768
769 if (buffered_prefix != lines)
770 {
771 /* Rotate prefix lines to proper location. */
772 for (i = 0; i < buffered_prefix; i++)
773 linbuf1[i] = linbuf0[(lines - context + i) & prefix_mask];
774 for (i = 0; i < buffered_prefix; i++)
775 linbuf0[i] = linbuf1[i];
776 }
777
778 /* Initialize line buffer 1 from line buffer 0. */
779 for (i = 0; i < buffered_prefix; i++)
780 linbuf1[i] = linbuf0[i] - buffer0 + buffer1;
781
782 /* Record the line buffer, adjusted so that
783 linbuf[0] points at the first differing line. */
784 filevec[0].linbuf = linbuf0 + buffered_prefix;
785 filevec[1].linbuf = linbuf1 + buffered_prefix;
786 filevec[0].linbuf_base = filevec[1].linbuf_base = - buffered_prefix;
787 filevec[0].alloc_lines = alloc_lines0 - buffered_prefix;
788 filevec[1].alloc_lines = alloc_lines1 - buffered_prefix;
789 filevec[0].prefix_lines = filevec[1].prefix_lines = lines;
790}
791
792
793/* If 1 < k, then (2**k - prime_offset[k]) is the largest prime less
794 than 2**k. This table is derived from Chris K. Caldwell's list
795 <http://www.utm.edu/research/primes/lists/2small/>. */
796
797static unsigned char const prime_offset[] =
798{
799 0, 0, 1, 1, 3, 1, 3, 1, 5, 3, 3, 9, 3, 1, 3, 19, 15, 1, 5, 1, 3, 9, 3,
800 15, 3, 39, 5, 39, 57, 3, 35, 1, 5, 9, 41, 31, 5, 25, 45, 7, 87, 21,
801 11, 57, 17, 55, 21, 115, 59, 81, 27, 129, 47, 111, 33, 55, 5, 13, 27,
802 55, 93, 1, 57, 25
803};
804
805/* Verify that this host's size_t is not too wide for the above table. */
806
807verify (enough_prime_offsets,
808 sizeof (size_t) * CHAR_BIT <= sizeof prime_offset);
809
810/* Given a vector of two file_data objects, read the file associated
811 with each one, and build the table of equivalence classes.
812 Return nonzero if either file appears to be a binary file.
813 If PRETEND_BINARY is nonzero, pretend they are binary regardless. */
814
815bool
816read_files (struct file_data filevec[], bool pretend_binary)
817{
818 int i;
819 bool skip_test = text | pretend_binary;
820 bool appears_binary = pretend_binary | sip (&filevec[0], skip_test);
821
822 if (filevec[0].desc != filevec[1].desc)
823 appears_binary |= sip (&filevec[1], skip_test | appears_binary);
824 else
825 {
826 filevec[1].buffer = filevec[0].buffer;
827 filevec[1].bufsize = filevec[0].bufsize;
828 filevec[1].buffered = filevec[0].buffered;
829 }
830 if (appears_binary)
831 {
832 set_binary_mode (filevec[0].desc, 1);
833 set_binary_mode (filevec[1].desc, 1);
834 return 1;
835 }
836
837 find_identical_ends (filevec);
838
839 equivs_alloc = filevec[0].alloc_lines + filevec[1].alloc_lines + 1;
840 if (PTRDIFF_MAX / sizeof *equivs <= equivs_alloc)
841 xalloc_die ();
842 equivs = xmalloc (equivs_alloc * sizeof *equivs);
843 /* Equivalence class 0 is permanently safe for lines that were not
844 hashed. Real equivalence classes start at 1. */
845 equivs_index = 1;
846
847 /* Allocate (one plus) a prime number of hash buckets. Use a prime
848 number between 1/3 and 2/3 of the value of equiv_allocs,
849 approximately. */
850 for (i = 9; (size_t) 1 << i < equivs_alloc / 3; i++)
851 continue;
852 nbuckets = ((size_t) 1 << i) - prime_offset[i];
853 if (PTRDIFF_MAX / sizeof *buckets <= nbuckets)
854 xalloc_die ();
855 buckets = zalloc ((nbuckets + 1) * sizeof *buckets);
856 buckets++;
857
858 for (i = 0; i < 2; i++)
859 find_and_hash_each_line (&filevec[i]);
860
861 filevec[0].equiv_max = filevec[1].equiv_max = equivs_index;
862
863 free (equivs);
864 free (buckets - 1);
865
866 return 0;
867}
Note: See TracBrowser for help on using the repository browser.