Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

io.c@ 2828

Visit:

Last change on this file since 2828 was 2556, checked in by bird, 20 years ago
diffutils 2.8.1
File size: 23.6 KB

Line
1	/* File I/O for GNU DIFF.
2
3	Copyright (C) 1988, 1989, 1992, 1993, 1994, 1995, 1998, 2001, 2002
4	Free Software Foundation, Inc.
5
6	This file is part of GNU DIFF.
7
8	GNU DIFF is free software; you can redistribute it and/or modify
9	it under the terms of the GNU General Public License as published by
10	the Free Software Foundation; either version 2, or (at your option)
11	any later version.
12
13	GNU DIFF is distributed in the hope that it will be useful,
14	but WITHOUT ANY WARRANTY; without even the implied warranty of
15	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16	GNU General Public License for more details.
17
18	You should have received a copy of the GNU General Public License
19	along with this program; see the file COPYING.
20	If not, write to the Free Software Foundation,
21	59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22
23	#include "diff.h"
24	#include <cmpbuf.h>
25	#include <regex.h>
26	#include <setmode.h>
27	#include <xalloc.h>
28
29	/* Rotate an unsigned value to the left. */
30	#define ROL(v, n) ((v) << (n) \| (v) >> (sizeof (v) * CHAR_BIT - (n)))
31
32	/* Given a hash value and a new character, return a new hash value. */
33	#define HASH(h, c) ((c) + ROL (h, 7))
34
35
36	/* The type of a hash value. */
37	typedef size_t hash_value;
38	verify (hash_value_is_unsigned, ! TYPE_SIGNED (hash_value));
39
40	/* Lines are put into equivalence classes of lines that match in lines_differ.
41	Each equivalence class is represented by one of these structures,
42	but only while the classes are being computed.
43	Afterward, each class is represented by a number. */
44	struct equivclass
45	{
46	lin next; /* Next item in this bucket. */
47	hash_value hash; /* Hash of lines in this class. */
48	char const line; / A line that fits this class. */
49	size_t length; /* That line's length, not counting its newline. */
50	};
51
52	/* Hash-table: array of buckets, each being a chain of equivalence classes.
53	buckets[-1] is reserved for incomplete lines. */
54	static lin *buckets;
55
56	/* Number of buckets in the hash table array, not counting buckets[-1]. */
57	static size_t nbuckets;
58
59	/* Array in which the equivalence classes are allocated.
60	The bucket-chains go through the elements in this array.
61	The number of an equivalence class is its index in this array. */
62	static struct equivclass *equivs;
63
64	/* Index of first free element in the array `equivs'. */
65	static lin equivs_index;
66
67	/* Number of elements allocated in the array `equivs'. */
68	static lin equivs_alloc;
69
70
71	/* Read a block of data into a file buffer, checking for EOF and error. */
72
73	void
74	file_block_read (struct file_data *current, size_t size)
75	{
76	if (size && ! current->eof)
77	{
78	size_t s = block_read (current->desc,
79	FILE_BUFFER (current) + current->buffered, size);
80	if (s == SIZE_MAX)
81	pfatal_with_name (current->name);
82	current->buffered += s;
83	current->eof = s < size;
84	}
85	}
86
87
88	/* Check for binary files and compare them for exact identity. */
89
90	/* Return 1 if BUF contains a non text character.
91	SIZE is the number of characters in BUF. */
92
93	#define binary_file_p(buf, size) (memchr (buf, 0, size) != 0)
94
95	/* Get ready to read the current file.
96	Return nonzero if SKIP_TEST is zero,
97	and if it appears to be a binary file. */
98
99	static bool
100	sip (struct file_data *current, bool skip_test)
101	{
102	/* If we have a nonexistent file at this stage, treat it as empty. */
103	if (current->desc < 0)
104	{
105	/* Leave room for a sentinel. */
106	current->bufsize = sizeof (word);
107	current->buffer = xmalloc (current->bufsize);
108	}
109	else
110	{
111	current->bufsize = buffer_lcm (sizeof (word),
112	STAT_BLOCKSIZE (current->stat),
113	PTRDIFF_MAX - 2 * sizeof (word));
114	current->buffer = xmalloc (current->bufsize);
115
116	if (! skip_test)
117	{
118	/* Check first part of file to see if it's a binary file. */
119
120	bool was_binary = set_binary_mode (current->desc, 1);
121	off_t buffered;
122	file_block_read (current, current->bufsize);
123	buffered = current->buffered;
124
125	if (! was_binary)
126	{
127	/* Revert to text mode and seek back to the beginning to
128	reread the file. Use relative seek, since file
129	descriptors like stdin might not start at offset
130	zero. */
131
132	if (lseek (current->desc, - buffered, SEEK_CUR) == -1)
133	pfatal_with_name (current->name);
134	set_binary_mode (current->desc, 0);
135	current->buffered = 0;
136	current->eof = 0;
137	}
138
139	return binary_file_p (current->buffer, buffered);
140	}
141	}
142
143	current->buffered = 0;
144	current->eof = 0;
145	return 0;
146	}
147
148	/* Slurp the rest of the current file completely into memory. */
149
150	static void
151	slurp (struct file_data *current)
152	{
153	size_t cc;
154
155	if (current->desc < 0)
156	{
157	/* The file is nonexistent. */
158	return;
159	}
160
161	if (S_ISREG (current->stat.st_mode))
162	{
163	/* It's a regular file; slurp in the rest all at once. */
164
165	/* Get the size out of the stat block.
166	Allocate just enough room for appended newline plus word sentinel,
167	plus word-alignment since we want the buffer word-aligned. */
168	size_t file_size = current->stat.st_size;
169	cc = file_size + 2 * sizeof (word) - file_size % sizeof (word);
170	if (file_size != current->stat.st_size \|\| cc < file_size
171	\|\| PTRDIFF_MAX <= cc)
172	xalloc_die ();
173
174	if (current->bufsize < cc)
175	{
176	current->bufsize = cc;
177	current->buffer = xrealloc (current->buffer, cc);
178	}
179
180	/* Try to read at least 1 more byte than the size indicates, to
181	detect whether the file is growing. This is a nicety for
182	users who run 'diff' on files while they are changing. */
183
184	if (current->buffered <= file_size)
185	{
186	file_block_read (current, file_size + 1 - current->buffered);
187	if (current->buffered <= file_size)
188	return;
189	}
190	}
191
192	/* It's not a regular file, or it's a growing regular file; read it,
193	growing the buffer as needed. */
194
195	file_block_read (current, current->bufsize - current->buffered);
196
197	if (current->buffered)
198	{
199	while (current->buffered == current->bufsize)
200	{
201	if (PTRDIFF_MAX / 2 - sizeof (word) < current->bufsize)
202	xalloc_die ();
203	current->bufsize *= 2;
204	current->buffer = xrealloc (current->buffer, current->bufsize);
205	file_block_read (current, current->bufsize - current->buffered);
206	}
207
208	/* Allocate just enough room for appended newline plus word
209	sentinel, plus word-alignment. */
210	cc = current->buffered + 2 * sizeof (word);
211	current->bufsize = cc - cc % sizeof (word);
212	current->buffer = xrealloc (current->buffer, current->bufsize);
213	}
214	}
215
216
217	/* Split the file into lines, simultaneously computing the equivalence
218	class for each line. */
219
220	static void
221	find_and_hash_each_line (struct file_data *current)
222	{
223	hash_value h;
224	unsigned char const p = (unsigned char const ) current->prefix_end;
225	unsigned char c;
226	lin i, *bucket;
227	size_t length;
228
229	/* Cache often-used quantities in local variables to help the compiler. */
230	char const **linbuf = current->linbuf;
231	lin alloc_lines = current->alloc_lines;
232	lin line = 0;
233	lin linbuf_base = current->linbuf_base;
234	lin cureqs = xmalloc (alloc_lines sizeof *cureqs);
235	struct equivclass *eqs = equivs;
236	lin eqs_index = equivs_index;
237	lin eqs_alloc = equivs_alloc;
238	char const *suffix_begin = current->suffix_begin;
239	char const *bufend = FILE_BUFFER (current) + current->buffered;
240	bool diff_length_compare_anyway =
241	ignore_white_space != IGNORE_NO_WHITE_SPACE;
242	bool same_length_diff_contents_compare_anyway =
243	diff_length_compare_anyway \| ignore_case;
244
245	while ((char const *) p < suffix_begin)
246	{
247	char const ip = (char const ) p;
248
249	h = 0;
250
251	/* Hash this line until we find a newline. */
252	if (ignore_case)
253	switch (ignore_white_space)
254	{
255	case IGNORE_ALL_SPACE:
256	while ((c = *p++) != '\n')
257	if (! ISSPACE (c))
258	h = HASH (h, TOLOWER (c));
259	break;
260
261	case IGNORE_SPACE_CHANGE:
262	while ((c = *p++) != '\n')
263	{
264	if (ISSPACE (c))
265	{
266	do
267	if ((c = *p++) == '\n')
268	goto hashing_done;
269	while (ISSPACE (c));
270
271	h = HASH (h, ' ');
272	}
273
274	/* C is now the first non-space. */
275	h = HASH (h, TOLOWER (c));
276	}
277	break;
278
279	case IGNORE_TAB_EXPANSION:
280	{
281	size_t column = 0;
282	while ((c = *p++) != '\n')
283	{
284	int repetitions = 1;
285
286	switch (c)
287	{
288	case '\b':
289	column -= 0 < column;
290	break;
291
292	case '\t':
293	c = ' ';
294	repetitions = TAB_WIDTH - column % TAB_WIDTH;
295	column += repetitions;
296	break;
297
298	case '\r':
299	column = 0;
300	break;
301
302	default:
303	c = TOLOWER (c);
304	column++;
305	break;
306	}
307
308	do
309	h = HASH (h, c);
310	while (--repetitions != 0);
311	}
312	}
313	break;
314
315	default:
316	while ((c = *p++) != '\n')
317	h = HASH (h, TOLOWER (c));
318	break;
319	}
320	else
321	switch (ignore_white_space)
322	{
323	case IGNORE_ALL_SPACE:
324	while ((c = *p++) != '\n')
325	if (! ISSPACE (c))
326	h = HASH (h, c);
327	break;
328
329	case IGNORE_SPACE_CHANGE:
330	while ((c = *p++) != '\n')
331	{
332	if (ISSPACE (c))
333	{
334	do
335	if ((c = *p++) == '\n')
336	goto hashing_done;
337	while (ISSPACE (c));
338
339	h = HASH (h, ' ');
340	}
341
342	/* C is now the first non-space. */
343	h = HASH (h, c);
344	}
345	break;
346
347	case IGNORE_TAB_EXPANSION:
348	{
349	size_t column = 0;
350	while ((c = *p++) != '\n')
351	{
352	int repetitions = 1;
353
354	switch (c)
355	{
356	case '\b':
357	column -= 0 < column;
358	break;
359
360	case '\t':
361	c = ' ';
362	repetitions = TAB_WIDTH - column % TAB_WIDTH;
363	column += repetitions;
364	break;
365
366	case '\r':
367	column = 0;
368	break;
369
370	default:
371	column++;
372	break;
373	}
374
375	do
376	h = HASH (h, c);
377	while (--repetitions != 0);
378	}
379	}
380	break;
381
382	default:
383	while ((c = *p++) != '\n')
384	h = HASH (h, c);
385	break;
386	}
387
388	hashing_done:;
389
390	bucket = &buckets[h % nbuckets];
391	length = (char const *) p - ip - 1;
392
393	if ((char const *) p == bufend
394	&& current->missing_newline
395	&& ROBUST_OUTPUT_STYLE (output_style))
396	{
397	/* This line is incomplete. If this is significant,
398	put the line into buckets[-1]. */
399	if (ignore_white_space < IGNORE_SPACE_CHANGE)
400	bucket = &buckets[-1];
401
402	/* Omit the inserted newline when computing linbuf later. */
403	p--;
404	bufend = suffix_begin = (char const *) p;
405	}
406
407	for (i = *bucket; ; i = eqs[i].next)
408	if (!i)
409	{
410	/* Create a new equivalence class in this bucket. */
411	i = eqs_index++;
412	if (i == eqs_alloc)
413	{
414	if (PTRDIFF_MAX / (2 * sizeof *eqs) <= eqs_alloc)
415	xalloc_die ();
416	eqs_alloc *= 2;
417	eqs = xrealloc (eqs, eqs_alloc * sizeof *eqs);
418	}
419	eqs[i].next = *bucket;
420	eqs[i].hash = h;
421	eqs[i].line = ip;
422	eqs[i].length = length;
423	*bucket = i;
424	break;
425	}
426	else if (eqs[i].hash == h)
427	{
428	char const *eqline = eqs[i].line;
429
430	/* Reuse existing class if lines_differ reports the lines
431	equal. */
432	if (eqs[i].length == length)
433	{
434	/* Reuse existing equivalence class if the lines are identical.
435	This detects the common case of exact identity
436	faster than lines_differ would. */
437	if (memcmp (eqline, ip, length) == 0)
438	break;
439	if (!same_length_diff_contents_compare_anyway)
440	continue;
441	}
442	else if (!diff_length_compare_anyway)
443	continue;
444
445	if (! lines_differ (eqline, ip))
446	break;
447	}
448
449	/* Maybe increase the size of the line table. */
450	if (line == alloc_lines)
451	{
452	/* Double (alloc_lines - linbuf_base) by adding to alloc_lines. */
453	if (PTRDIFF_MAX / 3 <= alloc_lines
454	\|\| PTRDIFF_MAX / sizeof cureqs <= 2 alloc_lines - linbuf_base
455	\|\| PTRDIFF_MAX / sizeof *linbuf <= alloc_lines - linbuf_base)
456	xalloc_die ();
457	alloc_lines = 2 * alloc_lines - linbuf_base;
458	cureqs = xrealloc (cureqs, alloc_lines * sizeof *cureqs);
459	linbuf += linbuf_base;
460	linbuf = xrealloc (linbuf,
461	(alloc_lines - linbuf_base) * sizeof *linbuf);
462	linbuf -= linbuf_base;
463	}
464	linbuf[line] = ip;
465	cureqs[line] = i;
466	++line;
467	}
468
469	current->buffered_lines = line;
470
471	for (i = 0; ; i++)
472	{
473	/* Record the line start for lines in the suffix that we care about.
474	Record one more line start than lines,
475	so that we can compute the length of any buffered line. */
476	if (line == alloc_lines)
477	{
478	/* Double (alloc_lines - linbuf_base) by adding to alloc_lines. */
479	if (PTRDIFF_MAX / 3 <= alloc_lines
480	\|\| PTRDIFF_MAX / sizeof cureqs <= 2 alloc_lines - linbuf_base
481	\|\| PTRDIFF_MAX / sizeof *linbuf <= alloc_lines - linbuf_base)
482	xalloc_die ();
483	alloc_lines = 2 * alloc_lines - linbuf_base;
484	linbuf += linbuf_base;
485	linbuf = xrealloc (linbuf,
486	(alloc_lines - linbuf_base) * sizeof *linbuf);
487	linbuf -= linbuf_base;
488	}
489	linbuf[line] = (char const *) p;
490
491	if ((char const *) p == bufend)
492	break;
493
494	if (context <= i && no_diff_means_no_output)
495	break;
496
497	line++;
498
499	while (*p++ != '\n')
500	continue;
501	}
502
503	/* Done with cache in local variables. */
504	current->linbuf = linbuf;
505	current->valid_lines = line;
506	current->alloc_lines = alloc_lines;
507	current->equivs = cureqs;
508	equivs = eqs;
509	equivs_alloc = eqs_alloc;
510	equivs_index = eqs_index;
511	}
512
513
514	/* Prepare the text. Make sure the text end is initialized.
515	Make sure text ends in a newline,
516	but remember that we had to add one.
517	Strip trailing CRs, if that was requested. */
518
519	static void
520	prepare_text (struct file_data *current)
521	{
522	size_t buffered = current->buffered;
523	char *p = FILE_BUFFER (current);
524	char *dst;
525
526	if (buffered == 0 \|\| p[buffered - 1] == '\n')
527	current->missing_newline = 0;
528	else
529	{
530	p[buffered++] = '\n';
531	current->missing_newline = 1;
532	}
533
534	if (!p)
535	return;
536
537	/* Don't use uninitialized storage when planting or using sentinels. */
538	memset (p + buffered, 0, sizeof (word));
539
540	if (strip_trailing_cr && (dst = memchr (p, '\r', buffered)))
541	{
542	char const *src = dst;
543	char const *srclim = p + buffered;
544
545	do
546	dst += ! ((dst = src++) == '\r' && *src == '\n');
547	while (src < srclim);
548
549	buffered -= src - dst;
550	}
551
552	current->buffered = buffered;
553	}
554
555
556	/* We have found N lines in a buffer of size S; guess the
557	proportionate number of lines that will be found in a buffer of
558	size T. However, do not guess a number of lines so large that the
559	resulting line table might cause overflow in size calculations. */
560	static lin
561	guess_lines (lin n, size_t s, size_t t)
562	{
563	size_t guessed_bytes_per_line = n < 10 ? 32 : s / (n - 1);
564	lin guessed_lines = MAX (1, t / guessed_bytes_per_line);
565	return MIN (guessed_lines, PTRDIFF_MAX / (2 * sizeof (char *) + 1) - 5) + 5;
566	}
567
568	/* Given a vector of two file_data objects, find the identical
569	prefixes and suffixes of each object. */
570
571	static void
572	find_identical_ends (struct file_data filevec[])
573	{
574	word w0, w1;
575	char p0, p1, buffer0, buffer1;
576	char const end0, beg0;
577	char const linbuf0, linbuf1;
578	lin i, lines;
579	size_t n0, n1;
580	lin alloc_lines0, alloc_lines1;
581	lin buffered_prefix, prefix_count, prefix_mask;
582	lin middle_guess, suffix_guess;
583
584	slurp (&filevec[0]);
585	prepare_text (&filevec[0]);
586	if (filevec[0].desc != filevec[1].desc)
587	{
588	slurp (&filevec[1]);
589	prepare_text (&filevec[1]);
590	}
591	else
592	{
593	filevec[1].buffer = filevec[0].buffer;
594	filevec[1].bufsize = filevec[0].bufsize;
595	filevec[1].buffered = filevec[0].buffered;
596	filevec[1].missing_newline = filevec[0].missing_newline;
597	}
598
599	/* Find identical prefix. */
600
601	w0 = filevec[0].buffer;
602	w1 = filevec[1].buffer;
603	p0 = buffer0 = (char *) w0;
604	p1 = buffer1 = (char *) w1;
605	n0 = filevec[0].buffered;
606	n1 = filevec[1].buffered;
607
608	if (p0 == p1)
609	/* The buffers are the same; sentinels won't work. */
610	p0 = p1 += n1;
611	else
612	{
613	/* Insert end sentinels, in this case characters that are guaranteed
614	to make the equality test false, and thus terminate the loop. */
615
616	if (n0 < n1)
617	p0[n0] = ~p1[n0];
618	else
619	p1[n1] = ~p0[n1];
620
621	/* Loop until first mismatch, or to the sentinel characters. */
622
623	/* Compare a word at a time for speed. */
624	while (w0 == w1)
625	w0++, w1++;
626
627	/* Do the last few bytes of comparison a byte at a time. */
628	p0 = (char *) w0;
629	p1 = (char *) w1;
630	while (p0 == p1)
631	p0++, p1++;
632
633	/* Don't mistakenly count missing newline as part of prefix. */
634	if (ROBUST_OUTPUT_STYLE (output_style)
635	&& ((buffer0 + n0 - filevec[0].missing_newline < p0)
636	!=
637	(buffer1 + n1 - filevec[1].missing_newline < p1)))
638	p0--, p1--;
639	}
640
641	/* Now P0 and P1 point at the first nonmatching characters. */
642
643	/* Skip back to last line-beginning in the prefix,
644	and then discard up to HORIZON_LINES lines from the prefix. */
645	i = horizon_lines;
646	while (p0 != buffer0 && (p0[-1] != '\n' \|\| i--))
647	p0--, p1--;
648
649	/* Record the prefix. */
650	filevec[0].prefix_end = p0;
651	filevec[1].prefix_end = p1;
652
653	/* Find identical suffix. */
654
655	/* P0 and P1 point beyond the last chars not yet compared. */
656	p0 = buffer0 + n0;
657	p1 = buffer1 + n1;
658
659	if (! ROBUST_OUTPUT_STYLE (output_style)
660	\|\| filevec[0].missing_newline == filevec[1].missing_newline)
661	{
662	end0 = p0; /* Addr of last char in file 0. */
663
664	/* Get value of P0 at which we should stop scanning backward:
665	this is when either P0 or P1 points just past the last char
666	of the identical prefix. */
667	beg0 = filevec[0].prefix_end + (n0 < n1 ? 0 : n0 - n1);
668
669	/* Scan back until chars don't match or we reach that point. */
670	for (; p0 != beg0; p0--, p1--)
671	if (p0 != p1)
672	{
673	/* Point at the first char of the matching suffix. */
674	beg0 = p0;
675	break;
676	}
677
678	/* Are we at a line-beginning in both files? If not, add the rest of
679	this line to the main body. Discard up to HORIZON_LINES lines from
680	the identical suffix. Also, discard one extra line,
681	because shift_boundaries may need it. */
682	i = horizon_lines + !((buffer0 == p0 \|\| p0[-1] == '\n')
683	&&
684	(buffer1 == p1 \|\| p1[-1] == '\n'));
685	while (i-- && p0 != end0)
686	while (*p0++ != '\n')
687	continue;
688
689	p1 += p0 - beg0;
690	}
691
692	/* Record the suffix. */
693	filevec[0].suffix_begin = p0;
694	filevec[1].suffix_begin = p1;
695
696	/* Calculate number of lines of prefix to save.
697
698	prefix_count == 0 means save the whole prefix;
699	we need this for options like -D that output the whole file,
700	or for enormous contexts (to avoid worrying about arithmetic overflow).
701	We also need it for options like -F that output some preceding line;
702	at least we will need to find the last few lines,
703	but since we don't know how many, it's easiest to find them all.
704
705	Otherwise, prefix_count != 0. Save just prefix_count lines at start
706	of the line buffer; they'll be moved to the proper location later.
707	Handle 1 more line than the context says (because we count 1 too many),
708	rounded up to the next power of 2 to speed index computation. */
709
710	if (no_diff_means_no_output && ! function_regexp.fastmap
711	&& context < LIN_MAX / 4 && context < n0)
712	{
713	middle_guess = guess_lines (0, 0, p0 - filevec[0].prefix_end);
714	suffix_guess = guess_lines (0, 0, buffer0 + n0 - p0);
715	for (prefix_count = 1; prefix_count <= context; prefix_count *= 2)
716	continue;
717	alloc_lines0 = (prefix_count + middle_guess
718	+ MIN (context, suffix_guess));
719	}
720	else
721	{
722	prefix_count = 0;
723	alloc_lines0 = guess_lines (0, 0, n0);
724	}
725
726	prefix_mask = prefix_count - 1;
727	lines = 0;
728	linbuf0 = xmalloc (alloc_lines0 * sizeof *linbuf0);
729	p0 = buffer0;
730
731	/* If the prefix is needed, find the prefix lines. */
732	if (! (no_diff_means_no_output
733	&& filevec[0].prefix_end == p0
734	&& filevec[1].prefix_end == p1))
735	{
736	end0 = filevec[0].prefix_end;
737	while (p0 != end0)
738	{
739	lin l = lines++ & prefix_mask;
740	if (l == alloc_lines0)
741	{
742	if (PTRDIFF_MAX / (2 * sizeof *linbuf0) <= alloc_lines0)
743	xalloc_die ();
744	alloc_lines0 *= 2;
745	linbuf0 = xrealloc (linbuf0, alloc_lines0 * sizeof *linbuf0);
746	}
747	linbuf0[l] = p0;
748	while (*p0++ != '\n')
749	continue;
750	}
751	}
752	buffered_prefix = prefix_count && context < lines ? context : lines;
753
754	/* Allocate line buffer 1. */
755
756	middle_guess = guess_lines (lines, p0 - buffer0, p1 - filevec[1].prefix_end);
757	suffix_guess = guess_lines (lines, p0 - buffer0, buffer1 + n1 - p1);
758	alloc_lines1 = buffered_prefix + middle_guess + MIN (context, suffix_guess);
759	if (alloc_lines1 < buffered_prefix
760	\|\| PTRDIFF_MAX / sizeof *linbuf1 <= alloc_lines1)
761	xalloc_die ();
762	linbuf1 = xmalloc (alloc_lines1 * sizeof *linbuf1);
763
764	if (buffered_prefix != lines)
765	{
766	/* Rotate prefix lines to proper location. */
767	for (i = 0; i < buffered_prefix; i++)
768	linbuf1[i] = linbuf0[(lines - context + i) & prefix_mask];
769	for (i = 0; i < buffered_prefix; i++)
770	linbuf0[i] = linbuf1[i];
771	}
772
773	/* Initialize line buffer 1 from line buffer 0. */
774	for (i = 0; i < buffered_prefix; i++)
775	linbuf1[i] = linbuf0[i] - buffer0 + buffer1;
776
777	/* Record the line buffer, adjusted so that
778	linbuf[0] points at the first differing line. */
779	filevec[0].linbuf = linbuf0 + buffered_prefix;
780	filevec[1].linbuf = linbuf1 + buffered_prefix;
781	filevec[0].linbuf_base = filevec[1].linbuf_base = - buffered_prefix;
782	filevec[0].alloc_lines = alloc_lines0 - buffered_prefix;
783	filevec[1].alloc_lines = alloc_lines1 - buffered_prefix;
784	filevec[0].prefix_lines = filevec[1].prefix_lines = lines;
785	}
786
787
788	/* If 1 < k, then (2**k - prime_offset[k]) is the largest prime less
789	than 2**k. This table is derived from Chris K. Caldwell's list
790	<http://www.utm.edu/research/primes/lists/2small/>. */
791
792	static unsigned char const prime_offset[] =
793	{
794	0, 0, 1, 1, 3, 1, 3, 1, 5, 3, 3, 9, 3, 1, 3, 19, 15, 1, 5, 1, 3, 9, 3,
795	15, 3, 39, 5, 39, 57, 3, 35, 1, 5, 9, 41, 31, 5, 25, 45, 7, 87, 21,
796	11, 57, 17, 55, 21, 115, 59, 81, 27, 129, 47, 111, 33, 55, 5, 13, 27,
797	55, 93, 1, 57, 25
798	};
799
800	/* Verify that this host's size_t is not too wide for the above table. */
801
802	verify (enough_prime_offsets,
803	sizeof (size_t) * CHAR_BIT <= sizeof prime_offset);
804
805	/* Given a vector of two file_data objects, read the file associated
806	with each one, and build the table of equivalence classes.
807	Return nonzero if either file appears to be a binary file.
808	If PRETEND_BINARY is nonzero, pretend they are binary regardless. */
809
810	bool
811	read_files (struct file_data filevec[], bool pretend_binary)
812	{
813	int i;
814	bool skip_test = text \| pretend_binary;
815	bool appears_binary = pretend_binary \| sip (&filevec[0], skip_test);
816
817	if (filevec[0].desc != filevec[1].desc)
818	appears_binary \|= sip (&filevec[1], skip_test \| appears_binary);
819	else
820	{
821	filevec[1].buffer = filevec[0].buffer;
822	filevec[1].bufsize = filevec[0].bufsize;
823	filevec[1].buffered = filevec[0].buffered;
824	}
825	if (appears_binary)
826	{
827	set_binary_mode (filevec[0].desc, 1);
828	set_binary_mode (filevec[1].desc, 1);
829	return 1;
830	}
831
832	find_identical_ends (filevec);
833
834	equivs_alloc = filevec[0].alloc_lines + filevec[1].alloc_lines + 1;
835	if (PTRDIFF_MAX / sizeof *equivs <= equivs_alloc)
836	xalloc_die ();
837	equivs = xmalloc (equivs_alloc * sizeof *equivs);
838	/* Equivalence class 0 is permanently safe for lines that were not
839	hashed. Real equivalence classes start at 1. */
840	equivs_index = 1;
841
842	/* Allocate (one plus) a prime number of hash buckets. Use a prime
843	number between 1/3 and 2/3 of the value of equiv_allocs,
844	approximately. */
845	for (i = 9; (size_t) 1 << i < equivs_alloc / 3; i++)
846	continue;
847	nbuckets = ((size_t) 1 << i) - prime_offset[i];
848	if (PTRDIFF_MAX / sizeof *buckets <= nbuckets)
849	xalloc_die ();
850	buckets = zalloc ((nbuckets + 1) * sizeof *buckets);
851	buckets++;
852
853	for (i = 0; i < 2; i++)
854	find_and_hash_each_line (&filevec[i]);
855
856	filevec[0].equiv_max = filevec[1].equiv_max = equivs_index;
857
858	free (equivs);
859	free (buckets - 1);
860
861	return 0;
862	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: trunk/diffutils/src/io.c@ 2828

Download in other formats: