Context Navigation

← Previous Revision
Next Revision →
Blame
Revision Log

io.c

Visit:

Last change on this file was 3079, checked in by bird, 18 years ago
don't mess with non-seekable files.
File size: 23.7 KB

Line
1	/* File I/O for GNU DIFF.
2
3	Copyright (C) 1988, 1989, 1992, 1993, 1994, 1995, 1998, 2001, 2002
4	Free Software Foundation, Inc.
5
6	This file is part of GNU DIFF.
7
8	GNU DIFF is free software; you can redistribute it and/or modify
9	it under the terms of the GNU General Public License as published by
10	the Free Software Foundation; either version 2, or (at your option)
11	any later version.
12
13	GNU DIFF is distributed in the hope that it will be useful,
14	but WITHOUT ANY WARRANTY; without even the implied warranty of
15	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16	GNU General Public License for more details.
17
18	You should have received a copy of the GNU General Public License
19	along with this program; see the file COPYING.
20	If not, write to the Free Software Foundation,
21	59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22
23	#include "diff.h"
24	#include <cmpbuf.h>
25	#include <regex.h>
26	#include <setmode.h>
27	#include <xalloc.h>
28
29	/* Rotate an unsigned value to the left. */
30	#define ROL(v, n) ((v) << (n) \| (v) >> (sizeof (v) * CHAR_BIT - (n)))
31
32	/* Given a hash value and a new character, return a new hash value. */
33	#define HASH(h, c) ((c) + ROL (h, 7))
34
35
36	/* The type of a hash value. */
37	typedef size_t hash_value;
38	verify (hash_value_is_unsigned, ! TYPE_SIGNED (hash_value));
39
40	/* Lines are put into equivalence classes of lines that match in lines_differ.
41	Each equivalence class is represented by one of these structures,
42	but only while the classes are being computed.
43	Afterward, each class is represented by a number. */
44	struct equivclass
45	{
46	lin next; /* Next item in this bucket. */
47	hash_value hash; /* Hash of lines in this class. */
48	char const line; / A line that fits this class. */
49	size_t length; /* That line's length, not counting its newline. */
50	};
51
52	/* Hash-table: array of buckets, each being a chain of equivalence classes.
53	buckets[-1] is reserved for incomplete lines. */
54	static lin *buckets;
55
56	/* Number of buckets in the hash table array, not counting buckets[-1]. */
57	static size_t nbuckets;
58
59	/* Array in which the equivalence classes are allocated.
60	The bucket-chains go through the elements in this array.
61	The number of an equivalence class is its index in this array. */
62	static struct equivclass *equivs;
63
64	/* Index of first free element in the array `equivs'. */
65	static lin equivs_index;
66
67	/* Number of elements allocated in the array `equivs'. */
68	static lin equivs_alloc;
69
70
71	/* Read a block of data into a file buffer, checking for EOF and error. */
72
73	void
74	file_block_read (struct file_data *current, size_t size)
75	{
76	if (size && ! current->eof)
77	{
78	size_t s = block_read (current->desc,
79	FILE_BUFFER (current) + current->buffered, size);
80	if (s == SIZE_MAX)
81	pfatal_with_name (current->name);
82	current->buffered += s;
83	current->eof = s < size;
84	}
85	}
86
87
88	/* Check for binary files and compare them for exact identity. */
89
90	/* Return 1 if BUF contains a non text character.
91	SIZE is the number of characters in BUF. */
92
93	#define binary_file_p(buf, size) (memchr (buf, 0, size) != 0)
94
95	/* Get ready to read the current file.
96	Return nonzero if SKIP_TEST is zero,
97	and if it appears to be a binary file. */
98
99	static bool
100	sip (struct file_data *current, bool skip_test)
101	{
102	/* If we have a nonexistent file at this stage, treat it as empty. */
103	if (current->desc < 0)
104	{
105	/* Leave room for a sentinel. */
106	current->bufsize = sizeof (word);
107	current->buffer = xmalloc (current->bufsize);
108	}
109	else
110	{
111	current->bufsize = buffer_lcm (sizeof (word),
112	STAT_BLOCKSIZE (current->stat),
113	PTRDIFF_MAX - 2 * sizeof (word));
114	current->buffer = xmalloc (current->bufsize);
115
116	#ifdef __EMX__
117	if (! skip_test
118	&& ! lseek (current->desc, 0, SEEK_CUR))
119	#else
120	if (! skip_test)
121	#endif
122	{
123	/* Check first part of file to see if it's a binary file. */
124
125	bool was_binary = set_binary_mode (current->desc, 1);
126	off_t buffered;
127	file_block_read (current, current->bufsize);
128	buffered = current->buffered;
129
130	if (! was_binary)
131	{
132	/* Revert to text mode and seek back to the beginning to
133	reread the file. Use relative seek, since file
134	descriptors like stdin might not start at offset
135	zero. */
136
137	if (lseek (current->desc, - buffered, SEEK_CUR) == -1)
138	pfatal_with_name (current->name);
139	set_binary_mode (current->desc, 0);
140	current->buffered = 0;
141	current->eof = 0;
142	}
143
144	return binary_file_p (current->buffer, buffered);
145	}
146	}
147
148	current->buffered = 0;
149	current->eof = 0;
150	return 0;
151	}
152
153	/* Slurp the rest of the current file completely into memory. */
154
155	static void
156	slurp (struct file_data *current)
157	{
158	size_t cc;
159
160	if (current->desc < 0)
161	{
162	/* The file is nonexistent. */
163	return;
164	}
165
166	if (S_ISREG (current->stat.st_mode))
167	{
168	/* It's a regular file; slurp in the rest all at once. */
169
170	/* Get the size out of the stat block.
171	Allocate just enough room for appended newline plus word sentinel,
172	plus word-alignment since we want the buffer word-aligned. */
173	size_t file_size = current->stat.st_size;
174	cc = file_size + 2 * sizeof (word) - file_size % sizeof (word);
175	if (file_size != current->stat.st_size \|\| cc < file_size
176	\|\| PTRDIFF_MAX <= cc)
177	xalloc_die ();
178
179	if (current->bufsize < cc)
180	{
181	current->bufsize = cc;
182	current->buffer = xrealloc (current->buffer, cc);
183	}
184
185	/* Try to read at least 1 more byte than the size indicates, to
186	detect whether the file is growing. This is a nicety for
187	users who run 'diff' on files while they are changing. */
188
189	if (current->buffered <= file_size)
190	{
191	file_block_read (current, file_size + 1 - current->buffered);
192	if (current->buffered <= file_size)
193	return;
194	}
195	}
196
197	/* It's not a regular file, or it's a growing regular file; read it,
198	growing the buffer as needed. */
199
200	file_block_read (current, current->bufsize - current->buffered);
201
202	if (current->buffered)
203	{
204	while (current->buffered == current->bufsize)
205	{
206	if (PTRDIFF_MAX / 2 - sizeof (word) < current->bufsize)
207	xalloc_die ();
208	current->bufsize *= 2;
209	current->buffer = xrealloc (current->buffer, current->bufsize);
210	file_block_read (current, current->bufsize - current->buffered);
211	}
212
213	/* Allocate just enough room for appended newline plus word
214	sentinel, plus word-alignment. */
215	cc = current->buffered + 2 * sizeof (word);
216	current->bufsize = cc - cc % sizeof (word);
217	current->buffer = xrealloc (current->buffer, current->bufsize);
218	}
219	}
220
221
222	/* Split the file into lines, simultaneously computing the equivalence
223	class for each line. */
224
225	static void
226	find_and_hash_each_line (struct file_data *current)
227	{
228	hash_value h;
229	unsigned char const p = (unsigned char const ) current->prefix_end;
230	unsigned char c;
231	lin i, *bucket;
232	size_t length;
233
234	/* Cache often-used quantities in local variables to help the compiler. */
235	char const **linbuf = current->linbuf;
236	lin alloc_lines = current->alloc_lines;
237	lin line = 0;
238	lin linbuf_base = current->linbuf_base;
239	lin cureqs = xmalloc (alloc_lines sizeof *cureqs);
240	struct equivclass *eqs = equivs;
241	lin eqs_index = equivs_index;
242	lin eqs_alloc = equivs_alloc;
243	char const *suffix_begin = current->suffix_begin;
244	char const *bufend = FILE_BUFFER (current) + current->buffered;
245	bool diff_length_compare_anyway =
246	ignore_white_space != IGNORE_NO_WHITE_SPACE;
247	bool same_length_diff_contents_compare_anyway =
248	diff_length_compare_anyway \| ignore_case;
249
250	while ((char const *) p < suffix_begin)
251	{
252	char const ip = (char const ) p;
253
254	h = 0;
255
256	/* Hash this line until we find a newline. */
257	if (ignore_case)
258	switch (ignore_white_space)
259	{
260	case IGNORE_ALL_SPACE:
261	while ((c = *p++) != '\n')
262	if (! ISSPACE (c))
263	h = HASH (h, TOLOWER (c));
264	break;
265
266	case IGNORE_SPACE_CHANGE:
267	while ((c = *p++) != '\n')
268	{
269	if (ISSPACE (c))
270	{
271	do
272	if ((c = *p++) == '\n')
273	goto hashing_done;
274	while (ISSPACE (c));
275
276	h = HASH (h, ' ');
277	}
278
279	/* C is now the first non-space. */
280	h = HASH (h, TOLOWER (c));
281	}
282	break;
283
284	case IGNORE_TAB_EXPANSION:
285	{
286	size_t column = 0;
287	while ((c = *p++) != '\n')
288	{
289	int repetitions = 1;
290
291	switch (c)
292	{
293	case '\b':
294	column -= 0 < column;
295	break;
296
297	case '\t':
298	c = ' ';
299	repetitions = TAB_WIDTH - column % TAB_WIDTH;
300	column += repetitions;
301	break;
302
303	case '\r':
304	column = 0;
305	break;
306
307	default:
308	c = TOLOWER (c);
309	column++;
310	break;
311	}
312
313	do
314	h = HASH (h, c);
315	while (--repetitions != 0);
316	}
317	}
318	break;
319
320	default:
321	while ((c = *p++) != '\n')
322	h = HASH (h, TOLOWER (c));
323	break;
324	}
325	else
326	switch (ignore_white_space)
327	{
328	case IGNORE_ALL_SPACE:
329	while ((c = *p++) != '\n')
330	if (! ISSPACE (c))
331	h = HASH (h, c);
332	break;
333
334	case IGNORE_SPACE_CHANGE:
335	while ((c = *p++) != '\n')
336	{
337	if (ISSPACE (c))
338	{
339	do
340	if ((c = *p++) == '\n')
341	goto hashing_done;
342	while (ISSPACE (c));
343
344	h = HASH (h, ' ');
345	}
346
347	/* C is now the first non-space. */
348	h = HASH (h, c);
349	}
350	break;
351
352	case IGNORE_TAB_EXPANSION:
353	{
354	size_t column = 0;
355	while ((c = *p++) != '\n')
356	{
357	int repetitions = 1;
358
359	switch (c)
360	{
361	case '\b':
362	column -= 0 < column;
363	break;
364
365	case '\t':
366	c = ' ';
367	repetitions = TAB_WIDTH - column % TAB_WIDTH;
368	column += repetitions;
369	break;
370
371	case '\r':
372	column = 0;
373	break;
374
375	default:
376	column++;
377	break;
378	}
379
380	do
381	h = HASH (h, c);
382	while (--repetitions != 0);
383	}
384	}
385	break;
386
387	default:
388	while ((c = *p++) != '\n')
389	h = HASH (h, c);
390	break;
391	}
392
393	hashing_done:;
394
395	bucket = &buckets[h % nbuckets];
396	length = (char const *) p - ip - 1;
397
398	if ((char const *) p == bufend
399	&& current->missing_newline
400	&& ROBUST_OUTPUT_STYLE (output_style))
401	{
402	/* This line is incomplete. If this is significant,
403	put the line into buckets[-1]. */
404	if (ignore_white_space < IGNORE_SPACE_CHANGE)
405	bucket = &buckets[-1];
406
407	/* Omit the inserted newline when computing linbuf later. */
408	p--;
409	bufend = suffix_begin = (char const *) p;
410	}
411
412	for (i = *bucket; ; i = eqs[i].next)
413	if (!i)
414	{
415	/* Create a new equivalence class in this bucket. */
416	i = eqs_index++;
417	if (i == eqs_alloc)
418	{
419	if (PTRDIFF_MAX / (2 * sizeof *eqs) <= eqs_alloc)
420	xalloc_die ();
421	eqs_alloc *= 2;
422	eqs = xrealloc (eqs, eqs_alloc * sizeof *eqs);
423	}
424	eqs[i].next = *bucket;
425	eqs[i].hash = h;
426	eqs[i].line = ip;
427	eqs[i].length = length;
428	*bucket = i;
429	break;
430	}
431	else if (eqs[i].hash == h)
432	{
433	char const *eqline = eqs[i].line;
434
435	/* Reuse existing class if lines_differ reports the lines
436	equal. */
437	if (eqs[i].length == length)
438	{
439	/* Reuse existing equivalence class if the lines are identical.
440	This detects the common case of exact identity
441	faster than lines_differ would. */
442	if (memcmp (eqline, ip, length) == 0)
443	break;
444	if (!same_length_diff_contents_compare_anyway)
445	continue;
446	}
447	else if (!diff_length_compare_anyway)
448	continue;
449
450	if (! lines_differ (eqline, ip))
451	break;
452	}
453
454	/* Maybe increase the size of the line table. */
455	if (line == alloc_lines)
456	{
457	/* Double (alloc_lines - linbuf_base) by adding to alloc_lines. */
458	if (PTRDIFF_MAX / 3 <= alloc_lines
459	\|\| PTRDIFF_MAX / sizeof cureqs <= 2 alloc_lines - linbuf_base
460	\|\| PTRDIFF_MAX / sizeof *linbuf <= alloc_lines - linbuf_base)
461	xalloc_die ();
462	alloc_lines = 2 * alloc_lines - linbuf_base;
463	cureqs = xrealloc (cureqs, alloc_lines * sizeof *cureqs);
464	linbuf += linbuf_base;
465	linbuf = xrealloc (linbuf,
466	(alloc_lines - linbuf_base) * sizeof *linbuf);
467	linbuf -= linbuf_base;
468	}
469	linbuf[line] = ip;
470	cureqs[line] = i;
471	++line;
472	}
473
474	current->buffered_lines = line;
475
476	for (i = 0; ; i++)
477	{
478	/* Record the line start for lines in the suffix that we care about.
479	Record one more line start than lines,
480	so that we can compute the length of any buffered line. */
481	if (line == alloc_lines)
482	{
483	/* Double (alloc_lines - linbuf_base) by adding to alloc_lines. */
484	if (PTRDIFF_MAX / 3 <= alloc_lines
485	\|\| PTRDIFF_MAX / sizeof cureqs <= 2 alloc_lines - linbuf_base
486	\|\| PTRDIFF_MAX / sizeof *linbuf <= alloc_lines - linbuf_base)
487	xalloc_die ();
488	alloc_lines = 2 * alloc_lines - linbuf_base;
489	linbuf += linbuf_base;
490	linbuf = xrealloc (linbuf,
491	(alloc_lines - linbuf_base) * sizeof *linbuf);
492	linbuf -= linbuf_base;
493	}
494	linbuf[line] = (char const *) p;
495
496	if ((char const *) p == bufend)
497	break;
498
499	if (context <= i && no_diff_means_no_output)
500	break;
501
502	line++;
503
504	while (*p++ != '\n')
505	continue;
506	}
507
508	/* Done with cache in local variables. */
509	current->linbuf = linbuf;
510	current->valid_lines = line;
511	current->alloc_lines = alloc_lines;
512	current->equivs = cureqs;
513	equivs = eqs;
514	equivs_alloc = eqs_alloc;
515	equivs_index = eqs_index;
516	}
517
518
519	/* Prepare the text. Make sure the text end is initialized.
520	Make sure text ends in a newline,
521	but remember that we had to add one.
522	Strip trailing CRs, if that was requested. */
523
524	static void
525	prepare_text (struct file_data *current)
526	{
527	size_t buffered = current->buffered;
528	char *p = FILE_BUFFER (current);
529	char *dst;
530
531	if (buffered == 0 \|\| p[buffered - 1] == '\n')
532	current->missing_newline = 0;
533	else
534	{
535	p[buffered++] = '\n';
536	current->missing_newline = 1;
537	}
538
539	if (!p)
540	return;
541
542	/* Don't use uninitialized storage when planting or using sentinels. */
543	memset (p + buffered, 0, sizeof (word));
544
545	if (strip_trailing_cr && (dst = memchr (p, '\r', buffered)))
546	{
547	char const *src = dst;
548	char const *srclim = p + buffered;
549
550	do
551	dst += ! ((dst = src++) == '\r' && *src == '\n');
552	while (src < srclim);
553
554	buffered -= src - dst;
555	}
556
557	current->buffered = buffered;
558	}
559
560
561	/* We have found N lines in a buffer of size S; guess the
562	proportionate number of lines that will be found in a buffer of
563	size T. However, do not guess a number of lines so large that the
564	resulting line table might cause overflow in size calculations. */
565	static lin
566	guess_lines (lin n, size_t s, size_t t)
567	{
568	size_t guessed_bytes_per_line = n < 10 ? 32 : s / (n - 1);
569	lin guessed_lines = MAX (1, t / guessed_bytes_per_line);
570	return MIN (guessed_lines, PTRDIFF_MAX / (2 * sizeof (char *) + 1) - 5) + 5;
571	}
572
573	/* Given a vector of two file_data objects, find the identical
574	prefixes and suffixes of each object. */
575
576	static void
577	find_identical_ends (struct file_data filevec[])
578	{
579	word w0, w1;
580	char p0, p1, buffer0, buffer1;
581	char const end0, beg0;
582	char const linbuf0, linbuf1;
583	lin i, lines;
584	size_t n0, n1;
585	lin alloc_lines0, alloc_lines1;
586	lin buffered_prefix, prefix_count, prefix_mask;
587	lin middle_guess, suffix_guess;
588
589	slurp (&filevec[0]);
590	prepare_text (&filevec[0]);
591	if (filevec[0].desc != filevec[1].desc)
592	{
593	slurp (&filevec[1]);
594	prepare_text (&filevec[1]);
595	}
596	else
597	{
598	filevec[1].buffer = filevec[0].buffer;
599	filevec[1].bufsize = filevec[0].bufsize;
600	filevec[1].buffered = filevec[0].buffered;
601	filevec[1].missing_newline = filevec[0].missing_newline;
602	}
603
604	/* Find identical prefix. */
605
606	w0 = filevec[0].buffer;
607	w1 = filevec[1].buffer;
608	p0 = buffer0 = (char *) w0;
609	p1 = buffer1 = (char *) w1;
610	n0 = filevec[0].buffered;
611	n1 = filevec[1].buffered;
612
613	if (p0 == p1)
614	/* The buffers are the same; sentinels won't work. */
615	p0 = p1 += n1;
616	else
617	{
618	/* Insert end sentinels, in this case characters that are guaranteed
619	to make the equality test false, and thus terminate the loop. */
620
621	if (n0 < n1)
622	p0[n0] = ~p1[n0];
623	else
624	p1[n1] = ~p0[n1];
625
626	/* Loop until first mismatch, or to the sentinel characters. */
627
628	/* Compare a word at a time for speed. */
629	while (w0 == w1)
630	w0++, w1++;
631
632	/* Do the last few bytes of comparison a byte at a time. */
633	p0 = (char *) w0;
634	p1 = (char *) w1;
635	while (p0 == p1)
636	p0++, p1++;
637
638	/* Don't mistakenly count missing newline as part of prefix. */
639	if (ROBUST_OUTPUT_STYLE (output_style)
640	&& ((buffer0 + n0 - filevec[0].missing_newline < p0)
641	!=
642	(buffer1 + n1 - filevec[1].missing_newline < p1)))
643	p0--, p1--;
644	}
645
646	/* Now P0 and P1 point at the first nonmatching characters. */
647
648	/* Skip back to last line-beginning in the prefix,
649	and then discard up to HORIZON_LINES lines from the prefix. */
650	i = horizon_lines;
651	while (p0 != buffer0 && (p0[-1] != '\n' \|\| i--))
652	p0--, p1--;
653
654	/* Record the prefix. */
655	filevec[0].prefix_end = p0;
656	filevec[1].prefix_end = p1;
657
658	/* Find identical suffix. */
659
660	/* P0 and P1 point beyond the last chars not yet compared. */
661	p0 = buffer0 + n0;
662	p1 = buffer1 + n1;
663
664	if (! ROBUST_OUTPUT_STYLE (output_style)
665	\|\| filevec[0].missing_newline == filevec[1].missing_newline)
666	{
667	end0 = p0; /* Addr of last char in file 0. */
668
669	/* Get value of P0 at which we should stop scanning backward:
670	this is when either P0 or P1 points just past the last char
671	of the identical prefix. */
672	beg0 = filevec[0].prefix_end + (n0 < n1 ? 0 : n0 - n1);
673
674	/* Scan back until chars don't match or we reach that point. */
675	for (; p0 != beg0; p0--, p1--)
676	if (p0 != p1)
677	{
678	/* Point at the first char of the matching suffix. */
679	beg0 = p0;
680	break;
681	}
682
683	/* Are we at a line-beginning in both files? If not, add the rest of
684	this line to the main body. Discard up to HORIZON_LINES lines from
685	the identical suffix. Also, discard one extra line,
686	because shift_boundaries may need it. */
687	i = horizon_lines + !((buffer0 == p0 \|\| p0[-1] == '\n')
688	&&
689	(buffer1 == p1 \|\| p1[-1] == '\n'));
690	while (i-- && p0 != end0)
691	while (*p0++ != '\n')
692	continue;
693
694	p1 += p0 - beg0;
695	}
696
697	/* Record the suffix. */
698	filevec[0].suffix_begin = p0;
699	filevec[1].suffix_begin = p1;
700
701	/* Calculate number of lines of prefix to save.
702
703	prefix_count == 0 means save the whole prefix;
704	we need this for options like -D that output the whole file,
705	or for enormous contexts (to avoid worrying about arithmetic overflow).
706	We also need it for options like -F that output some preceding line;
707	at least we will need to find the last few lines,
708	but since we don't know how many, it's easiest to find them all.
709
710	Otherwise, prefix_count != 0. Save just prefix_count lines at start
711	of the line buffer; they'll be moved to the proper location later.
712	Handle 1 more line than the context says (because we count 1 too many),
713	rounded up to the next power of 2 to speed index computation. */
714
715	if (no_diff_means_no_output && ! function_regexp.fastmap
716	&& context < LIN_MAX / 4 && context < n0)
717	{
718	middle_guess = guess_lines (0, 0, p0 - filevec[0].prefix_end);
719	suffix_guess = guess_lines (0, 0, buffer0 + n0 - p0);
720	for (prefix_count = 1; prefix_count <= context; prefix_count *= 2)
721	continue;
722	alloc_lines0 = (prefix_count + middle_guess
723	+ MIN (context, suffix_guess));
724	}
725	else
726	{
727	prefix_count = 0;
728	alloc_lines0 = guess_lines (0, 0, n0);
729	}
730
731	prefix_mask = prefix_count - 1;
732	lines = 0;
733	linbuf0 = xmalloc (alloc_lines0 * sizeof *linbuf0);
734	p0 = buffer0;
735
736	/* If the prefix is needed, find the prefix lines. */
737	if (! (no_diff_means_no_output
738	&& filevec[0].prefix_end == p0
739	&& filevec[1].prefix_end == p1))
740	{
741	end0 = filevec[0].prefix_end;
742	while (p0 != end0)
743	{
744	lin l = lines++ & prefix_mask;
745	if (l == alloc_lines0)
746	{
747	if (PTRDIFF_MAX / (2 * sizeof *linbuf0) <= alloc_lines0)
748	xalloc_die ();
749	alloc_lines0 *= 2;
750	linbuf0 = xrealloc (linbuf0, alloc_lines0 * sizeof *linbuf0);
751	}
752	linbuf0[l] = p0;
753	while (*p0++ != '\n')
754	continue;
755	}
756	}
757	buffered_prefix = prefix_count && context < lines ? context : lines;
758
759	/* Allocate line buffer 1. */
760
761	middle_guess = guess_lines (lines, p0 - buffer0, p1 - filevec[1].prefix_end);
762	suffix_guess = guess_lines (lines, p0 - buffer0, buffer1 + n1 - p1);
763	alloc_lines1 = buffered_prefix + middle_guess + MIN (context, suffix_guess);
764	if (alloc_lines1 < buffered_prefix
765	\|\| PTRDIFF_MAX / sizeof *linbuf1 <= alloc_lines1)
766	xalloc_die ();
767	linbuf1 = xmalloc (alloc_lines1 * sizeof *linbuf1);
768
769	if (buffered_prefix != lines)
770	{
771	/* Rotate prefix lines to proper location. */
772	for (i = 0; i < buffered_prefix; i++)
773	linbuf1[i] = linbuf0[(lines - context + i) & prefix_mask];
774	for (i = 0; i < buffered_prefix; i++)
775	linbuf0[i] = linbuf1[i];
776	}
777
778	/* Initialize line buffer 1 from line buffer 0. */
779	for (i = 0; i < buffered_prefix; i++)
780	linbuf1[i] = linbuf0[i] - buffer0 + buffer1;
781
782	/* Record the line buffer, adjusted so that
783	linbuf[0] points at the first differing line. */
784	filevec[0].linbuf = linbuf0 + buffered_prefix;
785	filevec[1].linbuf = linbuf1 + buffered_prefix;
786	filevec[0].linbuf_base = filevec[1].linbuf_base = - buffered_prefix;
787	filevec[0].alloc_lines = alloc_lines0 - buffered_prefix;
788	filevec[1].alloc_lines = alloc_lines1 - buffered_prefix;
789	filevec[0].prefix_lines = filevec[1].prefix_lines = lines;
790	}
791
792
793	/* If 1 < k, then (2**k - prime_offset[k]) is the largest prime less
794	than 2**k. This table is derived from Chris K. Caldwell's list
795	<http://www.utm.edu/research/primes/lists/2small/>. */
796
797	static unsigned char const prime_offset[] =
798	{
799	0, 0, 1, 1, 3, 1, 3, 1, 5, 3, 3, 9, 3, 1, 3, 19, 15, 1, 5, 1, 3, 9, 3,
800	15, 3, 39, 5, 39, 57, 3, 35, 1, 5, 9, 41, 31, 5, 25, 45, 7, 87, 21,
801	11, 57, 17, 55, 21, 115, 59, 81, 27, 129, 47, 111, 33, 55, 5, 13, 27,
802	55, 93, 1, 57, 25
803	};
804
805	/* Verify that this host's size_t is not too wide for the above table. */
806
807	verify (enough_prime_offsets,
808	sizeof (size_t) * CHAR_BIT <= sizeof prime_offset);
809
810	/* Given a vector of two file_data objects, read the file associated
811	with each one, and build the table of equivalence classes.
812	Return nonzero if either file appears to be a binary file.
813	If PRETEND_BINARY is nonzero, pretend they are binary regardless. */
814
815	bool
816	read_files (struct file_data filevec[], bool pretend_binary)
817	{
818	int i;
819	bool skip_test = text \| pretend_binary;
820	bool appears_binary = pretend_binary \| sip (&filevec[0], skip_test);
821
822	if (filevec[0].desc != filevec[1].desc)
823	appears_binary \|= sip (&filevec[1], skip_test \| appears_binary);
824	else
825	{
826	filevec[1].buffer = filevec[0].buffer;
827	filevec[1].bufsize = filevec[0].bufsize;
828	filevec[1].buffered = filevec[0].buffered;
829	}
830	if (appears_binary)
831	{
832	set_binary_mode (filevec[0].desc, 1);
833	set_binary_mode (filevec[1].desc, 1);
834	return 1;
835	}
836
837	find_identical_ends (filevec);
838
839	equivs_alloc = filevec[0].alloc_lines + filevec[1].alloc_lines + 1;
840	if (PTRDIFF_MAX / sizeof *equivs <= equivs_alloc)
841	xalloc_die ();
842	equivs = xmalloc (equivs_alloc * sizeof *equivs);
843	/* Equivalence class 0 is permanently safe for lines that were not
844	hashed. Real equivalence classes start at 1. */
845	equivs_index = 1;
846
847	/* Allocate (one plus) a prime number of hash buckets. Use a prime
848	number between 1/3 and 2/3 of the value of equiv_allocs,
849	approximately. */
850	for (i = 9; (size_t) 1 << i < equivs_alloc / 3; i++)
851	continue;
852	nbuckets = ((size_t) 1 << i) - prime_offset[i];
853	if (PTRDIFF_MAX / sizeof *buckets <= nbuckets)
854	xalloc_die ();
855	buckets = zalloc ((nbuckets + 1) * sizeof *buckets);
856	buckets++;
857
858	for (i = 0; i < 2; i++)
859	find_and_hash_each_line (&filevec[i]);
860
861	filevec[0].equiv_max = filevec[1].equiv_max = equivs_index;
862
863	free (equivs);
864	free (buckets - 1);
865
866	return 0;
867	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: trunk/essentials/sys-apps/diffutils/src/io.c

Download in other formats: