source: vendor/diffutils/2.8.1/src/analyze.c

Last change on this file was 2556, checked in by bird, 19 years ago

diffutils 2.8.1

File size: 29.6 KB
Line 
1/* Analyze file differences for GNU DIFF.
2
3 Copyright (C) 1988, 1989, 1992, 1993, 1994, 1995, 1998, 2001, 2002
4 Free Software Foundation, Inc.
5
6 This file is part of GNU DIFF.
7
8 GNU DIFF is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
12
13 GNU DIFF is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; see the file COPYING.
20 If not, write to the Free Software Foundation,
21 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22
23/* The basic algorithm is described in:
24 "An O(ND) Difference Algorithm and its Variations", Eugene Myers,
25 Algorithmica Vol. 1 No. 2, 1986, pp. 251-266;
26 see especially section 4.2, which describes the variation used below.
27 Unless the --minimal option is specified, this code uses the TOO_EXPENSIVE
28 heuristic, by Paul Eggert, to limit the cost to O(N**1.5 log N)
29 at the price of producing suboptimal output for large inputs with
30 many differences.
31
32 The basic algorithm was independently discovered as described in:
33 "Algorithms for Approximate String Matching", E. Ukkonen,
34 Information and Control Vol. 64, 1985, pp. 100-118. */
35
36#include "diff.h"
37#include <cmpbuf.h>
38#include <error.h>
39#include <regex.h>
40#include <xalloc.h>
41
42static lin *xvec, *yvec; /* Vectors being compared. */
43static lin *fdiag; /* Vector, indexed by diagonal, containing
44 1 + the X coordinate of the point furthest
45 along the given diagonal in the forward
46 search of the edit matrix. */
47static lin *bdiag; /* Vector, indexed by diagonal, containing
48 the X coordinate of the point furthest
49 along the given diagonal in the backward
50 search of the edit matrix. */
51static lin too_expensive; /* Edit scripts longer than this are too
52 expensive to compute. */
53
54#define SNAKE_LIMIT 20 /* Snakes bigger than this are considered `big'. */
55
56struct partition
57{
58 lin xmid, ymid; /* Midpoints of this partition. */
59 bool lo_minimal; /* Nonzero if low half will be analyzed minimally. */
60 bool hi_minimal; /* Likewise for high half. */
61};
62
63/* Find the midpoint of the shortest edit script for a specified
64 portion of the two files.
65
66 Scan from the beginnings of the files, and simultaneously from the ends,
67 doing a breadth-first search through the space of edit-sequence.
68 When the two searches meet, we have found the midpoint of the shortest
69 edit sequence.
70
71 If FIND_MINIMAL is nonzero, find the minimal edit script regardless
72 of expense. Otherwise, if the search is too expensive, use
73 heuristics to stop the search and report a suboptimal answer.
74
75 Set PART->(xmid,ymid) to the midpoint (XMID,YMID). The diagonal number
76 XMID - YMID equals the number of inserted lines minus the number
77 of deleted lines (counting only lines before the midpoint).
78 Return the approximate edit cost; this is the total number of
79 lines inserted or deleted (counting only lines before the midpoint),
80 unless a heuristic is used to terminate the search prematurely.
81
82 Set PART->lo_minimal to true iff the minimal edit script for the
83 left half of the partition is known; similarly for PART->hi_minimal.
84
85 This function assumes that the first lines of the specified portions
86 of the two files do not match, and likewise that the last lines do not
87 match. The caller must trim matching lines from the beginning and end
88 of the portions it is going to specify.
89
90 If we return the "wrong" partitions,
91 the worst this can do is cause suboptimal diff output.
92 It cannot cause incorrect diff output. */
93
94static lin
95diag (lin xoff, lin xlim, lin yoff, lin ylim, bool find_minimal,
96 struct partition *part)
97{
98 lin *const fd = fdiag; /* Give the compiler a chance. */
99 lin *const bd = bdiag; /* Additional help for the compiler. */
100 lin const *const xv = xvec; /* Still more help for the compiler. */
101 lin const *const yv = yvec; /* And more and more . . . */
102 lin const dmin = xoff - ylim; /* Minimum valid diagonal. */
103 lin const dmax = xlim - yoff; /* Maximum valid diagonal. */
104 lin const fmid = xoff - yoff; /* Center diagonal of top-down search. */
105 lin const bmid = xlim - ylim; /* Center diagonal of bottom-up search. */
106 lin fmin = fmid, fmax = fmid; /* Limits of top-down search. */
107 lin bmin = bmid, bmax = bmid; /* Limits of bottom-up search. */
108 lin c; /* Cost. */
109 bool odd = (fmid - bmid) & 1; /* True if southeast corner is on an odd
110 diagonal with respect to the northwest. */
111
112 fd[fmid] = xoff;
113 bd[bmid] = xlim;
114
115 for (c = 1;; ++c)
116 {
117 lin d; /* Active diagonal. */
118 bool big_snake = 0;
119
120 /* Extend the top-down search by an edit step in each diagonal. */
121 fmin > dmin ? fd[--fmin - 1] = -1 : ++fmin;
122 fmax < dmax ? fd[++fmax + 1] = -1 : --fmax;
123 for (d = fmax; d >= fmin; d -= 2)
124 {
125 lin x, y, oldx, tlo = fd[d - 1], thi = fd[d + 1];
126
127 if (tlo >= thi)
128 x = tlo + 1;
129 else
130 x = thi;
131 oldx = x;
132 y = x - d;
133 while (x < xlim && y < ylim && xv[x] == yv[y])
134 ++x, ++y;
135 if (x - oldx > SNAKE_LIMIT)
136 big_snake = 1;
137 fd[d] = x;
138 if (odd && bmin <= d && d <= bmax && bd[d] <= x)
139 {
140 part->xmid = x;
141 part->ymid = y;
142 part->lo_minimal = part->hi_minimal = 1;
143 return 2 * c - 1;
144 }
145 }
146
147 /* Similarly extend the bottom-up search. */
148 bmin > dmin ? bd[--bmin - 1] = LIN_MAX : ++bmin;
149 bmax < dmax ? bd[++bmax + 1] = LIN_MAX : --bmax;
150 for (d = bmax; d >= bmin; d -= 2)
151 {
152 lin x, y, oldx, tlo = bd[d - 1], thi = bd[d + 1];
153
154 if (tlo < thi)
155 x = tlo;
156 else
157 x = thi - 1;
158 oldx = x;
159 y = x - d;
160 while (x > xoff && y > yoff && xv[x - 1] == yv[y - 1])
161 --x, --y;
162 if (oldx - x > SNAKE_LIMIT)
163 big_snake = 1;
164 bd[d] = x;
165 if (!odd && fmin <= d && d <= fmax && x <= fd[d])
166 {
167 part->xmid = x;
168 part->ymid = y;
169 part->lo_minimal = part->hi_minimal = 1;
170 return 2 * c;
171 }
172 }
173
174 if (find_minimal)
175 continue;
176
177 /* Heuristic: check occasionally for a diagonal that has made
178 lots of progress compared with the edit distance.
179 If we have any such, find the one that has made the most
180 progress and return it as if it had succeeded.
181
182 With this heuristic, for files with a constant small density
183 of changes, the algorithm is linear in the file size. */
184
185 if (200 < c && big_snake && speed_large_files)
186 {
187 lin best;
188
189 best = 0;
190 for (d = fmax; d >= fmin; d -= 2)
191 {
192 lin dd = d - fmid;
193 lin x = fd[d];
194 lin y = x - d;
195 lin v = (x - xoff) * 2 - dd;
196 if (v > 12 * (c + (dd < 0 ? -dd : dd)))
197 {
198 if (v > best
199 && xoff + SNAKE_LIMIT <= x && x < xlim
200 && yoff + SNAKE_LIMIT <= y && y < ylim)
201 {
202 /* We have a good enough best diagonal;
203 now insist that it end with a significant snake. */
204 int k;
205
206 for (k = 1; xv[x - k] == yv[y - k]; k++)
207 if (k == SNAKE_LIMIT)
208 {
209 best = v;
210 part->xmid = x;
211 part->ymid = y;
212 break;
213 }
214 }
215 }
216 }
217 if (best > 0)
218 {
219 part->lo_minimal = 1;
220 part->hi_minimal = 0;
221 return 2 * c - 1;
222 }
223
224 best = 0;
225 for (d = bmax; d >= bmin; d -= 2)
226 {
227 lin dd = d - bmid;
228 lin x = bd[d];
229 lin y = x - d;
230 lin v = (xlim - x) * 2 + dd;
231 if (v > 12 * (c + (dd < 0 ? -dd : dd)))
232 {
233 if (v > best
234 && xoff < x && x <= xlim - SNAKE_LIMIT
235 && yoff < y && y <= ylim - SNAKE_LIMIT)
236 {
237 /* We have a good enough best diagonal;
238 now insist that it end with a significant snake. */
239 int k;
240
241 for (k = 0; xv[x + k] == yv[y + k]; k++)
242 if (k == SNAKE_LIMIT - 1)
243 {
244 best = v;
245 part->xmid = x;
246 part->ymid = y;
247 break;
248 }
249 }
250 }
251 }
252 if (best > 0)
253 {
254 part->lo_minimal = 0;
255 part->hi_minimal = 1;
256 return 2 * c - 1;
257 }
258 }
259
260 /* Heuristic: if we've gone well beyond the call of duty,
261 give up and report halfway between our best results so far. */
262 if (c >= too_expensive)
263 {
264 lin fxybest, fxbest;
265 lin bxybest, bxbest;
266
267 fxbest = bxbest = 0; /* Pacify `gcc -Wall'. */
268
269 /* Find forward diagonal that maximizes X + Y. */
270 fxybest = -1;
271 for (d = fmax; d >= fmin; d -= 2)
272 {
273 lin x = MIN (fd[d], xlim);
274 lin y = x - d;
275 if (ylim < y)
276 x = ylim + d, y = ylim;
277 if (fxybest < x + y)
278 {
279 fxybest = x + y;
280 fxbest = x;
281 }
282 }
283
284 /* Find backward diagonal that minimizes X + Y. */
285 bxybest = LIN_MAX;
286 for (d = bmax; d >= bmin; d -= 2)
287 {
288 lin x = MAX (xoff, bd[d]);
289 lin y = x - d;
290 if (y < yoff)
291 x = yoff + d, y = yoff;
292 if (x + y < bxybest)
293 {
294 bxybest = x + y;
295 bxbest = x;
296 }
297 }
298
299 /* Use the better of the two diagonals. */
300 if ((xlim + ylim) - bxybest < fxybest - (xoff + yoff))
301 {
302 part->xmid = fxbest;
303 part->ymid = fxybest - fxbest;
304 part->lo_minimal = 1;
305 part->hi_minimal = 0;
306 }
307 else
308 {
309 part->xmid = bxbest;
310 part->ymid = bxybest - bxbest;
311 part->lo_minimal = 0;
312 part->hi_minimal = 1;
313 }
314 return 2 * c - 1;
315 }
316 }
317}
318
319
320/* Compare in detail contiguous subsequences of the two files
321 which are known, as a whole, to match each other.
322
323 The results are recorded in the vectors files[N].changed, by
324 storing 1 in the element for each line that is an insertion or deletion.
325
326 The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
327
328 Note that XLIM, YLIM are exclusive bounds.
329 All line numbers are origin-0 and discarded lines are not counted.
330
331 If FIND_MINIMAL, find a minimal difference no matter how
332 expensive it is. */
333
334static void
335compareseq (lin xoff, lin xlim, lin yoff, lin ylim, bool find_minimal)
336{
337 lin * const xv = xvec; /* Help the compiler. */
338 lin * const yv = yvec;
339
340 /* Slide down the bottom initial diagonal. */
341 while (xoff < xlim && yoff < ylim && xv[xoff] == yv[yoff])
342 ++xoff, ++yoff;
343 /* Slide up the top initial diagonal. */
344 while (xlim > xoff && ylim > yoff && xv[xlim - 1] == yv[ylim - 1])
345 --xlim, --ylim;
346
347 /* Handle simple cases. */
348 if (xoff == xlim)
349 while (yoff < ylim)
350 files[1].changed[files[1].realindexes[yoff++]] = 1;
351 else if (yoff == ylim)
352 while (xoff < xlim)
353 files[0].changed[files[0].realindexes[xoff++]] = 1;
354 else
355 {
356 lin c;
357 struct partition part;
358
359 /* Find a point of correspondence in the middle of the files. */
360
361 c = diag (xoff, xlim, yoff, ylim, find_minimal, &part);
362
363 if (c == 1)
364 {
365 /* This should be impossible, because it implies that
366 one of the two subsequences is empty,
367 and that case was handled above without calling `diag'.
368 Let's verify that this is true. */
369 abort ();
370#if 0
371 /* The two subsequences differ by a single insert or delete;
372 record it and we are done. */
373 if (part.xmid - part.ymid < xoff - yoff)
374 files[1].changed[files[1].realindexes[part.ymid - 1]] = 1;
375 else
376 files[0].changed[files[0].realindexes[part.xmid]] = 1;
377#endif
378 }
379 else
380 {
381 /* Use the partitions to split this problem into subproblems. */
382 compareseq (xoff, part.xmid, yoff, part.ymid, part.lo_minimal);
383 compareseq (part.xmid, xlim, part.ymid, ylim, part.hi_minimal);
384 }
385 }
386}
387
388
389/* Discard lines from one file that have no matches in the other file.
390
391 A line which is discarded will not be considered by the actual
392 comparison algorithm; it will be as if that line were not in the file.
393 The file's `realindexes' table maps virtual line numbers
394 (which don't count the discarded lines) into real line numbers;
395 this is how the actual comparison algorithm produces results
396 that are comprehensible when the discarded lines are counted.
397
398 When we discard a line, we also mark it as a deletion or insertion
399 so that it will be printed in the output. */
400
401static void
402discard_confusing_lines (struct file_data filevec[])
403{
404 int f;
405 lin i;
406 char *discarded[2];
407 lin *equiv_count[2];
408 lin *p;
409
410 /* Allocate our results. */
411 p = xmalloc ((filevec[0].buffered_lines + filevec[1].buffered_lines)
412 * (2 * sizeof *p));
413 for (f = 0; f < 2; f++)
414 {
415 filevec[f].undiscarded = p; p += filevec[f].buffered_lines;
416 filevec[f].realindexes = p; p += filevec[f].buffered_lines;
417 }
418
419 /* Set up equiv_count[F][I] as the number of lines in file F
420 that fall in equivalence class I. */
421
422 p = zalloc (filevec[0].equiv_max * (2 * sizeof *p));
423 equiv_count[0] = p;
424 equiv_count[1] = p + filevec[0].equiv_max;
425
426 for (i = 0; i < filevec[0].buffered_lines; ++i)
427 ++equiv_count[0][filevec[0].equivs[i]];
428 for (i = 0; i < filevec[1].buffered_lines; ++i)
429 ++equiv_count[1][filevec[1].equivs[i]];
430
431 /* Set up tables of which lines are going to be discarded. */
432
433 discarded[0] = zalloc (filevec[0].buffered_lines
434 + filevec[1].buffered_lines);
435 discarded[1] = discarded[0] + filevec[0].buffered_lines;
436
437 /* Mark to be discarded each line that matches no line of the other file.
438 If a line matches many lines, mark it as provisionally discardable. */
439
440 for (f = 0; f < 2; f++)
441 {
442 size_t end = filevec[f].buffered_lines;
443 char *discards = discarded[f];
444 lin *counts = equiv_count[1 - f];
445 lin *equivs = filevec[f].equivs;
446 size_t many = 5;
447 size_t tem = end / 64;
448
449 /* Multiply MANY by approximate square root of number of lines.
450 That is the threshold for provisionally discardable lines. */
451 while ((tem = tem >> 2) > 0)
452 many *= 2;
453
454 for (i = 0; i < end; i++)
455 {
456 lin nmatch;
457 if (equivs[i] == 0)
458 continue;
459 nmatch = counts[equivs[i]];
460 if (nmatch == 0)
461 discards[i] = 1;
462 else if (nmatch > many)
463 discards[i] = 2;
464 }
465 }
466
467 /* Don't really discard the provisional lines except when they occur
468 in a run of discardables, with nonprovisionals at the beginning
469 and end. */
470
471 for (f = 0; f < 2; f++)
472 {
473 lin end = filevec[f].buffered_lines;
474 register char *discards = discarded[f];
475
476 for (i = 0; i < end; i++)
477 {
478 /* Cancel provisional discards not in middle of run of discards. */
479 if (discards[i] == 2)
480 discards[i] = 0;
481 else if (discards[i] != 0)
482 {
483 /* We have found a nonprovisional discard. */
484 register lin j;
485 lin length;
486 lin provisional = 0;
487
488 /* Find end of this run of discardable lines.
489 Count how many are provisionally discardable. */
490 for (j = i; j < end; j++)
491 {
492 if (discards[j] == 0)
493 break;
494 if (discards[j] == 2)
495 ++provisional;
496 }
497
498 /* Cancel provisional discards at end, and shrink the run. */
499 while (j > i && discards[j - 1] == 2)
500 discards[--j] = 0, --provisional;
501
502 /* Now we have the length of a run of discardable lines
503 whose first and last are not provisional. */
504 length = j - i;
505
506 /* If 1/4 of the lines in the run are provisional,
507 cancel discarding of all provisional lines in the run. */
508 if (provisional * 4 > length)
509 {
510 while (j > i)
511 if (discards[--j] == 2)
512 discards[j] = 0;
513 }
514 else
515 {
516 register lin consec;
517 lin minimum = 1;
518 lin tem = length >> 2;
519
520 /* MINIMUM is approximate square root of LENGTH/4.
521 A subrun of two or more provisionals can stand
522 when LENGTH is at least 16.
523 A subrun of 4 or more can stand when LENGTH >= 64. */
524 while (0 < (tem >>= 2))
525 minimum <<= 1;
526 minimum++;
527
528 /* Cancel any subrun of MINIMUM or more provisionals
529 within the larger run. */
530 for (j = 0, consec = 0; j < length; j++)
531 if (discards[i + j] != 2)
532 consec = 0;
533 else if (minimum == ++consec)
534 /* Back up to start of subrun, to cancel it all. */
535 j -= consec;
536 else if (minimum < consec)
537 discards[i + j] = 0;
538
539 /* Scan from beginning of run
540 until we find 3 or more nonprovisionals in a row
541 or until the first nonprovisional at least 8 lines in.
542 Until that point, cancel any provisionals. */
543 for (j = 0, consec = 0; j < length; j++)
544 {
545 if (j >= 8 && discards[i + j] == 1)
546 break;
547 if (discards[i + j] == 2)
548 consec = 0, discards[i + j] = 0;
549 else if (discards[i + j] == 0)
550 consec = 0;
551 else
552 consec++;
553 if (consec == 3)
554 break;
555 }
556
557 /* I advances to the last line of the run. */
558 i += length - 1;
559
560 /* Same thing, from end. */
561 for (j = 0, consec = 0; j < length; j++)
562 {
563 if (j >= 8 && discards[i - j] == 1)
564 break;
565 if (discards[i - j] == 2)
566 consec = 0, discards[i - j] = 0;
567 else if (discards[i - j] == 0)
568 consec = 0;
569 else
570 consec++;
571 if (consec == 3)
572 break;
573 }
574 }
575 }
576 }
577 }
578
579 /* Actually discard the lines. */
580 for (f = 0; f < 2; f++)
581 {
582 char *discards = discarded[f];
583 lin end = filevec[f].buffered_lines;
584 lin j = 0;
585 for (i = 0; i < end; ++i)
586 if (minimal || discards[i] == 0)
587 {
588 filevec[f].undiscarded[j] = filevec[f].equivs[i];
589 filevec[f].realindexes[j++] = i;
590 }
591 else
592 filevec[f].changed[i] = 1;
593 filevec[f].nondiscarded_lines = j;
594 }
595
596 free (discarded[0]);
597 free (equiv_count[0]);
598}
599
600
601/* Adjust inserts/deletes of identical lines to join changes
602 as much as possible.
603
604 We do something when a run of changed lines include a
605 line at one end and have an excluded, identical line at the other.
606 We are free to choose which identical line is included.
607 `compareseq' usually chooses the one at the beginning,
608 but usually it is cleaner to consider the following identical line
609 to be the "change". */
610
611static void
612shift_boundaries (struct file_data filevec[])
613{
614 int f;
615
616 for (f = 0; f < 2; f++)
617 {
618 bool *changed = filevec[f].changed;
619 bool const *other_changed = filevec[1 - f].changed;
620 lin const *equivs = filevec[f].equivs;
621 lin i = 0;
622 lin j = 0;
623 lin i_end = filevec[f].buffered_lines;
624
625 while (1)
626 {
627 lin runlength, start, corresponding;
628
629 /* Scan forwards to find beginning of another run of changes.
630 Also keep track of the corresponding point in the other file. */
631
632 while (i < i_end && !changed[i])
633 {
634 while (other_changed[j++])
635 continue;
636 i++;
637 }
638
639 if (i == i_end)
640 break;
641
642 start = i;
643
644 /* Find the end of this run of changes. */
645
646 while (changed[++i])
647 continue;
648 while (other_changed[j])
649 j++;
650
651 do
652 {
653 /* Record the length of this run of changes, so that
654 we can later determine whether the run has grown. */
655 runlength = i - start;
656
657 /* Move the changed region back, so long as the
658 previous unchanged line matches the last changed one.
659 This merges with previous changed regions. */
660
661 while (start && equivs[start - 1] == equivs[i - 1])
662 {
663 changed[--start] = 1;
664 changed[--i] = 0;
665 while (changed[start - 1])
666 start--;
667 while (other_changed[--j])
668 continue;
669 }
670
671 /* Set CORRESPONDING to the end of the changed run, at the last
672 point where it corresponds to a changed run in the other file.
673 CORRESPONDING == I_END means no such point has been found. */
674 corresponding = other_changed[j - 1] ? i : i_end;
675
676 /* Move the changed region forward, so long as the
677 first changed line matches the following unchanged one.
678 This merges with following changed regions.
679 Do this second, so that if there are no merges,
680 the changed region is moved forward as far as possible. */
681
682 while (i != i_end && equivs[start] == equivs[i])
683 {
684 changed[start++] = 0;
685 changed[i++] = 1;
686 while (changed[i])
687 i++;
688 while (other_changed[++j])
689 corresponding = i;
690 }
691 }
692 while (runlength != i - start);
693
694 /* If possible, move the fully-merged run of changes
695 back to a corresponding run in the other file. */
696
697 while (corresponding < i)
698 {
699 changed[--start] = 1;
700 changed[--i] = 0;
701 while (other_changed[--j])
702 continue;
703 }
704 }
705 }
706}
707
708
709/* Cons an additional entry onto the front of an edit script OLD.
710 LINE0 and LINE1 are the first affected lines in the two files (origin 0).
711 DELETED is the number of lines deleted here from file 0.
712 INSERTED is the number of lines inserted here in file 1.
713
714 If DELETED is 0 then LINE0 is the number of the line before
715 which the insertion was done; vice versa for INSERTED and LINE1. */
716
717static struct change *
718add_change (lin line0, lin line1, lin deleted, lin inserted,
719 struct change *old)
720{
721 struct change *new = xmalloc (sizeof *new);
722
723 new->line0 = line0;
724 new->line1 = line1;
725 new->inserted = inserted;
726 new->deleted = deleted;
727 new->link = old;
728 return new;
729}
730
731/* Scan the tables of which lines are inserted and deleted,
732 producing an edit script in reverse order. */
733
734static struct change *
735build_reverse_script (struct file_data const filevec[])
736{
737 struct change *script = 0;
738 bool *changed0 = filevec[0].changed;
739 bool *changed1 = filevec[1].changed;
740 lin len0 = filevec[0].buffered_lines;
741 lin len1 = filevec[1].buffered_lines;
742
743 /* Note that changedN[len0] does exist, and is 0. */
744
745 lin i0 = 0, i1 = 0;
746
747 while (i0 < len0 || i1 < len1)
748 {
749 if (changed0[i0] | changed1[i1])
750 {
751 lin line0 = i0, line1 = i1;
752
753 /* Find # lines changed here in each file. */
754 while (changed0[i0]) ++i0;
755 while (changed1[i1]) ++i1;
756
757 /* Record this change. */
758 script = add_change (line0, line1, i0 - line0, i1 - line1, script);
759 }
760
761 /* We have reached lines in the two files that match each other. */
762 i0++, i1++;
763 }
764
765 return script;
766}
767
768/* Scan the tables of which lines are inserted and deleted,
769 producing an edit script in forward order. */
770
771static struct change *
772build_script (struct file_data const filevec[])
773{
774 struct change *script = 0;
775 bool *changed0 = filevec[0].changed;
776 bool *changed1 = filevec[1].changed;
777 lin i0 = filevec[0].buffered_lines, i1 = filevec[1].buffered_lines;
778
779 /* Note that changedN[-1] does exist, and is 0. */
780
781 while (i0 >= 0 || i1 >= 0)
782 {
783 if (changed0[i0 - 1] | changed1[i1 - 1])
784 {
785 lin line0 = i0, line1 = i1;
786
787 /* Find # lines changed here in each file. */
788 while (changed0[i0 - 1]) --i0;
789 while (changed1[i1 - 1]) --i1;
790
791 /* Record this change. */
792 script = add_change (i0, i1, line0 - i0, line1 - i1, script);
793 }
794
795 /* We have reached lines in the two files that match each other. */
796 i0--, i1--;
797 }
798
799 return script;
800}
801
802
803/* If CHANGES, briefly report that two files differed.
804 Return 2 if trouble, CHANGES otherwise. */
805static int
806briefly_report (int changes, struct file_data const filevec[])
807{
808 if (changes)
809 {
810 char const *label0 = file_label[0] ? file_label[0] : filevec[0].name;
811 char const *label1 = file_label[1] ? file_label[1] : filevec[1].name;
812
813 if (brief)
814 message ("Files %s and %s differ\n", label0, label1);
815 else
816 {
817 message ("Binary files %s and %s differ\n", label0, label1);
818 changes = 2;
819 }
820 }
821
822 return changes;
823}
824
825/* Report the differences of two files. */
826int
827diff_2_files (struct comparison *cmp)
828{
829 lin diags;
830 int f;
831 struct change *e, *p;
832 struct change *script;
833 int changes;
834
835
836 /* If we have detected that either file is binary,
837 compare the two files as binary. This can happen
838 only when the first chunk is read.
839 Also, --brief without any --ignore-* options means
840 we can speed things up by treating the files as binary. */
841
842 if (read_files (cmp->file, files_can_be_treated_as_binary))
843 {
844 /* Files with different lengths must be different. */
845 if (cmp->file[0].stat.st_size != cmp->file[1].stat.st_size
846 && (cmp->file[0].desc < 0 || S_ISREG (cmp->file[0].stat.st_mode))
847 && (cmp->file[1].desc < 0 || S_ISREG (cmp->file[1].stat.st_mode)))
848 changes = 1;
849
850 /* Standard input equals itself. */
851 else if (cmp->file[0].desc == cmp->file[1].desc)
852 changes = 0;
853
854 else
855 /* Scan both files, a buffer at a time, looking for a difference. */
856 {
857 /* Allocate same-sized buffers for both files. */
858 size_t lcm_max = PTRDIFF_MAX - 1;
859 size_t buffer_size =
860 buffer_lcm (sizeof (word),
861 buffer_lcm (STAT_BLOCKSIZE (cmp->file[0].stat),
862 STAT_BLOCKSIZE (cmp->file[1].stat),
863 lcm_max),
864 lcm_max);
865 for (f = 0; f < 2; f++)
866 cmp->file[f].buffer = xrealloc (cmp->file[f].buffer, buffer_size);
867
868 for (;; cmp->file[0].buffered = cmp->file[1].buffered = 0)
869 {
870 /* Read a buffer's worth from both files. */
871 for (f = 0; f < 2; f++)
872 if (0 <= cmp->file[f].desc)
873 file_block_read (&cmp->file[f],
874 buffer_size - cmp->file[f].buffered);
875
876 /* If the buffers differ, the files differ. */
877 if (cmp->file[0].buffered != cmp->file[1].buffered
878 || memcmp (cmp->file[0].buffer,
879 cmp->file[1].buffer,
880 cmp->file[0].buffered))
881 {
882 changes = 1;
883 break;
884 }
885
886 /* If we reach end of file, the files are the same. */
887 if (cmp->file[0].buffered != buffer_size)
888 {
889 changes = 0;
890 break;
891 }
892 }
893 }
894
895 changes = briefly_report (changes, cmp->file);
896 }
897 else
898 {
899 /* Allocate vectors for the results of comparison:
900 a flag for each line of each file, saying whether that line
901 is an insertion or deletion.
902 Allocate an extra element, always 0, at each end of each vector. */
903
904 size_t s = cmp->file[0].buffered_lines + cmp->file[1].buffered_lines + 4;
905 bool *flag_space = zalloc (s * sizeof *flag_space);
906 cmp->file[0].changed = flag_space + 1;
907 cmp->file[1].changed = flag_space + cmp->file[0].buffered_lines + 3;
908
909 /* Some lines are obviously insertions or deletions
910 because they don't match anything. Detect them now, and
911 avoid even thinking about them in the main comparison algorithm. */
912
913 discard_confusing_lines (cmp->file);
914
915 /* Now do the main comparison algorithm, considering just the
916 undiscarded lines. */
917
918 xvec = cmp->file[0].undiscarded;
919 yvec = cmp->file[1].undiscarded;
920 diags = (cmp->file[0].nondiscarded_lines
921 + cmp->file[1].nondiscarded_lines + 3);
922 fdiag = xmalloc (diags * (2 * sizeof *fdiag));
923 bdiag = fdiag + diags;
924 fdiag += cmp->file[1].nondiscarded_lines + 1;
925 bdiag += cmp->file[1].nondiscarded_lines + 1;
926
927 /* Set TOO_EXPENSIVE to be approximate square root of input size,
928 bounded below by 256. */
929 too_expensive = 1;
930 for (; diags != 0; diags >>= 2)
931 too_expensive <<= 1;
932 too_expensive = MAX (256, too_expensive);
933
934 files[0] = cmp->file[0];
935 files[1] = cmp->file[1];
936
937 compareseq (0, cmp->file[0].nondiscarded_lines,
938 0, cmp->file[1].nondiscarded_lines, minimal);
939
940 free (fdiag - (cmp->file[1].nondiscarded_lines + 1));
941
942 /* Modify the results slightly to make them prettier
943 in cases where that can validly be done. */
944
945 shift_boundaries (cmp->file);
946
947 /* Get the results of comparison in the form of a chain
948 of `struct change's -- an edit script. */
949
950 if (output_style == OUTPUT_ED)
951 script = build_reverse_script (cmp->file);
952 else
953 script = build_script (cmp->file);
954
955 /* Set CHANGES if we had any diffs.
956 If some changes are ignored, we must scan the script to decide. */
957 if (ignore_blank_lines || ignore_regexp.fastmap)
958 {
959 struct change *next = script;
960 changes = 0;
961
962 while (next && changes == 0)
963 {
964 struct change *this, *end;
965 lin first0, last0, first1, last1;
966
967 /* Find a set of changes that belong together. */
968 this = next;
969 end = find_change (next);
970
971 /* Disconnect them from the rest of the changes, making them
972 a hunk, and remember the rest for next iteration. */
973 next = end->link;
974 end->link = 0;
975
976 /* Determine whether this hunk is really a difference. */
977 if (analyze_hunk (this, &first0, &last0, &first1, &last1))
978 changes = 1;
979
980 /* Reconnect the script so it will all be freed properly. */
981 end->link = next;
982 }
983 }
984 else
985 changes = (script != 0);
986
987 if (brief)
988 changes = briefly_report (changes, cmp->file);
989 else
990 {
991 if (changes | !no_diff_means_no_output)
992 {
993 /* Record info for starting up output,
994 to be used if and when we have some output to print. */
995 setup_output (file_label[0] ? file_label[0] : cmp->file[0].name,
996 file_label[1] ? file_label[1] : cmp->file[1].name,
997 cmp->parent != 0);
998
999 switch (output_style)
1000 {
1001 case OUTPUT_CONTEXT:
1002 print_context_script (script, 0);
1003 break;
1004
1005 case OUTPUT_UNIFIED:
1006 print_context_script (script, 1);
1007 break;
1008
1009 case OUTPUT_ED:
1010 print_ed_script (script);
1011 break;
1012
1013 case OUTPUT_FORWARD_ED:
1014 pr_forward_ed_script (script);
1015 break;
1016
1017 case OUTPUT_RCS:
1018 print_rcs_script (script);
1019 break;
1020
1021 case OUTPUT_NORMAL:
1022 print_normal_script (script);
1023 break;
1024
1025 case OUTPUT_IFDEF:
1026 print_ifdef_script (script);
1027 break;
1028
1029 case OUTPUT_SDIFF:
1030 print_sdiff_script (script);
1031 break;
1032
1033 default:
1034 abort ();
1035 }
1036
1037 finish_output ();
1038 }
1039 }
1040
1041 free (cmp->file[0].undiscarded);
1042
1043 free (flag_space);
1044
1045 for (f = 0; f < 2; f++)
1046 {
1047 free (cmp->file[f].equivs);
1048 free (cmp->file[f].linbuf + cmp->file[f].linbuf_base);
1049 }
1050
1051 for (e = script; e; e = p)
1052 {
1053 p = e->link;
1054 free (e);
1055 }
1056
1057 if (! ROBUST_OUTPUT_STYLE (output_style))
1058 for (f = 0; f < 2; ++f)
1059 if (cmp->file[f].missing_newline)
1060 {
1061 error (0, 0, "%s: %s\n",
1062 file_label[f] ? file_label[f] : cmp->file[f].name,
1063 _("No newline at end of file"));
1064 changes = 2;
1065 }
1066 }
1067
1068 if (cmp->file[0].buffer != cmp->file[1].buffer)
1069 free (cmp->file[0].buffer);
1070 free (cmp->file[1].buffer);
1071
1072 return changes;
1073}
Note: See TracBrowser for help on using the repository browser.