source: trunk/diffutils/src/util.c@ 2946

Last change on this file since 2946 was 2556, checked in by bird, 20 years ago

diffutils 2.8.1

File size: 18.3 KB
Line 
1/* Support routines for GNU DIFF.
2
3 Copyright (C) 1988, 1989, 1992, 1993, 1994, 1995, 1998, 2001, 2002
4 Free Software Foundation, Inc.
5
6 This file is part of GNU DIFF.
7
8 GNU DIFF is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
12
13 GNU DIFF is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; see the file COPYING.
20 If not, write to the Free Software Foundation,
21 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22
23#include "diff.h"
24#include <dirname.h>
25#include <error.h>
26#include <quotesys.h>
27#include <regex.h>
28#include <xalloc.h>
29
30char const pr_program[] = PR_PROGRAM;
31
/* One-line messages produced while -l is in effect are not printed
   right away; each is stored in a `struct msg' and queued so that it
   can be printed at the end.  */

struct msg
{
  /* Following message in the queue, or null for the last one.  */
  struct msg *next;

  /* Format + 4 args, each '\0' terminated, concatenated.  Declared with
     one element; message5 allocates enough room for all five strings.  */
  char args[1];
};

/* Head of the chain of queued messages.  */

static struct msg *msg_chain;

/* Tail of the chain of queued messages: the link through which the
   next message gets appended.  */

static struct msg **msg_chain_end = &msg_chain;
48
49
/* Report a failed system call without exiting.
   NAME should normally be the file name; it is passed to `error'
   together with the current value of errno.  */

void
perror_with_name (char const *name)
{
  int saved_errno = errno;

  /* A zero status asks `error' to report only.  */
  error (0, saved_errno, "%s", name);
}
58
59/* Use when a system call returns non-zero status and that is fatal. */
60
61void
62pfatal_with_name (char const *name)
63{
64 int e = errno;
65 print_message_queue ();
66 error (EXIT_TROUBLE, e, "%s", name);
67 abort ();
68}
69
70/* Print an error message containing MSGID, then exit. */
71
72void
73fatal (char const *msgid)
74{
75 print_message_queue ();
76 error (EXIT_TROUBLE, 0, "%s", _(msgid));
77 abort ();
78}
79
80
/* Like printf with a format and two string arguments, except that when
   -l is in effect the message is saved and printed later.
   This is used for things like "Only in ...".
   It is message5 with the last two arguments left out.  */

void
message (char const *format_msgid, char const *arg1, char const *arg2)
{
  char const *const no_arg = 0;

  message5 (format_msgid, arg1, arg2, no_arg, no_arg);
}
89
90void
91message5 (char const *format_msgid, char const *arg1, char const *arg2,
92 char const *arg3, char const *arg4)
93{
94 if (paginate)
95 {
96 char *p;
97 char const *arg[5];
98 int i;
99 size_t size[5];
100 size_t total_size = offsetof (struct msg, args);
101 struct msg *new;
102
103 arg[0] = format_msgid;
104 arg[1] = arg1;
105 arg[2] = arg2;
106 arg[3] = arg3 ? arg3 : "";
107 arg[4] = arg4 ? arg4 : "";
108
109 for (i = 0; i < 5; i++)
110 total_size += size[i] = strlen (arg[i]) + 1;
111
112 new = xmalloc (total_size);
113
114 for (i = 0, p = new->args; i < 5; p += size[i++])
115 memcpy (p, arg[i], size[i]);
116
117 *msg_chain_end = new;
118 new->next = 0;
119 msg_chain_end = &new->next;
120 }
121 else
122 {
123 if (sdiff_merge_assist)
124 putchar (' ');
125 printf (_(format_msgid), arg1, arg2, arg3, arg4);
126 }
127}
128
129/* Output all the messages that were saved up by calls to `message'. */
130
131void
132print_message_queue (void)
133{
134 char const *arg[5];
135 int i;
136 struct msg *m = msg_chain;
137
138 while (m)
139 {
140 struct msg *next = m->next;
141 arg[0] = m->args;
142 for (i = 0; i < 4; i++)
143 arg[i + 1] = arg[i] + strlen (arg[i]) + 1;
144 printf (_(arg[0]), arg[1], arg[2], arg[3], arg[4]);
145 free (m);
146 m = next;
147 }
148}
149
150
151/* Call before outputting the results of comparing files NAME0 and NAME1
152 to set up OUTFILE, the stdio stream for the output to go to.
153
154 Usually, OUTFILE is just stdout. But when -l was specified
155 we fork off a `pr' and make OUTFILE a pipe to it.
156 `pr' then outputs to our stdout. */
157
158static char const *current_name0;
159static char const *current_name1;
160static bool currently_recursive;
161
162void
163setup_output (char const *name0, char const *name1, bool recursive)
164{
165 current_name0 = name0;
166 current_name1 = name1;
167 currently_recursive = recursive;
168 outfile = 0;
169}
170
171#if HAVE_WORKING_FORK || HAVE_WORKING_VFORK
172static pid_t pr_pid;
173#endif
174
175void
176begin_output (void)
177{
178 char *name;
179
180 if (outfile != 0)
181 return;
182
183 /* Construct the header of this piece of diff. */
184 name = xmalloc (strlen (current_name0) + strlen (current_name1)
185 + strlen (switch_string) + 7);
186
187 /* POSIX 1003.1-2001 specifies this format. But there are some bugs in
188 the standard: it says that we must print only the last component
189 of the pathnames, and it requires two spaces after "diff" if
190 there are no options. These requirements are silly and do not
191 match historical practice. */
192 sprintf (name, "diff%s %s %s", switch_string, current_name0, current_name1);
193
194 if (paginate)
195 {
196 if (fflush (stdout) != 0)
197 pfatal_with_name (_("write failed"));
198
199 /* Make OUTFILE a pipe to a subsidiary `pr'. */
200 {
201#if HAVE_WORKING_FORK || HAVE_WORKING_VFORK
202 int pipes[2];
203
204 if (pipe (pipes) != 0)
205 pfatal_with_name ("pipe");
206
207 pr_pid = vfork ();
208 if (pr_pid < 0)
209 pfatal_with_name ("fork");
210
211 if (pr_pid == 0)
212 {
213 close (pipes[1]);
214 if (pipes[0] != STDIN_FILENO)
215 {
216 if (dup2 (pipes[0], STDIN_FILENO) < 0)
217 pfatal_with_name ("dup2");
218 close (pipes[0]);
219 }
220
221 execl (pr_program, pr_program, "-h", name, 0);
222 _exit (errno == ENOEXEC ? 126 : 127);
223 }
224 else
225 {
226 close (pipes[0]);
227 outfile = fdopen (pipes[1], "w");
228 if (!outfile)
229 pfatal_with_name ("fdopen");
230 }
231#else
232 char *command = xmalloc (sizeof pr_program - 1 + 7
233 + quote_system_arg ((char *) 0, name) + 1);
234 char *p;
235 sprintf (command, "%s -f -h ", pr_program);
236 p = command + sizeof pr_program - 1 + 7;
237 p += quote_system_arg (p, name);
238 *p = 0;
239 errno = 0;
240 outfile = popen (command, "w");
241 if (!outfile)
242 pfatal_with_name (command);
243 free (command);
244#endif
245 }
246 }
247 else
248 {
249
250 /* If -l was not specified, output the diff straight to `stdout'. */
251
252 outfile = stdout;
253
254 /* If handling multiple files (because scanning a directory),
255 print which files the following output is about. */
256 if (currently_recursive)
257 printf ("%s\n", name);
258 }
259
260 free (name);
261
262 /* A special header is needed at the beginning of context output. */
263 switch (output_style)
264 {
265 case OUTPUT_CONTEXT:
266 print_context_header (files, 0);
267 break;
268
269 case OUTPUT_UNIFIED:
270 print_context_header (files, 1);
271 break;
272
273 default:
274 break;
275 }
276}
277
278/* Call after the end of output of diffs for one file.
279 Close OUTFILE and get rid of the `pr' subfork. */
280
281void
282finish_output (void)
283{
284 if (outfile != 0 && outfile != stdout)
285 {
286 int wstatus;
287 int werrno = 0;
288 if (ferror (outfile))
289 fatal ("write failed");
290#if ! (HAVE_WORKING_FORK || HAVE_WORKING_VFORK)
291 wstatus = pclose (outfile);
292 if (wstatus == -1)
293 werrno = errno;
294#else
295 if (fclose (outfile) != 0)
296 pfatal_with_name (_("write failed"));
297 if (waitpid (pr_pid, &wstatus, 0) < 0)
298 pfatal_with_name ("waitpid");
299#endif
300 if (! werrno && WIFEXITED (wstatus) && WEXITSTATUS (wstatus) == 127)
301 error (EXIT_TROUBLE, 0, _("subsidiary program `%s' not found"),
302 pr_program);
303 if (wstatus != 0)
304 error (EXIT_TROUBLE, werrno, _("subsidiary program `%s' failed"),
305 pr_program);
306 }
307
308 outfile = 0;
309}
310
311
312/* Compare two lines (typically one from each input file)
313 according to the command line options.
314 For efficiency, this is invoked only when the lines do not match exactly
315 but an option like -i might cause us to ignore the difference.
316 Return nonzero if the lines differ. */
317
318bool
319lines_differ (char const *s1, char const *s2)
320{
321 register unsigned char const *t1 = (unsigned char const *) s1;
322 register unsigned char const *t2 = (unsigned char const *) s2;
323 size_t column = 0;
324
325 while (1)
326 {
327 register unsigned char c1 = *t1++;
328 register unsigned char c2 = *t2++;
329
330 /* Test for exact char equality first, since it's a common case. */
331 if (c1 != c2)
332 {
333 switch (ignore_white_space)
334 {
335 case IGNORE_ALL_SPACE:
336 /* For -w, just skip past any white space. */
337 while (ISSPACE (c1) && c1 != '\n') c1 = *t1++;
338 while (ISSPACE (c2) && c2 != '\n') c2 = *t2++;
339 break;
340
341 case IGNORE_SPACE_CHANGE:
342 /* For -b, advance past any sequence of white space in
343 line 1 and consider it just one space, or nothing at
344 all if it is at the end of the line. */
345 if (ISSPACE (c1))
346 {
347 while (c1 != '\n')
348 {
349 c1 = *t1++;
350 if (! ISSPACE (c1))
351 {
352 --t1;
353 c1 = ' ';
354 break;
355 }
356 }
357 }
358
359 /* Likewise for line 2. */
360 if (ISSPACE (c2))
361 {
362 while (c2 != '\n')
363 {
364 c2 = *t2++;
365 if (! ISSPACE (c2))
366 {
367 --t2;
368 c2 = ' ';
369 break;
370 }
371 }
372 }
373
374 if (c1 != c2)
375 {
376 /* If we went too far when doing the simple test
377 for equality, go back to the first non-white-space
378 character in both sides and try again. */
379 if (c2 == ' ' && c1 != '\n'
380 && (unsigned char const *) s1 + 1 < t1
381 && ISSPACE (t1[-2]))
382 {
383 --t1;
384 continue;
385 }
386 if (c1 == ' ' && c2 != '\n'
387 && (unsigned char const *) s2 + 1 < t2
388 && ISSPACE (t2[-2]))
389 {
390 --t2;
391 continue;
392 }
393 }
394
395 break;
396
397 case IGNORE_TAB_EXPANSION:
398 if ((c1 == ' ' && c2 == '\t')
399 || (c1 == '\t' && c2 == ' '))
400 {
401 size_t column2 = column;
402 for (;; c1 = *t1++)
403 {
404 if (c1 == ' ')
405 column++;
406 else if (c1 == '\t')
407 column += TAB_WIDTH - column % TAB_WIDTH;
408 else
409 break;
410 }
411 for (;; c2 = *t2++)
412 {
413 if (c2 == ' ')
414 column2++;
415 else if (c2 == '\t')
416 column2 += TAB_WIDTH - column2 % TAB_WIDTH;
417 else
418 break;
419 }
420 if (column != column2)
421 return 1;
422 }
423 break;
424
425 case IGNORE_NO_WHITE_SPACE:
426 break;
427 }
428
429 /* Lowercase all letters if -i is specified. */
430
431 if (ignore_case)
432 {
433 c1 = TOLOWER (c1);
434 c2 = TOLOWER (c2);
435 }
436
437 if (c1 != c2)
438 break;
439 }
440 if (c1 == '\n')
441 return 0;
442
443 column += c1 == '\t' ? TAB_WIDTH - column % TAB_WIDTH : 1;
444 }
445
446 return 1;
447}
448
449
/* Find the consecutive changes at the start of the script START.
   Return the last link before the first gap.
   This version returns START itself, so the group it reports always
   consists of the first change alone.  */

struct change *
find_change (struct change *start)
{
  struct change *const last_of_group = start;

  return last_of_group;
}
458
/* Like find_change: return START itself as the last link of the
   group that begins at START.  */

struct change *
find_reverse_change (struct change *start)
{
  struct change *const last_of_group = start;

  return last_of_group;
}
464
465
466/* Divide SCRIPT into pieces by calling HUNKFUN and
467 print each piece with PRINTFUN.
468 Both functions take one arg, an edit script.
469
470 HUNKFUN is called with the tail of the script
471 and returns the last link that belongs together with the start
472 of the tail.
473
474 PRINTFUN takes a subscript which belongs together (with a null
475 link at the end) and prints it. */
476
477void
478print_script (struct change *script,
479 struct change * (*hunkfun) (struct change *),
480 void (*printfun) (struct change *))
481{
482 struct change *next = script;
483
484 while (next)
485 {
486 struct change *this, *end;
487
488 /* Find a set of changes that belong together. */
489 this = next;
490 end = (*hunkfun) (next);
491
492 /* Disconnect them from the rest of the changes,
493 making them a hunk, and remember the rest for next iteration. */
494 next = end->link;
495 end->link = 0;
496#ifdef DEBUG
497 debug_script (this);
498#endif
499
500 /* Print this hunk. */
501 (*printfun) (this);
502
503 /* Reconnect the script so it will all be freed properly. */
504 end->link = next;
505 }
506}
507
508
509/* Print the text of a single line LINE,
510 flagging it with the characters in LINE_FLAG (which say whether
511 the line is inserted, deleted, changed, etc.). */
512
513void
514print_1_line (char const *line_flag, char const *const *line)
515{
516 char const *base = line[0], *limit = line[1]; /* Help the compiler. */
517 FILE *out = outfile; /* Help the compiler some more. */
518 char const *flag_format = 0;
519
520 /* If -T was specified, use a Tab between the line-flag and the text.
521 Otherwise use a Space (as Unix diff does).
522 Print neither space nor tab if line-flags are empty. */
523
524 if (line_flag && *line_flag)
525 {
526 flag_format = initial_tab ? "%s\t" : "%s ";
527 fprintf (out, flag_format, line_flag);
528 }
529
530 output_1_line (base, limit, flag_format, line_flag);
531
532 if ((!line_flag || line_flag[0]) && limit[-1] != '\n')
533 fprintf (out, "\n\\ %s\n", _("No newline at end of file"));
534}
535
536/* Output a line from BASE up to LIMIT.
537 With -t, expand white space characters to spaces, and if FLAG_FORMAT
538 is nonzero, output it with argument LINE_FLAG after every
539 internal carriage return, so that tab stops continue to line up. */
540
541void
542output_1_line (char const *base, char const *limit, char const *flag_format,
543 char const *line_flag)
544{
545 if (!expand_tabs)
546 fwrite (base, limit - base, 1, outfile);
547 else
548 {
549 register FILE *out = outfile;
550 register unsigned char c;
551 register char const *t = base;
552 register unsigned int column = 0;
553
554 while (t < limit)
555 switch ((c = *t++))
556 {
557 case '\t':
558 {
559 unsigned int spaces = TAB_WIDTH - column % TAB_WIDTH;
560 column += spaces;
561 do
562 putc (' ', out);
563 while (--spaces);
564 }
565 break;
566
567 case '\r':
568 putc (c, out);
569 if (flag_format && t < limit && *t != '\n')
570 fprintf (out, flag_format, line_flag);
571 column = 0;
572 break;
573
574 case '\b':
575 if (column == 0)
576 continue;
577 column--;
578 putc (c, out);
579 break;
580
581 default:
582 if (ISPRINT (c))
583 column++;
584 putc (c, out);
585 break;
586 }
587 }
588}
589
/* Letters standing for the kinds of change: 'd', 'a' and 'c' at
   indices 1, 2 and 3, and a null character at index 0.
   NOTE(review): presumably indexed by `enum changes' values
   (UNCHANGED, OLD, NEW, CHANGED) -- confirm against diff.h.  */
char const change_letter[] = { '\0', 'd', 'a', 'c' };
591
592
593/* Translate an internal line number (an index into diff's table of lines)
594 into an actual line number in the input file.
595 The internal line number is I. FILE points to the data on the file.
596
597 Internal line numbers count from 0 starting after the prefix.
598 Actual line numbers count from 1 within the entire file. */
599
600lin
601translate_line_number (struct file_data const *file, lin i)
602{
603 return i + file->prefix_lines + 1;
604}
605
606/* Translate a line number range. This is always done for printing,
607 so for convenience translate to long rather than lin, so that the
608 caller can use printf with "%ld" without casting. */
609
610void
611translate_range (struct file_data const *file,
612 lin a, lin b,
613 long *aptr, long *bptr)
614{
615 *aptr = translate_line_number (file, a - 1) + 1;
616 *bptr = translate_line_number (file, b + 1) - 1;
617}
618
619/* Print a pair of line numbers with SEPCHAR, translated for file FILE.
620 If the two numbers are identical, print just one number.
621
622 Args A and B are internal line numbers.
623 We print the translated (real) line numbers. */
624
625void
626print_number_range (char sepchar, struct file_data *file, lin a, lin b)
627{
628 long trans_a, trans_b;
629 translate_range (file, a, b, &trans_a, &trans_b);
630
631 /* Note: we can have B < A in the case of a range of no lines.
632 In this case, we should print the line number before the range,
633 which is B. */
634 if (trans_b > trans_a)
635 fprintf (outfile, "%ld%c%ld", trans_a, sepchar, trans_b);
636 else
637 fprintf (outfile, "%ld", trans_b);
638}
639
640
641/* Look at a hunk of edit script and report the range of lines in each file
642 that it applies to. HUNK is the start of the hunk, which is a chain
643 of `struct change'. The first and last line numbers of file 0 are stored in
644 *FIRST0 and *LAST0, and likewise for file 1 in *FIRST1 and *LAST1.
645 Note that these are internal line numbers that count from 0.
646
647 If no lines from file 0 are deleted, then FIRST0 is LAST0+1.
648
649 Return UNCHANGED if only ignorable lines are inserted or deleted,
650 OLD if lines of file 0 are deleted,
651 NEW if lines of file 1 are inserted,
652 and CHANGED if both kinds of changes are found. */
653
654enum changes
655analyze_hunk (struct change *hunk,
656 lin *first0, lin *last0,
657 lin *first1, lin *last1)
658{
659 struct change *next;
660 lin l0, l1;
661 lin show_from, show_to;
662 lin i;
663 bool trivial = ignore_blank_lines || ignore_regexp.fastmap;
664 size_t trivial_length = (int) ignore_blank_lines - 1;
665 /* If 0, ignore zero-length lines;
666 if SIZE_MAX, do not ignore lines just because of their length. */
667
668 char const * const *linbuf0 = files[0].linbuf; /* Help the compiler. */
669 char const * const *linbuf1 = files[1].linbuf;
670
671 show_from = show_to = 0;
672
673 *first0 = hunk->line0;
674 *first1 = hunk->line1;
675
676 next = hunk;
677 do
678 {
679 l0 = next->line0 + next->deleted - 1;
680 l1 = next->line1 + next->inserted - 1;
681 show_from += next->deleted;
682 show_to += next->inserted;
683
684 for (i = next->line0; i <= l0 && trivial; i++)
685 {
686 char const *line = linbuf0[i];
687 size_t len = linbuf0[i + 1] - line - 1;
688 if (len != trivial_length
689 && (! ignore_regexp.fastmap
690 || re_search (&ignore_regexp, line, len, 0, len, 0) < 0))
691 trivial = 0;
692 }
693
694 for (i = next->line1; i <= l1 && trivial; i++)
695 {
696 char const *line = linbuf1[i];
697 size_t len = linbuf1[i + 1] - line - 1;
698 if (len != trivial_length
699 && (! ignore_regexp.fastmap
700 || re_search (&ignore_regexp, line, len, 0, len, 0) < 0))
701 trivial = 0;
702 }
703 }
704 while ((next = next->link) != 0);
705
706 *last0 = l0;
707 *last1 = l1;
708
709 /* If all inserted or deleted lines are ignorable,
710 tell the caller to ignore this hunk. */
711
712 if (trivial)
713 return UNCHANGED;
714
715 return (show_from ? OLD : UNCHANGED) | (show_to ? NEW : UNCHANGED);
716}
717
718
/* Concatenate three strings, returning a newly malloc'd string.
   The result holds S1, S2 and S3 in that order; the caller is
   expected to free it.  */

char *
concat (char const *s1, char const *s2, char const *s3)
{
  size_t len1 = strlen (s1);
  size_t len2 = strlen (s2);
  size_t len3 = strlen (s3);
  char *result = xmalloc (len1 + len2 + len3 + 1);

  memcpy (result, s1, len1);
  memcpy (result + len1, s2, len2);
  /* The last copy takes S3's terminating '\0' along.  */
  memcpy (result + len1 + len2, s3, len3 + 1);

  return result;
}
728
/* Yield a new block of SIZE bytes, initialized to zero.
   The block comes from xmalloc.  */

void *
zalloc (size_t size)
{
  /* memset returns its first argument, the new block.  */
  return memset (xmalloc (size), 0, size);
}
738
/* Yield the newly malloc'd pathname
   of the file in DIR whose filename is FILE.
   A '/' is put between DIR and FILE, except when the base name of DIR
   is empty or already ends in '/'.  */

char *
dir_file_pathname (char const *dir, char const *file)
{
  char const *base = base_name (dir);
  size_t base_len = strlen (base);
  char const *separator = "/";

  if (base_len == 0 || base[base_len - 1] == '/')
    separator = "";

  return concat (dir, separator, file);
}
749
750
751void
752debug_script (struct change *sp)
753{
754 fflush (stdout);
755
756 for (; sp; sp = sp->link)
757 {
758 long line0 = sp->line0;
759 long line1 = sp->line1;
760 long deleted = sp->deleted;
761 long inserted = sp->inserted;
762 fprintf (stderr, "%3ld %3ld delete %ld insert %ld\n",
763 line0, line1, deleted, inserted);
764 }
765
766 fflush (stderr);
767}
Note: See TracBrowser for help on using the repository browser.