| 1 | /* texindex -- sort TeX index dribble output into an actual index. | 
|---|
| 2 | $Id: texindex.c,v 1.11 2004/04/11 17:56:47 karl Exp $ | 
|---|
| 3 |  | 
|---|
| 4 | Copyright (C) 1987, 1991, 1992, 1996, 1997, 1998, 1999, 2000, 2001, | 
|---|
| 5 | 2002, 2003, 2004 Free Software Foundation, Inc. | 
|---|
| 6 |  | 
|---|
| 7 | This program is free software; you can redistribute it and/or modify | 
|---|
| 8 | it under the terms of the GNU General Public License as published by | 
|---|
| 9 | the Free Software Foundation; either version 2, or (at your option) | 
|---|
| 10 | any later version. | 
|---|
| 11 |  | 
|---|
| 12 | This program is distributed in the hope that it will be useful, | 
|---|
| 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|---|
| 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
|---|
| 15 | GNU General Public License for more details. | 
|---|
| 16 |  | 
|---|
| 17 | You should have received a copy of the GNU General Public License | 
|---|
| 18 | along with this program; if not, write to the Free Software | 
|---|
| 19 | Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307. */ | 
|---|
| 20 |  | 
|---|
| 21 | #include "system.h" | 
|---|
| 22 | #include <getopt.h> | 
|---|
| 23 |  | 
|---|
| 24 | #ifdef __EMX__ | 
|---|
| 25 | #  include <stdlib.h> | 
|---|
| 26 | #endif | 
|---|
| 27 |  | 
|---|
| 28 | static char *program_name = "texindex"; | 
|---|
| 29 |  | 
|---|
| 30 | #if defined (emacs) | 
|---|
| 31 | #  include "../src/config.h" | 
|---|
| 32 | /* Some s/os.h files redefine these. */ | 
|---|
| 33 | #  undef read | 
|---|
| 34 | #  undef close | 
|---|
| 35 | #  undef write | 
|---|
| 36 | #  undef open | 
|---|
| 37 | #endif | 
|---|
| 38 |  | 
|---|
| 39 | #if !defined (HAVE_MEMSET) | 
|---|
| 40 | #undef memset | 
|---|
| 41 | #define memset(ptr, ignore, count) bzero (ptr, count) | 
|---|
| 42 | #endif | 
|---|
| 43 |  | 
|---|
| 44 | char *mktemp (char *); | 
|---|
| 45 |  | 
|---|
| 46 | #if !defined (SEEK_SET) | 
|---|
| 47 | #  define SEEK_SET 0 | 
|---|
| 48 | #  define SEEK_CUR 1 | 
|---|
| 49 | #  define SEEK_END 2 | 
|---|
| 50 | #endif /* !SEEK_SET */ | 
|---|
| 51 |  | 
|---|
| 52 | struct linebuffer; | 
|---|
| 53 |  | 
|---|
| 54 | /* When sorting in core, this structure describes one line | 
|---|
| 55 | and the position and length of its first keyfield.  */ | 
|---|
| 56 | struct lineinfo | 
|---|
| 57 | { | 
|---|
| 58 | char *text;           /* The actual text of the line. */ | 
|---|
| 59 | union { | 
|---|
| 60 | char *text;         /* The start of the key (for textual comparison). */ | 
|---|
| 61 | long number;        /* The numeric value (for numeric comparison). */ | 
|---|
| 62 | } key; | 
|---|
| 63 | long keylen;          /* Length of KEY field. */ | 
|---|
| 64 | }; | 
|---|
| 65 |  | 
|---|
| 66 | /* This structure describes a field to use as a sort key. */ | 
|---|
| 67 | struct keyfield | 
|---|
| 68 | { | 
|---|
| 69 | int startwords;       /* Number of words to skip. */ | 
|---|
| 70 | int startchars;       /* Number of additional chars to skip. */ | 
|---|
| 71 | int endwords;         /* Number of words to ignore at end. */ | 
|---|
| 72 | int endchars;         /* Ditto for characters of last word. */ | 
|---|
| 73 | char ignore_blanks;   /* Non-zero means ignore spaces and tabs. */ | 
|---|
| 74 | char fold_case;       /* Non-zero means case doesn't matter. */ | 
|---|
| 75 | char reverse;         /* Non-zero means compare in reverse order. */ | 
|---|
| 76 | char numeric;         /* Non-zeros means field is ASCII numeric. */ | 
|---|
| 77 | char positional;      /* Sort according to file position. */ | 
|---|
| 78 | char braced;          /* Count balanced-braced groupings as fields. */ | 
|---|
| 79 | }; | 
|---|
| 80 |  | 
|---|
| 81 | /* Vector of keyfields to use. */ | 
|---|
| 82 | struct keyfield keyfields[3]; | 
|---|
| 83 |  | 
|---|
| 84 | /* Number of keyfields stored in that vector.  */ | 
|---|
| 85 | int num_keyfields = 3; | 
|---|
| 86 |  | 
|---|
| 87 | /* Vector of input file names, terminated with a null pointer. */ | 
|---|
| 88 | char **infiles; | 
|---|
| 89 |  | 
|---|
| 90 | /* Vector of corresponding output file names, or NULL, meaning default it | 
|---|
| 91 | (add an `s' to the end). */ | 
|---|
| 92 | char **outfiles; | 
|---|
| 93 |  | 
|---|
| 94 | /* Length of `infiles'. */ | 
|---|
| 95 | int num_infiles; | 
|---|
| 96 |  | 
|---|
| 97 | /* Pointer to the array of pointers to lines being sorted. */ | 
|---|
| 98 | char **linearray; | 
|---|
| 99 |  | 
|---|
| 100 | /* The allocated length of `linearray'. */ | 
|---|
| 101 | long nlines; | 
|---|
| 102 |  | 
|---|
| 103 | /* Directory to use for temporary files.  On Unix, it ends with a slash.  */ | 
|---|
| 104 | char *tempdir; | 
|---|
| 105 |  | 
|---|
| 106 | /* Number of last temporary file.  */ | 
|---|
| 107 | int tempcount; | 
|---|
| 108 |  | 
|---|
| 109 | /* Number of last temporary file already deleted. | 
|---|
| 110 | Temporary files are deleted by `flush_tempfiles' in order of creation.  */ | 
|---|
| 111 | int last_deleted_tempcount; | 
|---|
| 112 |  | 
|---|
| 113 | /* During in-core sort, this points to the base of the data block | 
|---|
| 114 | which contains all the lines of data.  */ | 
|---|
| 115 | char *text_base; | 
|---|
| 116 |  | 
|---|
| 117 | /* Initially 0; changed to 1 if we want initials in this index.  */ | 
|---|
| 118 | int need_initials; | 
|---|
| 119 |  | 
|---|
| 120 | /* Remembers the first initial letter seen in this index, so we can | 
|---|
| 121 | determine whether we need initials in the sorted form.  */ | 
|---|
| 122 | char first_initial; | 
|---|
| 123 |  | 
|---|
| 124 | /* Additional command switches .*/ | 
|---|
| 125 |  | 
|---|
| 126 | /* Nonzero means do not delete tempfiles -- for debugging. */ | 
|---|
| 127 | int keep_tempfiles; | 
|---|
| 128 |  | 
|---|
| 129 | /* Forward declarations of functions in this file. */ | 
|---|
| 130 | void decode_command (int argc, char **argv); | 
|---|
| 131 | void sort_in_core (char *infile, int total, char *outfile); | 
|---|
| 132 | void sort_offline (char *infile, off_t total, char *outfile); | 
|---|
| 133 | char **parsefile (char *filename, char **nextline, char *data, long int size); | 
|---|
| 134 | char *find_field (struct keyfield *keyfield, char *str, long int *lengthptr); | 
|---|
| 135 | char *find_pos (char *str, int words, int chars, int ignore_blanks); | 
|---|
| 136 | long find_value (char *start, long int length); | 
|---|
| 137 | char *find_braced_pos (char *str, int words, int chars, int ignore_blanks); | 
|---|
| 138 | char *find_braced_end (char *str); | 
|---|
| 139 | void writelines (char **linearray, int nlines, FILE *ostream); | 
|---|
| 140 | int compare_field (struct keyfield *keyfield, char *start1, | 
|---|
| 141 | long int length1, long int pos1, char *start2, | 
|---|
| 142 | long int length2, long int pos2); | 
|---|
| 143 | int compare_full (const void *, const void *); | 
|---|
| 144 | long readline (struct linebuffer *linebuffer, FILE *stream); | 
|---|
| 145 | int merge_files (char **infiles, int nfiles, char *outfile); | 
|---|
| 146 | int merge_direct (char **infiles, int nfiles, char *outfile); | 
|---|
| 147 | void pfatal_with_name (const char *name); | 
|---|
| 148 | void fatal (const char *format, const char *arg); | 
|---|
| 149 | void error (const char *format, const char *arg); | 
|---|
| 150 | void *xmalloc (), *xrealloc (); | 
|---|
| 151 | char *concat (char *s1, char *s2); | 
|---|
| 152 | void flush_tempfiles (int to_count); | 
|---|
| 153 |  | 
|---|
| 154 |  | 
|---|
| 155 | #define MAX_IN_CORE_SORT 500000 | 
|---|
| 156 |  | 
|---|
| 157 | int | 
|---|
| 158 | main (int argc, char **argv) | 
|---|
| 159 | { | 
|---|
| 160 | int i; | 
|---|
| 161 |  | 
|---|
| 162 | tempcount = 0; | 
|---|
| 163 | last_deleted_tempcount = 0; | 
|---|
| 164 |  | 
|---|
| 165 | #ifdef __EMX__ | 
|---|
| 166 | _response(&argc, &argv); | 
|---|
| 167 | _wildcard(&argc, &argv); | 
|---|
| 168 | #endif | 
|---|
| 169 |  | 
|---|
| 170 | #ifdef HAVE_SETLOCALE | 
|---|
| 171 | /* Set locale via LC_ALL.  */ | 
|---|
| 172 | setlocale (LC_ALL, ""); | 
|---|
| 173 | #endif | 
|---|
| 174 |  | 
|---|
| 175 | /* Set the text message domain.  */ | 
|---|
| 176 | bindtextdomain (PACKAGE, LOCALEDIR); | 
|---|
| 177 | textdomain (PACKAGE); | 
|---|
| 178 |  | 
|---|
| 179 | /* In case we write to a redirected stdout that fails.  */ | 
|---|
| 180 | /* not ready atexit (close_stdout); */ | 
|---|
| 181 |  | 
|---|
| 182 | /* Describe the kind of sorting to do. */ | 
|---|
| 183 | /* The first keyfield uses the first braced field and folds case. */ | 
|---|
| 184 | keyfields[0].braced = 1; | 
|---|
| 185 | keyfields[0].fold_case = 1; | 
|---|
| 186 | keyfields[0].endwords = -1; | 
|---|
| 187 | keyfields[0].endchars = -1; | 
|---|
| 188 |  | 
|---|
| 189 | /* The second keyfield uses the second braced field, numerically. */ | 
|---|
| 190 | keyfields[1].braced = 1; | 
|---|
| 191 | keyfields[1].numeric = 1; | 
|---|
| 192 | keyfields[1].startwords = 1; | 
|---|
| 193 | keyfields[1].endwords = -1; | 
|---|
| 194 | keyfields[1].endchars = -1; | 
|---|
| 195 |  | 
|---|
| 196 | /* The third keyfield (which is ignored while discarding duplicates) | 
|---|
| 197 | compares the whole line. */ | 
|---|
| 198 | keyfields[2].endwords = -1; | 
|---|
| 199 | keyfields[2].endchars = -1; | 
|---|
| 200 |  | 
|---|
| 201 | decode_command (argc, argv); | 
|---|
| 202 |  | 
|---|
| 203 | /* Process input files completely, one by one.  */ | 
|---|
| 204 |  | 
|---|
| 205 | for (i = 0; i < num_infiles; i++) | 
|---|
| 206 | { | 
|---|
| 207 | int desc; | 
|---|
| 208 | off_t ptr; | 
|---|
| 209 | char *outfile; | 
|---|
| 210 | struct stat instat; | 
|---|
| 211 |  | 
|---|
| 212 | desc = open (infiles[i], O_RDONLY, 0); | 
|---|
| 213 | if (desc < 0) | 
|---|
| 214 | pfatal_with_name (infiles[i]); | 
|---|
| 215 |  | 
|---|
| 216 | if (stat (infiles[i], &instat)) | 
|---|
| 217 | pfatal_with_name (infiles[i]); | 
|---|
| 218 | if (S_ISDIR (instat.st_mode)) | 
|---|
| 219 | { | 
|---|
| 220 | #ifdef EISDIR | 
|---|
| 221 | errno = EISDIR; | 
|---|
| 222 | #endif | 
|---|
| 223 | pfatal_with_name (infiles[i]); | 
|---|
| 224 | } | 
|---|
| 225 |  | 
|---|
| 226 | lseek (desc, (off_t) 0, SEEK_END); | 
|---|
| 227 | ptr = (off_t) lseek (desc, (off_t) 0, SEEK_CUR); | 
|---|
| 228 |  | 
|---|
| 229 | close (desc); | 
|---|
| 230 |  | 
|---|
| 231 | outfile = outfiles[i]; | 
|---|
| 232 | if (!outfile) | 
|---|
| 233 | outfile = concat (infiles[i], "s"); | 
|---|
| 234 |  | 
|---|
| 235 | need_initials = 0; | 
|---|
| 236 | first_initial = '\0'; | 
|---|
| 237 |  | 
|---|
| 238 | if (ptr < MAX_IN_CORE_SORT) | 
|---|
| 239 | /* Sort a small amount of data. */ | 
|---|
| 240 | sort_in_core (infiles[i], (int)ptr, outfile); | 
|---|
| 241 | else | 
|---|
| 242 | sort_offline (infiles[i], ptr, outfile); | 
|---|
| 243 | } | 
|---|
| 244 |  | 
|---|
| 245 | flush_tempfiles (tempcount); | 
|---|
| 246 | xexit (0); | 
|---|
| 247 | return 0; /* Avoid bogus warnings.  */ | 
|---|
| 248 | } | 
|---|
| 249 |  | 
|---|
| 250 |  | 
|---|
| 251 | typedef struct | 
|---|
| 252 | { | 
|---|
| 253 | char *long_name; | 
|---|
| 254 | char *short_name; | 
|---|
| 255 | int *variable_ref; | 
|---|
| 256 | int variable_value; | 
|---|
| 257 | char *arg_name; | 
|---|
| 258 | char *doc_string; | 
|---|
| 259 | } TEXINDEX_OPTION; | 
|---|
| 260 |  | 
|---|
| 261 | TEXINDEX_OPTION texindex_options[] = { | 
|---|
| 262 | { "--help", "-h", (int *)NULL, 0, (char *)NULL, | 
|---|
| 263 | N_("display this help and exit") }, | 
|---|
| 264 | { "--keep", "-k", &keep_tempfiles, 1, (char *)NULL, | 
|---|
| 265 | N_("keep temporary files around after processing") }, | 
|---|
| 266 | { "--no-keep", 0, &keep_tempfiles, 0, (char *)NULL, | 
|---|
| 267 | N_("do not keep temporary files around after processing (default)") }, | 
|---|
| 268 | { "--output", "-o", (int *)NULL, 0, "FILE", | 
|---|
| 269 | N_("send output to FILE") }, | 
|---|
| 270 | { "--version", (char *)NULL, (int *)NULL, 0, (char *)NULL, | 
|---|
| 271 | N_("display version information and exit") }, | 
|---|
| 272 | { (char *)NULL, (char *)NULL, (int *)NULL, 0, (char *)NULL } | 
|---|
| 273 | }; | 
|---|
| 274 |  | 
|---|
| 275 | void | 
|---|
| 276 | usage (int result_value) | 
|---|
| 277 | { | 
|---|
| 278 | register int i; | 
|---|
| 279 | FILE *f = result_value ? stderr : stdout; | 
|---|
| 280 |  | 
|---|
| 281 | fprintf (f, _("Usage: %s [OPTION]... FILE...\n"), program_name); | 
|---|
| 282 | fprintf (f, _("Generate a sorted index for each TeX output FILE.\n")); | 
|---|
| 283 | /* Avoid trigraph nonsense.  */ | 
|---|
| 284 | fprintf (f, | 
|---|
| 285 | _("Usually FILE... is specified as `foo.%c%c\' for a document `foo.texi'.\n"), | 
|---|
| 286 | '?', '?'); /* avoid trigraph in cat-id-tbl.c */ | 
|---|
| 287 | fprintf (f, _("\nOptions:\n")); | 
|---|
| 288 |  | 
|---|
| 289 | for (i = 0; texindex_options[i].long_name; i++) | 
|---|
| 290 | { | 
|---|
| 291 | putc (' ', f); | 
|---|
| 292 |  | 
|---|
| 293 | if (texindex_options[i].short_name) | 
|---|
| 294 | fprintf (f, "%s, ", texindex_options[i].short_name); | 
|---|
| 295 |  | 
|---|
| 296 | fprintf (f, "%s %s", | 
|---|
| 297 | texindex_options[i].long_name, | 
|---|
| 298 | texindex_options[i].arg_name | 
|---|
| 299 | ? texindex_options[i].arg_name : ""); | 
|---|
| 300 |  | 
|---|
| 301 | fprintf (f, "\t%s\n", _(texindex_options[i].doc_string)); | 
|---|
| 302 | } | 
|---|
| 303 | fputs (_("\n\ | 
|---|
| 304 | Email bug reports to bug-texinfo@gnu.org,\n\ | 
|---|
| 305 | general questions and discussion to help-texinfo@gnu.org.\n\ | 
|---|
| 306 | Texinfo home page: http://www.gnu.org/software/texinfo/"), f); | 
|---|
| 307 | fputs ("\n", f); | 
|---|
| 308 |  | 
|---|
| 309 | xexit (result_value); | 
|---|
| 310 | } | 
|---|
| 311 |  | 
|---|
| 312 | /* Decode the command line arguments to set the parameter variables | 
|---|
| 313 | and set up the vector of keyfields and the vector of input files. */ | 
|---|
| 314 |  | 
|---|
| 315 | void | 
|---|
| 316 | decode_command (int argc, char **argv) | 
|---|
| 317 | { | 
|---|
| 318 | int arg_index = 1; | 
|---|
| 319 | char **ip; | 
|---|
| 320 | char **op; | 
|---|
| 321 |  | 
|---|
| 322 | /* Store default values into parameter variables. */ | 
|---|
| 323 |  | 
|---|
| 324 | tempdir = getenv ("TMPDIR"); | 
|---|
| 325 | if (tempdir == NULL) | 
|---|
| 326 | tempdir = getenv ("TEMP"); | 
|---|
| 327 | if (tempdir == NULL) | 
|---|
| 328 | tempdir = getenv ("TMP"); | 
|---|
| 329 | if (tempdir == NULL) | 
|---|
| 330 | tempdir = DEFAULT_TMPDIR; | 
|---|
| 331 | else | 
|---|
| 332 | tempdir = concat (tempdir, "/"); | 
|---|
| 333 |  | 
|---|
| 334 | keep_tempfiles = 0; | 
|---|
| 335 |  | 
|---|
| 336 | /* Allocate ARGC input files, which must be enough.  */ | 
|---|
| 337 |  | 
|---|
| 338 | infiles = (char **) xmalloc (argc * sizeof (char *)); | 
|---|
| 339 | outfiles = (char **) xmalloc (argc * sizeof (char *)); | 
|---|
| 340 | ip = infiles; | 
|---|
| 341 | op = outfiles; | 
|---|
| 342 |  | 
|---|
| 343 | while (arg_index < argc) | 
|---|
| 344 | { | 
|---|
| 345 | char *arg = argv[arg_index++]; | 
|---|
| 346 |  | 
|---|
| 347 | if (*arg == '-') | 
|---|
| 348 | { | 
|---|
| 349 | if (strcmp (arg, "--version") == 0) | 
|---|
| 350 | { | 
|---|
| 351 | printf ("texindex (GNU %s) %s\n", PACKAGE, VERSION); | 
|---|
| 352 | puts (""); | 
|---|
| 353 | puts ("Copyright (C) 2004 Free Software Foundation, Inc."); | 
|---|
| 354 | printf (_("There is NO warranty.  You may redistribute this software\n\ | 
|---|
| 355 | under the terms of the GNU General Public License.\n\ | 
|---|
| 356 | For more information about these matters, see the files named COPYING.\n")); | 
|---|
| 357 | xexit (0); | 
|---|
| 358 | } | 
|---|
| 359 | else if ((strcmp (arg, "--keep") == 0) || | 
|---|
| 360 | (strcmp (arg, "-k") == 0)) | 
|---|
| 361 | { | 
|---|
| 362 | keep_tempfiles = 1; | 
|---|
| 363 | } | 
|---|
| 364 | else if ((strcmp (arg, "--help") == 0) || | 
|---|
| 365 | (strcmp (arg, "-h") == 0)) | 
|---|
| 366 | { | 
|---|
| 367 | usage (0); | 
|---|
| 368 | } | 
|---|
| 369 | else if ((strcmp (arg, "--output") == 0) || | 
|---|
| 370 | (strcmp (arg, "-o") == 0)) | 
|---|
| 371 | { | 
|---|
| 372 | if (argv[arg_index] != (char *)NULL) | 
|---|
| 373 | { | 
|---|
| 374 | arg_index++; | 
|---|
| 375 | if (op > outfiles) | 
|---|
| 376 | *(op - 1) = argv[arg_index]; | 
|---|
| 377 | } | 
|---|
| 378 | else | 
|---|
| 379 | usage (1); | 
|---|
| 380 | } | 
|---|
| 381 | else | 
|---|
| 382 | usage (1); | 
|---|
| 383 | } | 
|---|
| 384 | else | 
|---|
| 385 | { | 
|---|
| 386 | *ip++ = arg; | 
|---|
| 387 | *op++ = (char *)NULL; | 
|---|
| 388 | } | 
|---|
| 389 | } | 
|---|
| 390 |  | 
|---|
| 391 | /* Record number of keyfields and terminate list of filenames. */ | 
|---|
| 392 | num_infiles = ip - infiles; | 
|---|
| 393 | *ip = (char *)NULL; | 
|---|
| 394 | if (num_infiles == 0) | 
|---|
| 395 | usage (1); | 
|---|
| 396 | } | 
|---|
| 397 |  | 
|---|
| 398 |  | 
|---|
| 399 | /* Return a name for temporary file COUNT. */ | 
|---|
| 400 |  | 
|---|
| 401 | static char * | 
|---|
| 402 | maketempname (int count) | 
|---|
| 403 | { | 
|---|
| 404 | static char *tempbase = NULL; | 
|---|
| 405 | char tempsuffix[10]; | 
|---|
| 406 |  | 
|---|
| 407 | if (!tempbase) | 
|---|
| 408 | { | 
|---|
| 409 | int fd; | 
|---|
| 410 | tempbase = concat (tempdir, "txidxXXXXXX"); | 
|---|
| 411 |  | 
|---|
| 412 | fd = mkstemp (tempbase); | 
|---|
| 413 | if (fd == -1) | 
|---|
| 414 | pfatal_with_name (tempbase); | 
|---|
| 415 | } | 
|---|
| 416 |  | 
|---|
| 417 | sprintf (tempsuffix, ".%d", count); | 
|---|
| 418 | return concat (tempbase, tempsuffix); | 
|---|
| 419 | } | 
|---|
| 420 |  | 
|---|
| 421 |  | 
|---|
| 422 | /* Delete all temporary files up to TO_COUNT. */ | 
|---|
| 423 |  | 
|---|
| 424 | void | 
|---|
| 425 | flush_tempfiles (int to_count) | 
|---|
| 426 | { | 
|---|
| 427 | if (keep_tempfiles) | 
|---|
| 428 | return; | 
|---|
| 429 | while (last_deleted_tempcount < to_count) | 
|---|
| 430 | unlink (maketempname (++last_deleted_tempcount)); | 
|---|
| 431 | } | 
|---|
| 432 |  | 
|---|
| 433 |  | 
|---|
| 434 |  | 
|---|
| 435 | /* Compare LINE1 and LINE2 according to the specified set of keyfields. */ | 
|---|
| 436 |  | 
|---|
| 437 | int | 
|---|
| 438 | compare_full (const void *p1, const void *p2) | 
|---|
| 439 | { | 
|---|
| 440 | char **line1 = (char **) p1; | 
|---|
| 441 | char **line2 = (char **) p2; | 
|---|
| 442 | int i; | 
|---|
| 443 |  | 
|---|
| 444 | /* Compare using the first keyfield; | 
|---|
| 445 | if that does not distinguish the lines, try the second keyfield; | 
|---|
| 446 | and so on. */ | 
|---|
| 447 |  | 
|---|
| 448 | for (i = 0; i < num_keyfields; i++) | 
|---|
| 449 | { | 
|---|
| 450 | long length1, length2; | 
|---|
| 451 | char *start1 = find_field (&keyfields[i], *line1, &length1); | 
|---|
| 452 | char *start2 = find_field (&keyfields[i], *line2, &length2); | 
|---|
| 453 | int tem = compare_field (&keyfields[i], start1, length1, | 
|---|
| 454 | *line1 - text_base, | 
|---|
| 455 | start2, length2, *line2 - text_base); | 
|---|
| 456 | if (tem) | 
|---|
| 457 | { | 
|---|
| 458 | if (keyfields[i].reverse) | 
|---|
| 459 | return -tem; | 
|---|
| 460 | return tem; | 
|---|
| 461 | } | 
|---|
| 462 | } | 
|---|
| 463 |  | 
|---|
| 464 | return 0;                     /* Lines match exactly. */ | 
|---|
| 465 | } | 
|---|
| 466 |  | 
|---|
| 467 | /* Compare LINE1 and LINE2, described by structures | 
|---|
| 468 | in which the first keyfield is identified in advance. | 
|---|
| 469 | For positional sorting, assumes that the order of the lines in core | 
|---|
| 470 | reflects their nominal order.  */ | 
|---|
| 471 | int | 
|---|
| 472 | compare_prepared (const void *p1, const void *p2) | 
|---|
| 473 | { | 
|---|
| 474 | struct lineinfo *line1 = (struct lineinfo *) p1; | 
|---|
| 475 | struct lineinfo *line2 = (struct lineinfo *) p2; | 
|---|
| 476 | int i; | 
|---|
| 477 | int tem; | 
|---|
| 478 | char *text1, *text2; | 
|---|
| 479 |  | 
|---|
| 480 | /* Compare using the first keyfield, which has been found for us already. */ | 
|---|
| 481 | if (keyfields->positional) | 
|---|
| 482 | { | 
|---|
| 483 | if (line1->text - text_base > line2->text - text_base) | 
|---|
| 484 | tem = 1; | 
|---|
| 485 | else | 
|---|
| 486 | tem = -1; | 
|---|
| 487 | } | 
|---|
| 488 | else if (keyfields->numeric) | 
|---|
| 489 | tem = line1->key.number - line2->key.number; | 
|---|
| 490 | else | 
|---|
| 491 | tem = compare_field (keyfields, line1->key.text, line1->keylen, 0, | 
|---|
| 492 | line2->key.text, line2->keylen, 0); | 
|---|
| 493 | if (tem) | 
|---|
| 494 | { | 
|---|
| 495 | if (keyfields->reverse) | 
|---|
| 496 | return -tem; | 
|---|
| 497 | return tem; | 
|---|
| 498 | } | 
|---|
| 499 |  | 
|---|
| 500 | text1 = line1->text; | 
|---|
| 501 | text2 = line2->text; | 
|---|
| 502 |  | 
|---|
| 503 | /* Compare using the second keyfield; | 
|---|
| 504 | if that does not distinguish the lines, try the third keyfield; | 
|---|
| 505 | and so on. */ | 
|---|
| 506 |  | 
|---|
| 507 | for (i = 1; i < num_keyfields; i++) | 
|---|
| 508 | { | 
|---|
| 509 | long length1, length2; | 
|---|
| 510 | char *start1 = find_field (&keyfields[i], text1, &length1); | 
|---|
| 511 | char *start2 = find_field (&keyfields[i], text2, &length2); | 
|---|
| 512 | int tem = compare_field (&keyfields[i], start1, length1, | 
|---|
| 513 | text1 - text_base, | 
|---|
| 514 | start2, length2, text2 - text_base); | 
|---|
| 515 | if (tem) | 
|---|
| 516 | { | 
|---|
| 517 | if (keyfields[i].reverse) | 
|---|
| 518 | return -tem; | 
|---|
| 519 | return tem; | 
|---|
| 520 | } | 
|---|
| 521 | } | 
|---|
| 522 |  | 
|---|
| 523 | return 0;                     /* Lines match exactly. */ | 
|---|
| 524 | } | 
|---|
| 525 |  | 
|---|
| 526 | /* Like compare_full but more general. | 
|---|
| 527 | You can pass any strings, and you can say how many keyfields to use. | 
|---|
| 528 | POS1 and POS2 should indicate the nominal positional ordering of | 
|---|
| 529 | the two lines in the input.  */ | 
|---|
| 530 |  | 
|---|
| 531 | int | 
|---|
| 532 | compare_general (char *str1, char *str2, long int pos1, long int pos2, int use_keyfields) | 
|---|
| 533 | { | 
|---|
| 534 | int i; | 
|---|
| 535 |  | 
|---|
| 536 | /* Compare using the first keyfield; | 
|---|
| 537 | if that does not distinguish the lines, try the second keyfield; | 
|---|
| 538 | and so on. */ | 
|---|
| 539 |  | 
|---|
| 540 | for (i = 0; i < use_keyfields; i++) | 
|---|
| 541 | { | 
|---|
| 542 | long length1, length2; | 
|---|
| 543 | char *start1 = find_field (&keyfields[i], str1, &length1); | 
|---|
| 544 | char *start2 = find_field (&keyfields[i], str2, &length2); | 
|---|
| 545 | int tem = compare_field (&keyfields[i], start1, length1, pos1, | 
|---|
| 546 | start2, length2, pos2); | 
|---|
| 547 | if (tem) | 
|---|
| 548 | { | 
|---|
| 549 | if (keyfields[i].reverse) | 
|---|
| 550 | return -tem; | 
|---|
| 551 | return tem; | 
|---|
| 552 | } | 
|---|
| 553 | } | 
|---|
| 554 |  | 
|---|
| 555 | return 0;                     /* Lines match exactly. */ | 
|---|
| 556 | } | 
|---|
| 557 |  | 
|---|
| 558 | /* Find the start and length of a field in STR according to KEYFIELD. | 
|---|
| 559 | A pointer to the starting character is returned, and the length | 
|---|
| 560 | is stored into the int that LENGTHPTR points to.  */ | 
|---|
| 561 |  | 
|---|
| 562 | char * | 
|---|
| 563 | find_field (struct keyfield *keyfield, char *str, long int *lengthptr) | 
|---|
| 564 | { | 
|---|
| 565 | char *start; | 
|---|
| 566 | char *end; | 
|---|
| 567 | char *(*fun) (); | 
|---|
| 568 |  | 
|---|
| 569 | if (keyfield->braced) | 
|---|
| 570 | fun = find_braced_pos; | 
|---|
| 571 | else | 
|---|
| 572 | fun = find_pos; | 
|---|
| 573 |  | 
|---|
| 574 | start = (*fun) (str, keyfield->startwords, keyfield->startchars, | 
|---|
| 575 | keyfield->ignore_blanks); | 
|---|
| 576 | if (keyfield->endwords < 0) | 
|---|
| 577 | { | 
|---|
| 578 | if (keyfield->braced) | 
|---|
| 579 | end = find_braced_end (start); | 
|---|
| 580 | else | 
|---|
| 581 | { | 
|---|
| 582 | end = start; | 
|---|
| 583 | while (*end && *end != '\n') | 
|---|
| 584 | end++; | 
|---|
| 585 | } | 
|---|
| 586 | } | 
|---|
| 587 | else | 
|---|
| 588 | { | 
|---|
| 589 | end = (*fun) (str, keyfield->endwords, keyfield->endchars, 0); | 
|---|
| 590 | if (end - str < start - str) | 
|---|
| 591 | end = start; | 
|---|
| 592 | } | 
|---|
| 593 | *lengthptr = end - start; | 
|---|
| 594 | return start; | 
|---|
| 595 | } | 
|---|
| 596 |  | 
|---|
| 597 | /* Return a pointer to a specified place within STR, | 
|---|
| 598 | skipping (from the beginning) WORDS words and then CHARS chars. | 
|---|
| 599 | If IGNORE_BLANKS is nonzero, we skip all blanks | 
|---|
| 600 | after finding the specified word.  */ | 
|---|
| 601 |  | 
|---|
| 602 | char * | 
|---|
| 603 | find_pos (char *str, int words, int chars, int ignore_blanks) | 
|---|
| 604 | { | 
|---|
| 605 | int i; | 
|---|
| 606 | char *p = str; | 
|---|
| 607 |  | 
|---|
| 608 | for (i = 0; i < words; i++) | 
|---|
| 609 | { | 
|---|
| 610 | char c; | 
|---|
| 611 | /* Find next bunch of nonblanks and skip them. */ | 
|---|
| 612 | while ((c = *p) == ' ' || c == '\t') | 
|---|
| 613 | p++; | 
|---|
| 614 | while ((c = *p) && c != '\n' && !(c == ' ' || c == '\t')) | 
|---|
| 615 | p++; | 
|---|
| 616 | if (!*p || *p == '\n') | 
|---|
| 617 | return p; | 
|---|
| 618 | } | 
|---|
| 619 |  | 
|---|
| 620 | while (*p == ' ' || *p == '\t') | 
|---|
| 621 | p++; | 
|---|
| 622 |  | 
|---|
| 623 | for (i = 0; i < chars; i++) | 
|---|
| 624 | { | 
|---|
| 625 | if (!*p || *p == '\n') | 
|---|
| 626 | break; | 
|---|
| 627 | p++; | 
|---|
| 628 | } | 
|---|
| 629 | return p; | 
|---|
| 630 | } | 
|---|
| 631 |  | 
|---|
| 632 | /* Like find_pos but assumes that each field is surrounded by braces | 
|---|
| 633 | and that braces within fields are balanced. */ | 
|---|
| 634 |  | 
|---|
| 635 | char * | 
|---|
| 636 | find_braced_pos (char *str, int words, int chars, int ignore_blanks) | 
|---|
| 637 | { | 
|---|
| 638 | int i; | 
|---|
| 639 | int bracelevel; | 
|---|
| 640 | char *p = str; | 
|---|
| 641 | char c; | 
|---|
| 642 |  | 
|---|
| 643 | for (i = 0; i < words; i++) | 
|---|
| 644 | { | 
|---|
| 645 | bracelevel = 1; | 
|---|
| 646 | while ((c = *p++) != '{' && c != '\n' && c) | 
|---|
| 647 | /* Do nothing. */ ; | 
|---|
| 648 | if (c != '{') | 
|---|
| 649 | return p - 1; | 
|---|
| 650 | while (bracelevel) | 
|---|
| 651 | { | 
|---|
| 652 | c = *p++; | 
|---|
| 653 | if (c == '{') | 
|---|
| 654 | bracelevel++; | 
|---|
| 655 | if (c == '}') | 
|---|
| 656 | bracelevel--; | 
|---|
| 657 | if (c == 0 || c == '\n') | 
|---|
| 658 | return p - 1; | 
|---|
| 659 | } | 
|---|
| 660 | } | 
|---|
| 661 |  | 
|---|
| 662 | while ((c = *p++) != '{' && c != '\n' && c) | 
|---|
| 663 | /* Do nothing. */ ; | 
|---|
| 664 |  | 
|---|
| 665 | if (c != '{') | 
|---|
| 666 | return p - 1; | 
|---|
| 667 |  | 
|---|
| 668 | if (ignore_blanks) | 
|---|
| 669 | while ((c = *p) == ' ' || c == '\t') | 
|---|
| 670 | p++; | 
|---|
| 671 |  | 
|---|
| 672 | for (i = 0; i < chars; i++) | 
|---|
| 673 | { | 
|---|
| 674 | if (!*p || *p == '\n') | 
|---|
| 675 | break; | 
|---|
| 676 | p++; | 
|---|
| 677 | } | 
|---|
| 678 | return p; | 
|---|
| 679 | } | 
|---|
| 680 |  | 
|---|
| 681 | /* Find the end of the balanced-brace field which starts at STR. | 
|---|
| 682 | The position returned is just before the closing brace. */ | 
|---|
| 683 |  | 
|---|
| 684 | char * | 
|---|
| 685 | find_braced_end (char *str) | 
|---|
| 686 | { | 
|---|
| 687 | int bracelevel; | 
|---|
| 688 | char *p = str; | 
|---|
| 689 | char c; | 
|---|
| 690 |  | 
|---|
| 691 | bracelevel = 1; | 
|---|
| 692 | while (bracelevel) | 
|---|
| 693 | { | 
|---|
| 694 | c = *p++; | 
|---|
| 695 | if (c == '{') | 
|---|
| 696 | bracelevel++; | 
|---|
| 697 | if (c == '}') | 
|---|
| 698 | bracelevel--; | 
|---|
| 699 | if (c == 0 || c == '\n') | 
|---|
| 700 | return p - 1; | 
|---|
| 701 | } | 
|---|
| 702 | return p - 1; | 
|---|
| 703 | } | 
|---|
| 704 |  | 
|---|
| 705 | long | 
|---|
| 706 | find_value (char *start, long int length) | 
|---|
| 707 | { | 
|---|
| 708 | while (length != 0L) | 
|---|
| 709 | { | 
|---|
| 710 | if (isdigit (*start)) | 
|---|
| 711 | return atol (start); | 
|---|
| 712 | length--; | 
|---|
| 713 | start++; | 
|---|
| 714 | } | 
|---|
| 715 | return 0l; | 
|---|
| 716 | } | 
|---|
| 717 |  | 
|---|
| 718 | /* Vector used to translate characters for comparison. | 
|---|
| 719 | This is how we make all alphanumerics follow all else, | 
|---|
| 720 | and ignore case in the first sorting.  */ | 
|---|
| 721 | int char_order[256]; | 
|---|
| 722 |  | 
|---|
| 723 | void | 
|---|
| 724 | init_char_order (void) | 
|---|
| 725 | { | 
|---|
| 726 | int i; | 
|---|
| 727 | for (i = 1; i < 256; i++) | 
|---|
| 728 | char_order[i] = i; | 
|---|
| 729 |  | 
|---|
| 730 | for (i = '0'; i <= '9'; i++) | 
|---|
| 731 | char_order[i] += 512; | 
|---|
| 732 |  | 
|---|
| 733 | for (i = 'a'; i <= 'z'; i++) | 
|---|
| 734 | { | 
|---|
| 735 | char_order[i] = 512 + i; | 
|---|
| 736 | char_order[i + 'A' - 'a'] = 512 + i; | 
|---|
| 737 | } | 
|---|
| 738 | } | 
|---|
| 739 |  | 
|---|
| 740 | /* Compare two fields (each specified as a start pointer and a character count) | 
|---|
| 741 | according to KEYFIELD. | 
|---|
| 742 | The sign of the value reports the relation between the fields. */ | 
|---|
| 743 |  | 
|---|
| 744 | int | 
|---|
| 745 | compare_field (struct keyfield *keyfield, char *start1, long int length1, | 
|---|
| 746 | long int pos1, char *start2, long int length2, long int pos2) | 
|---|
| 747 | { | 
|---|
| 748 | if (keyfields->positional) | 
|---|
| 749 | { | 
|---|
| 750 | if (pos1 > pos2) | 
|---|
| 751 | return 1; | 
|---|
| 752 | else | 
|---|
| 753 | return -1; | 
|---|
| 754 | } | 
|---|
| 755 | if (keyfield->numeric) | 
|---|
| 756 | { | 
|---|
| 757 | long value = find_value (start1, length1) - find_value (start2, length2); | 
|---|
| 758 | if (value > 0) | 
|---|
| 759 | return 1; | 
|---|
| 760 | if (value < 0) | 
|---|
| 761 | return -1; | 
|---|
| 762 | return 0; | 
|---|
| 763 | } | 
|---|
| 764 | else | 
|---|
| 765 | { | 
|---|
| 766 | char *p1 = start1; | 
|---|
| 767 | char *p2 = start2; | 
|---|
| 768 | char *e1 = start1 + length1; | 
|---|
| 769 | char *e2 = start2 + length2; | 
|---|
| 770 |  | 
|---|
| 771 | while (1) | 
|---|
| 772 | { | 
|---|
| 773 | int c1, c2; | 
|---|
| 774 |  | 
|---|
| 775 | if (p1 == e1) | 
|---|
| 776 | c1 = 0; | 
|---|
| 777 | else | 
|---|
| 778 | c1 = *p1++; | 
|---|
| 779 | if (p2 == e2) | 
|---|
| 780 | c2 = 0; | 
|---|
| 781 | else | 
|---|
| 782 | c2 = *p2++; | 
|---|
| 783 |  | 
|---|
| 784 | if (char_order[c1] != char_order[c2]) | 
|---|
| 785 | return char_order[c1] - char_order[c2]; | 
|---|
| 786 | if (!c1) | 
|---|
| 787 | break; | 
|---|
| 788 | } | 
|---|
| 789 |  | 
|---|
| 790 | /* Strings are equal except possibly for case.  */ | 
|---|
| 791 | p1 = start1; | 
|---|
| 792 | p2 = start2; | 
|---|
| 793 | while (1) | 
|---|
| 794 | { | 
|---|
| 795 | int c1, c2; | 
|---|
| 796 |  | 
|---|
| 797 | if (p1 == e1) | 
|---|
| 798 | c1 = 0; | 
|---|
| 799 | else | 
|---|
| 800 | c1 = *p1++; | 
|---|
| 801 | if (p2 == e2) | 
|---|
| 802 | c2 = 0; | 
|---|
| 803 | else | 
|---|
| 804 | c2 = *p2++; | 
|---|
| 805 |  | 
|---|
| 806 | if (c1 != c2) | 
|---|
| 807 | /* Reverse sign here so upper case comes out last.  */ | 
|---|
| 808 | return c2 - c1; | 
|---|
| 809 | if (!c1) | 
|---|
| 810 | break; | 
|---|
| 811 | } | 
|---|
| 812 |  | 
|---|
| 813 | return 0; | 
|---|
| 814 | } | 
|---|
| 815 | } | 
|---|
| 816 |  | 
|---|
| 817 |  | 
|---|
| 818 | /* A `struct linebuffer' is a structure which holds a line of text. | 
|---|
| 819 | `readline' reads a line from a stream into a linebuffer | 
|---|
| 820 | and works regardless of the length of the line.  */ | 
|---|
| 821 |  | 
|---|
| 822 | struct linebuffer | 
|---|
| 823 | { | 
|---|
| 824 | long size; | 
|---|
| 825 | char *buffer; | 
|---|
| 826 | }; | 
|---|
| 827 |  | 
|---|
| 828 | /* Initialize LINEBUFFER for use. */ | 
|---|
| 829 |  | 
|---|
| 830 | void | 
|---|
| 831 | initbuffer (struct linebuffer *linebuffer) | 
|---|
| 832 | { | 
|---|
| 833 | linebuffer->size = 200; | 
|---|
| 834 | linebuffer->buffer = (char *) xmalloc (200); | 
|---|
| 835 | } | 
|---|
| 836 |  | 
|---|
| 837 | /* Read a line of text from STREAM into LINEBUFFER. | 
|---|
| 838 | Return the length of the line.  */ | 
|---|
| 839 |  | 
|---|
| 840 | long | 
|---|
| 841 | readline (struct linebuffer *linebuffer, FILE *stream) | 
|---|
| 842 | { | 
|---|
| 843 | char *buffer = linebuffer->buffer; | 
|---|
| 844 | char *p = linebuffer->buffer; | 
|---|
| 845 | char *end = p + linebuffer->size; | 
|---|
| 846 |  | 
|---|
| 847 | while (1) | 
|---|
| 848 | { | 
|---|
| 849 | int c = getc (stream); | 
|---|
| 850 | if (p == end) | 
|---|
| 851 | { | 
|---|
| 852 | buffer = (char *) xrealloc (buffer, linebuffer->size *= 2); | 
|---|
| 853 | p += buffer - linebuffer->buffer; | 
|---|
| 854 | end += buffer - linebuffer->buffer; | 
|---|
| 855 | linebuffer->buffer = buffer; | 
|---|
| 856 | } | 
|---|
| 857 | if (c < 0 || c == '\n') | 
|---|
| 858 | { | 
|---|
| 859 | *p = 0; | 
|---|
| 860 | break; | 
|---|
| 861 | } | 
|---|
| 862 | *p++ = c; | 
|---|
| 863 | } | 
|---|
| 864 |  | 
|---|
| 865 | return p - buffer; | 
|---|
| 866 | } | 
|---|
| 867 |  | 
|---|
| 868 |  | 
|---|
| 869 | /* Sort an input file too big to sort in core.  */ | 
|---|
| 870 |  | 
|---|
| 871 | void | 
|---|
| 872 | sort_offline (char *infile, off_t total, char *outfile) | 
|---|
| 873 | { | 
|---|
| 874 | /* More than enough. */ | 
|---|
| 875 | int ntemps = 2 * (total + MAX_IN_CORE_SORT - 1) / MAX_IN_CORE_SORT; | 
|---|
| 876 | char **tempfiles = (char **) xmalloc (ntemps * sizeof (char *)); | 
|---|
| 877 | FILE *istream = fopen (infile, "r"); | 
|---|
| 878 | int i; | 
|---|
| 879 | struct linebuffer lb; | 
|---|
| 880 | long linelength; | 
|---|
| 881 | int failure = 0; | 
|---|
| 882 |  | 
|---|
| 883 | initbuffer (&lb); | 
|---|
| 884 |  | 
|---|
| 885 | /* Read in one line of input data.  */ | 
|---|
| 886 |  | 
|---|
| 887 | linelength = readline (&lb, istream); | 
|---|
| 888 |  | 
|---|
| 889 | if (lb.buffer[0] != '\\' && lb.buffer[0] != '@') | 
|---|
| 890 | { | 
|---|
| 891 | error (_("%s: not a texinfo index file"), infile); | 
|---|
| 892 | return; | 
|---|
| 893 | } | 
|---|
| 894 |  | 
|---|
| 895 | /* Split up the input into `ntemps' temporary files, or maybe fewer, | 
|---|
| 896 | and put the new files' names into `tempfiles' */ | 
|---|
| 897 |  | 
|---|
| 898 | for (i = 0; i < ntemps; i++) | 
|---|
| 899 | { | 
|---|
| 900 | char *outname = maketempname (++tempcount); | 
|---|
| 901 | FILE *ostream = fopen (outname, "w"); | 
|---|
| 902 | long tempsize = 0; | 
|---|
| 903 |  | 
|---|
| 904 | if (!ostream) | 
|---|
| 905 | pfatal_with_name (outname); | 
|---|
| 906 | tempfiles[i] = outname; | 
|---|
| 907 |  | 
|---|
| 908 | /* Copy lines into this temp file as long as it does not make file | 
|---|
| 909 | "too big" or until there are no more lines.  */ | 
|---|
| 910 |  | 
|---|
| 911 | while (tempsize + linelength + 1 <= MAX_IN_CORE_SORT) | 
|---|
| 912 | { | 
|---|
| 913 | tempsize += linelength + 1; | 
|---|
| 914 | fputs (lb.buffer, ostream); | 
|---|
| 915 | putc ('\n', ostream); | 
|---|
| 916 |  | 
|---|
| 917 | /* Read another line of input data.  */ | 
|---|
| 918 |  | 
|---|
| 919 | linelength = readline (&lb, istream); | 
|---|
| 920 | if (!linelength && feof (istream)) | 
|---|
| 921 | break; | 
|---|
| 922 |  | 
|---|
| 923 | if (lb.buffer[0] != '\\' && lb.buffer[0] != '@') | 
|---|
| 924 | { | 
|---|
| 925 | error (_("%s: not a texinfo index file"), infile); | 
|---|
| 926 | failure = 1; | 
|---|
| 927 | goto fail; | 
|---|
| 928 | } | 
|---|
| 929 | } | 
|---|
| 930 | fclose (ostream); | 
|---|
| 931 | if (feof (istream)) | 
|---|
| 932 | break; | 
|---|
| 933 | } | 
|---|
| 934 |  | 
|---|
| 935 | free (lb.buffer); | 
|---|
| 936 |  | 
|---|
| 937 | fail: | 
|---|
| 938 | /* Record number of temp files we actually needed.  */ | 
|---|
| 939 |  | 
|---|
| 940 | ntemps = i; | 
|---|
| 941 |  | 
|---|
| 942 | /* Sort each tempfile into another tempfile. | 
|---|
| 943 | Delete the first set of tempfiles and put the names of the second | 
|---|
| 944 | into `tempfiles'. */ | 
|---|
| 945 |  | 
|---|
| 946 | for (i = 0; i < ntemps; i++) | 
|---|
| 947 | { | 
|---|
| 948 | char *newtemp = maketempname (++tempcount); | 
|---|
| 949 | sort_in_core (tempfiles[i], MAX_IN_CORE_SORT, newtemp); | 
|---|
| 950 | if (!keep_tempfiles) | 
|---|
| 951 | unlink (tempfiles[i]); | 
|---|
| 952 | tempfiles[i] = newtemp; | 
|---|
| 953 | } | 
|---|
| 954 |  | 
|---|
| 955 | if (failure) | 
|---|
| 956 | return; | 
|---|
| 957 |  | 
|---|
| 958 | /* Merge the tempfiles together and indexify. */ | 
|---|
| 959 |  | 
|---|
| 960 | merge_files (tempfiles, ntemps, outfile); | 
|---|
| 961 | } | 
|---|
| 962 |  | 
|---|
| 963 |  | 
|---|
| 964 | /* Sort INFILE, whose size is TOTAL, | 
|---|
| 965 | assuming that is small enough to be done in-core, | 
|---|
| 966 | then indexify it and send the output to OUTFILE (or to stdout).  */ | 
|---|
| 967 |  | 
|---|
| 968 | void | 
|---|
| 969 | sort_in_core (char *infile, int total, char *outfile) | 
|---|
| 970 | { | 
|---|
| 971 | char **nextline; | 
|---|
| 972 | char *data = (char *) xmalloc (total + 1); | 
|---|
| 973 | char *file_data; | 
|---|
| 974 | long file_size; | 
|---|
| 975 | int i; | 
|---|
| 976 | FILE *ostream = stdout; | 
|---|
| 977 | struct lineinfo *lineinfo; | 
|---|
| 978 |  | 
|---|
| 979 | /* Read the contents of the file into the moby array `data'. */ | 
|---|
| 980 |  | 
|---|
| 981 | int desc = open (infile, O_RDONLY, 0); | 
|---|
| 982 |  | 
|---|
| 983 | if (desc < 0) | 
|---|
| 984 | fatal (_("failure reopening %s"), infile); | 
|---|
| 985 | for (file_size = 0;;) | 
|---|
| 986 | { | 
|---|
| 987 | i = read (desc, data + file_size, total - file_size); | 
|---|
| 988 | if (i <= 0) | 
|---|
| 989 | break; | 
|---|
| 990 | file_size += i; | 
|---|
| 991 | } | 
|---|
| 992 | file_data = data; | 
|---|
| 993 | data[file_size] = 0; | 
|---|
| 994 |  | 
|---|
| 995 | close (desc); | 
|---|
| 996 |  | 
|---|
| 997 | if (file_size > 0 && data[0] != '\\' && data[0] != '@') | 
|---|
| 998 | { | 
|---|
| 999 | error (_("%s: not a texinfo index file"), infile); | 
|---|
| 1000 | return; | 
|---|
| 1001 | } | 
|---|
| 1002 |  | 
|---|
| 1003 | init_char_order (); | 
|---|
| 1004 |  | 
|---|
| 1005 | /* Sort routines want to know this address. */ | 
|---|
| 1006 |  | 
|---|
| 1007 | text_base = data; | 
|---|
| 1008 |  | 
|---|
| 1009 | /* Create the array of pointers to lines, with a default size | 
|---|
| 1010 | frequently enough.  */ | 
|---|
| 1011 |  | 
|---|
| 1012 | nlines = total / 50; | 
|---|
| 1013 | if (!nlines) | 
|---|
| 1014 | nlines = 2; | 
|---|
| 1015 | linearray = (char **) xmalloc (nlines * sizeof (char *)); | 
|---|
| 1016 |  | 
|---|
| 1017 | /* `nextline' points to the next free slot in this array. | 
|---|
| 1018 | `nlines' is the allocated size.  */ | 
|---|
| 1019 |  | 
|---|
| 1020 | nextline = linearray; | 
|---|
| 1021 |  | 
|---|
| 1022 | /* Parse the input file's data, and make entries for the lines.  */ | 
|---|
| 1023 |  | 
|---|
| 1024 | nextline = parsefile (infile, nextline, file_data, file_size); | 
|---|
| 1025 | if (nextline == 0) | 
|---|
| 1026 | { | 
|---|
| 1027 | error (_("%s: not a texinfo index file"), infile); | 
|---|
| 1028 | return; | 
|---|
| 1029 | } | 
|---|
| 1030 |  | 
|---|
| 1031 | /* Sort the lines. */ | 
|---|
| 1032 |  | 
|---|
| 1033 | /* If we have enough space, find the first keyfield of each line in advance. | 
|---|
| 1034 | Make a `struct lineinfo' for each line, which records the keyfield | 
|---|
| 1035 | as well as the line, and sort them.  */ | 
|---|
| 1036 |  | 
|---|
| 1037 | lineinfo = malloc ((nextline - linearray) * sizeof (struct lineinfo)); | 
|---|
| 1038 |  | 
|---|
| 1039 | if (lineinfo) | 
|---|
| 1040 | { | 
|---|
| 1041 | struct lineinfo *lp; | 
|---|
| 1042 | char **p; | 
|---|
| 1043 |  | 
|---|
| 1044 | for (lp = lineinfo, p = linearray; p != nextline; lp++, p++) | 
|---|
| 1045 | { | 
|---|
| 1046 | lp->text = *p; | 
|---|
| 1047 | lp->key.text = find_field (keyfields, *p, &lp->keylen); | 
|---|
| 1048 | if (keyfields->numeric) | 
|---|
| 1049 | lp->key.number = find_value (lp->key.text, lp->keylen); | 
|---|
| 1050 | } | 
|---|
| 1051 |  | 
|---|
| 1052 | qsort (lineinfo, nextline - linearray, sizeof (struct lineinfo), | 
|---|
| 1053 | compare_prepared); | 
|---|
| 1054 |  | 
|---|
| 1055 | for (lp = lineinfo, p = linearray; p != nextline; lp++, p++) | 
|---|
| 1056 | *p = lp->text; | 
|---|
| 1057 |  | 
|---|
| 1058 | free (lineinfo); | 
|---|
| 1059 | } | 
|---|
| 1060 | else | 
|---|
| 1061 | qsort (linearray, nextline - linearray, sizeof (char *), compare_full); | 
|---|
| 1062 |  | 
|---|
| 1063 | /* Open the output file. */ | 
|---|
| 1064 |  | 
|---|
| 1065 | if (outfile) | 
|---|
| 1066 | { | 
|---|
| 1067 | ostream = fopen (outfile, "w"); | 
|---|
| 1068 | if (!ostream) | 
|---|
| 1069 | pfatal_with_name (outfile); | 
|---|
| 1070 | } | 
|---|
| 1071 |  | 
|---|
| 1072 | writelines (linearray, nextline - linearray, ostream); | 
|---|
| 1073 | if (outfile) | 
|---|
| 1074 | fclose (ostream); | 
|---|
| 1075 |  | 
|---|
| 1076 | free (linearray); | 
|---|
| 1077 | free (data); | 
|---|
| 1078 | } | 
|---|
| 1079 |  | 
|---|
| 1080 |  | 
|---|
| 1081 | /* Parse an input string in core into lines. | 
|---|
| 1082 | DATA is the input string, and SIZE is its length. | 
|---|
| 1083 | Data goes in LINEARRAY starting at NEXTLINE. | 
|---|
| 1084 | The value returned is the first entry in LINEARRAY still unused. | 
|---|
| 1085 | Value 0 means input file contents are invalid.  */ | 
|---|
| 1086 |  | 
|---|
| 1087 | char ** | 
|---|
| 1088 | parsefile (char *filename, char **nextline, char *data, long int size) | 
|---|
| 1089 | { | 
|---|
| 1090 | char *p, *end; | 
|---|
| 1091 | char **line = nextline; | 
|---|
| 1092 |  | 
|---|
| 1093 | p = data; | 
|---|
| 1094 | end = p + size; | 
|---|
| 1095 | *end = 0; | 
|---|
| 1096 |  | 
|---|
| 1097 | while (p != end) | 
|---|
| 1098 | { | 
|---|
| 1099 | if (p[0] != '\\' && p[0] != '@') | 
|---|
| 1100 | return 0; | 
|---|
| 1101 |  | 
|---|
| 1102 | *line = p; | 
|---|
| 1103 |  | 
|---|
| 1104 | /* Find the first letter of the first field of this line.  If it | 
|---|
| 1105 | is different from the first letter of the first field of the | 
|---|
| 1106 | first line, we need initial headers in the output index.  */ | 
|---|
| 1107 | while (*p && *p != '{') | 
|---|
| 1108 | p++; | 
|---|
| 1109 | if (p == end) | 
|---|
| 1110 | return 0; | 
|---|
| 1111 | p++; | 
|---|
| 1112 | if (first_initial) | 
|---|
| 1113 | { | 
|---|
| 1114 | if (first_initial != toupper (*p)) | 
|---|
| 1115 | need_initials = 1; | 
|---|
| 1116 | } | 
|---|
| 1117 | else | 
|---|
| 1118 | first_initial = toupper (*p); | 
|---|
| 1119 |  | 
|---|
| 1120 | while (*p && *p != '\n') | 
|---|
| 1121 | p++; | 
|---|
| 1122 | if (p != end) | 
|---|
| 1123 | p++; | 
|---|
| 1124 |  | 
|---|
| 1125 | line++; | 
|---|
| 1126 | if (line == linearray + nlines) | 
|---|
| 1127 | { | 
|---|
| 1128 | char **old = linearray; | 
|---|
| 1129 | linearray = xrealloc (linearray, sizeof (char *) * (nlines *= 4)); | 
|---|
| 1130 | line += linearray - old; | 
|---|
| 1131 | } | 
|---|
| 1132 | } | 
|---|
| 1133 |  | 
|---|
| 1134 | return line; | 
|---|
| 1135 | } | 
|---|
| 1136 |  | 
|---|
| 1137 |  | 
|---|
| 1138 | /* Indexification is a filter applied to the sorted lines | 
|---|
| 1139 | as they are being written to the output file. | 
|---|
| 1140 | Multiple entries for the same name, with different page numbers, | 
|---|
| 1141 | get combined into a single entry with multiple page numbers. | 
|---|
| 1142 | The first braced field, which is used for sorting, is discarded. | 
|---|
| 1143 | However, its first character is examined, folded to lower case, | 
|---|
| 1144 | and if it is different from that in the previous line fed to us | 
|---|
| 1145 | a \initial line is written with one argument, the new initial. | 
|---|
| 1146 |  | 
|---|
| 1147 | If an entry has four braced fields, then the second and third | 
|---|
| 1148 | constitute primary and secondary names. | 
|---|
| 1149 | In this case, each change of primary name | 
|---|
| 1150 | generates a \primary line which contains only the primary name, | 
|---|
| 1151 | and in between these are \secondary lines which contain | 
|---|
| 1152 | just a secondary name and page numbers. */ | 
|---|
| 1153 |  | 
|---|
| 1154 | /* The last primary name we wrote a \primary entry for. | 
|---|
| 1155 | If only one level of indexing is being done, this is the last name seen. */ | 
|---|
| 1156 | char *lastprimary; | 
|---|
| 1157 | /* Length of storage allocated for lastprimary. */ | 
|---|
| 1158 | int lastprimarylength; | 
|---|
| 1159 |  | 
|---|
| 1160 | /* Similar, for the secondary name. */ | 
|---|
| 1161 | char *lastsecondary; | 
|---|
| 1162 | int lastsecondarylength; | 
|---|
| 1163 |  | 
|---|
| 1164 | /* Zero if we are not in the middle of writing an entry. | 
|---|
| 1165 | One if we have written the beginning of an entry but have not | 
|---|
| 1166 | yet written any page numbers into it. | 
|---|
| 1167 | Greater than one if we have written the beginning of an entry | 
|---|
| 1168 | plus at least one page number. */ | 
|---|
| 1169 | int pending; | 
|---|
| 1170 |  | 
|---|
| 1171 | /* The initial (for sorting purposes) of the last primary entry written. | 
|---|
| 1172 | When this changes, a \initial {c} line is written */ | 
|---|
| 1173 |  | 
|---|
| 1174 | char *lastinitial; | 
|---|
| 1175 |  | 
|---|
| 1176 | int lastinitiallength; | 
|---|
| 1177 |  | 
|---|
| 1178 | /* When we need a string of length 1 for the value of lastinitial, | 
|---|
| 1179 | store it here.  */ | 
|---|
| 1180 |  | 
|---|
| 1181 | char lastinitial1[2]; | 
|---|
| 1182 |  | 
|---|
| 1183 | /* Initialize static storage for writing an index. */ | 
|---|
| 1184 |  | 
|---|
| 1185 | void | 
|---|
| 1186 | init_index (void) | 
|---|
| 1187 | { | 
|---|
| 1188 | pending = 0; | 
|---|
| 1189 | lastinitial = lastinitial1; | 
|---|
| 1190 | lastinitial1[0] = 0; | 
|---|
| 1191 | lastinitial1[1] = 0; | 
|---|
| 1192 | lastinitiallength = 0; | 
|---|
| 1193 | lastprimarylength = 100; | 
|---|
| 1194 | lastprimary = (char *) xmalloc (lastprimarylength + 1); | 
|---|
| 1195 | memset (lastprimary, '\0', lastprimarylength + 1); | 
|---|
| 1196 | lastsecondarylength = 100; | 
|---|
| 1197 | lastsecondary = (char *) xmalloc (lastsecondarylength + 1); | 
|---|
| 1198 | memset (lastsecondary, '\0', lastsecondarylength + 1); | 
|---|
| 1199 | } | 
|---|
| 1200 |  | 
|---|
| 1201 | /* Indexify.  Merge entries for the same name, | 
|---|
| 1202 | insert headers for each initial character, etc.  */ | 
|---|
| 1203 |  | 
|---|
| 1204 | void | 
|---|
| 1205 | indexify (char *line, FILE *ostream) | 
|---|
| 1206 | { | 
|---|
| 1207 | char *primary, *secondary, *pagenumber; | 
|---|
| 1208 | int primarylength, secondarylength = 0, pagelength; | 
|---|
| 1209 | int nosecondary; | 
|---|
| 1210 | int initiallength; | 
|---|
| 1211 | char *initial; | 
|---|
| 1212 | char initial1[2]; | 
|---|
| 1213 | register char *p; | 
|---|
| 1214 |  | 
|---|
| 1215 | /* First, analyze the parts of the entry fed to us this time. */ | 
|---|
| 1216 |  | 
|---|
| 1217 | p = find_braced_pos (line, 0, 0, 0); | 
|---|
| 1218 | if (*p == '{') | 
|---|
| 1219 | { | 
|---|
| 1220 | initial = p; | 
|---|
| 1221 | /* Get length of inner pair of braces starting at `p', | 
|---|
| 1222 | including that inner pair of braces.  */ | 
|---|
| 1223 | initiallength = find_braced_end (p + 1) + 1 - p; | 
|---|
| 1224 | } | 
|---|
| 1225 | else | 
|---|
| 1226 | { | 
|---|
| 1227 | initial = initial1; | 
|---|
| 1228 | initial1[0] = toupper (*p); | 
|---|
| 1229 | initial1[1] = 0; | 
|---|
| 1230 | initiallength = 1; | 
|---|
| 1231 | } | 
|---|
| 1232 |  | 
|---|
| 1233 | pagenumber = find_braced_pos (line, 1, 0, 0); | 
|---|
| 1234 | pagelength = find_braced_end (pagenumber) - pagenumber; | 
|---|
| 1235 | if (pagelength == 0) | 
|---|
| 1236 | fatal (_("No page number in %s"), line); | 
|---|
| 1237 |  | 
|---|
| 1238 | primary = find_braced_pos (line, 2, 0, 0); | 
|---|
| 1239 | primarylength = find_braced_end (primary) - primary; | 
|---|
| 1240 |  | 
|---|
| 1241 | secondary = find_braced_pos (line, 3, 0, 0); | 
|---|
| 1242 | nosecondary = !*secondary; | 
|---|
| 1243 | if (!nosecondary) | 
|---|
| 1244 | secondarylength = find_braced_end (secondary) - secondary; | 
|---|
| 1245 |  | 
|---|
| 1246 | /* If the primary is different from before, make a new primary entry. */ | 
|---|
| 1247 | if (strncmp (primary, lastprimary, primarylength)) | 
|---|
| 1248 | { | 
|---|
| 1249 | /* Close off current secondary entry first, if one is open. */ | 
|---|
| 1250 | if (pending) | 
|---|
| 1251 | { | 
|---|
| 1252 | fputs ("}\n", ostream); | 
|---|
| 1253 | pending = 0; | 
|---|
| 1254 | } | 
|---|
| 1255 |  | 
|---|
| 1256 | /* If this primary has a different initial, include an entry for | 
|---|
| 1257 | the initial. */ | 
|---|
| 1258 | if (need_initials && | 
|---|
| 1259 | (initiallength != lastinitiallength || | 
|---|
| 1260 | strncmp (initial, lastinitial, initiallength))) | 
|---|
| 1261 | { | 
|---|
| 1262 | fprintf (ostream, "\\initial {"); | 
|---|
| 1263 | fwrite (initial, 1, initiallength, ostream); | 
|---|
| 1264 | fputs ("}\n", ostream); | 
|---|
| 1265 | if (initial == initial1) | 
|---|
| 1266 | { | 
|---|
| 1267 | lastinitial = lastinitial1; | 
|---|
| 1268 | *lastinitial1 = *initial1; | 
|---|
| 1269 | } | 
|---|
| 1270 | else | 
|---|
| 1271 | { | 
|---|
| 1272 | lastinitial = initial; | 
|---|
| 1273 | } | 
|---|
| 1274 | lastinitiallength = initiallength; | 
|---|
| 1275 | } | 
|---|
| 1276 |  | 
|---|
| 1277 | /* Make the entry for the primary.  */ | 
|---|
| 1278 | if (nosecondary) | 
|---|
| 1279 | fputs ("\\entry {", ostream); | 
|---|
| 1280 | else | 
|---|
| 1281 | fputs ("\\primary {", ostream); | 
|---|
| 1282 | fwrite (primary, primarylength, 1, ostream); | 
|---|
| 1283 | if (nosecondary) | 
|---|
| 1284 | { | 
|---|
| 1285 | fputs ("}{", ostream); | 
|---|
| 1286 | pending = 1; | 
|---|
| 1287 | } | 
|---|
| 1288 | else | 
|---|
| 1289 | fputs ("}\n", ostream); | 
|---|
| 1290 |  | 
|---|
| 1291 | /* Record name of most recent primary. */ | 
|---|
| 1292 | if (lastprimarylength < primarylength) | 
|---|
| 1293 | { | 
|---|
| 1294 | lastprimarylength = primarylength + 100; | 
|---|
| 1295 | lastprimary = (char *) xrealloc (lastprimary, | 
|---|
| 1296 | 1 + lastprimarylength); | 
|---|
| 1297 | } | 
|---|
| 1298 | strncpy (lastprimary, primary, primarylength); | 
|---|
| 1299 | lastprimary[primarylength] = 0; | 
|---|
| 1300 |  | 
|---|
| 1301 | /* There is no current secondary within this primary, now. */ | 
|---|
| 1302 | lastsecondary[0] = 0; | 
|---|
| 1303 | } | 
|---|
| 1304 |  | 
|---|
| 1305 | /* Should not have an entry with no subtopic following one with a | 
|---|
| 1306 | subtopic. */ | 
|---|
| 1307 |  | 
|---|
| 1308 | if (nosecondary && *lastsecondary) | 
|---|
| 1309 | error (_("entry %s follows an entry with a secondary name"), line); | 
|---|
| 1310 |  | 
|---|
| 1311 | /* Start a new secondary entry if necessary. */ | 
|---|
| 1312 | if (!nosecondary && strncmp (secondary, lastsecondary, secondarylength)) | 
|---|
| 1313 | { | 
|---|
| 1314 | if (pending) | 
|---|
| 1315 | { | 
|---|
| 1316 | fputs ("}\n", ostream); | 
|---|
| 1317 | pending = 0; | 
|---|
| 1318 | } | 
|---|
| 1319 |  | 
|---|
| 1320 | /* Write the entry for the secondary.  */ | 
|---|
| 1321 | fputs ("\\secondary {", ostream); | 
|---|
| 1322 | fwrite (secondary, secondarylength, 1, ostream); | 
|---|
| 1323 | fputs ("}{", ostream); | 
|---|
| 1324 | pending = 1; | 
|---|
| 1325 |  | 
|---|
| 1326 | /* Record name of most recent secondary. */ | 
|---|
| 1327 | if (lastsecondarylength < secondarylength) | 
|---|
| 1328 | { | 
|---|
| 1329 | lastsecondarylength = secondarylength + 100; | 
|---|
| 1330 | lastsecondary = (char *) xrealloc (lastsecondary, | 
|---|
| 1331 | 1 + lastsecondarylength); | 
|---|
| 1332 | } | 
|---|
| 1333 | strncpy (lastsecondary, secondary, secondarylength); | 
|---|
| 1334 | lastsecondary[secondarylength] = 0; | 
|---|
| 1335 | } | 
|---|
| 1336 |  | 
|---|
| 1337 | /* Here to add one more page number to the current entry. */ | 
|---|
| 1338 | if (pending++ != 1) | 
|---|
| 1339 | fputs (", ", ostream);  /* Punctuate first, if this is not the first. */ | 
|---|
| 1340 | fwrite (pagenumber, pagelength, 1, ostream); | 
|---|
| 1341 | } | 
|---|
| 1342 |  | 
|---|
| 1343 | /* Close out any unfinished output entry. */ | 
|---|
| 1344 |  | 
|---|
| 1345 | void | 
|---|
| 1346 | finish_index (FILE *ostream) | 
|---|
| 1347 | { | 
|---|
| 1348 | if (pending) | 
|---|
| 1349 | fputs ("}\n", ostream); | 
|---|
| 1350 | free (lastprimary); | 
|---|
| 1351 | free (lastsecondary); | 
|---|
| 1352 | } | 
|---|
| 1353 |  | 
|---|
| 1354 |  | 
|---|
| 1355 | /* Copy the lines in the sorted order. | 
|---|
| 1356 | Each line is copied out of the input file it was found in. */ | 
|---|
| 1357 |  | 
|---|
| 1358 | void | 
|---|
| 1359 | writelines (char **linearray, int nlines, FILE *ostream) | 
|---|
| 1360 | { | 
|---|
| 1361 | char **stop_line = linearray + nlines; | 
|---|
| 1362 | char **next_line; | 
|---|
| 1363 |  | 
|---|
| 1364 | init_index (); | 
|---|
| 1365 |  | 
|---|
| 1366 | /* Output the text of the lines, and free the buffer space. */ | 
|---|
| 1367 |  | 
|---|
| 1368 | for (next_line = linearray; next_line != stop_line; next_line++) | 
|---|
| 1369 | { | 
|---|
| 1370 | /* If -u was specified, output the line only if distinct from | 
|---|
| 1371 | previous one.  */ | 
|---|
| 1372 | if (next_line == linearray | 
|---|
| 1373 | /* Compare previous line with this one, using only the | 
|---|
| 1374 | explicitly specd keyfields. */ | 
|---|
| 1375 | || compare_general (*(next_line - 1), *next_line, 0L, 0L, | 
|---|
| 1376 | num_keyfields - 1)) | 
|---|
| 1377 | { | 
|---|
| 1378 | char *p = *next_line; | 
|---|
| 1379 | char c; | 
|---|
| 1380 |  | 
|---|
| 1381 | while ((c = *p++) && c != '\n') | 
|---|
| 1382 | /* Do nothing. */ ; | 
|---|
| 1383 | *(p - 1) = 0; | 
|---|
| 1384 | indexify (*next_line, ostream); | 
|---|
| 1385 | } | 
|---|
| 1386 | } | 
|---|
| 1387 |  | 
|---|
| 1388 | finish_index (ostream); | 
|---|
| 1389 | } | 
|---|
| 1390 |  | 
|---|
| 1391 |  | 
|---|
| 1392 | /* Assume (and optionally verify) that each input file is sorted; | 
|---|
| 1393 | merge them and output the result. | 
|---|
| 1394 | Returns nonzero if any input file fails to be sorted. | 
|---|
| 1395 |  | 
|---|
| 1396 | This is the high-level interface that can handle an unlimited | 
|---|
| 1397 | number of files.  */ | 
|---|
| 1398 |  | 
|---|
| 1399 | #define MAX_DIRECT_MERGE 10 | 
|---|
| 1400 |  | 
|---|
| 1401 | int | 
|---|
| 1402 | merge_files (char **infiles, int nfiles, char *outfile) | 
|---|
| 1403 | { | 
|---|
| 1404 | char **tempfiles; | 
|---|
| 1405 | int ntemps; | 
|---|
| 1406 | int i; | 
|---|
| 1407 | int value = 0; | 
|---|
| 1408 | int start_tempcount = tempcount; | 
|---|
| 1409 |  | 
|---|
| 1410 | if (nfiles <= MAX_DIRECT_MERGE) | 
|---|
| 1411 | return merge_direct (infiles, nfiles, outfile); | 
|---|
| 1412 |  | 
|---|
| 1413 | /* Merge groups of MAX_DIRECT_MERGE input files at a time, | 
|---|
| 1414 | making a temporary file to hold each group's result.  */ | 
|---|
| 1415 |  | 
|---|
| 1416 | ntemps = (nfiles + MAX_DIRECT_MERGE - 1) / MAX_DIRECT_MERGE; | 
|---|
| 1417 | tempfiles = (char **) xmalloc (ntemps * sizeof (char *)); | 
|---|
| 1418 | for (i = 0; i < ntemps; i++) | 
|---|
| 1419 | { | 
|---|
| 1420 | int nf = MAX_DIRECT_MERGE; | 
|---|
| 1421 | if (i + 1 == ntemps) | 
|---|
| 1422 | nf = nfiles - i * MAX_DIRECT_MERGE; | 
|---|
| 1423 | tempfiles[i] = maketempname (++tempcount); | 
|---|
| 1424 | value |= merge_direct (&infiles[i * MAX_DIRECT_MERGE], nf, tempfiles[i]); | 
|---|
| 1425 | } | 
|---|
| 1426 |  | 
|---|
| 1427 | /* All temporary files that existed before are no longer needed | 
|---|
| 1428 | since their contents have been merged into our new tempfiles. | 
|---|
| 1429 | So delete them.  */ | 
|---|
| 1430 | flush_tempfiles (start_tempcount); | 
|---|
| 1431 |  | 
|---|
| 1432 | /* Now merge the temporary files we created.  */ | 
|---|
| 1433 |  | 
|---|
| 1434 | merge_files (tempfiles, ntemps, outfile); | 
|---|
| 1435 |  | 
|---|
| 1436 | free (tempfiles); | 
|---|
| 1437 |  | 
|---|
| 1438 | return value; | 
|---|
| 1439 | } | 
|---|
| 1440 |  | 
|---|
| 1441 |  | 
|---|
| 1442 | /* Assume (and optionally verify) that each input file is sorted; | 
|---|
| 1443 | merge them and output the result. | 
|---|
| 1444 | Returns nonzero if any input file fails to be sorted. | 
|---|
| 1445 |  | 
|---|
| 1446 | This version of merging will not work if the number of | 
|---|
| 1447 | input files gets too high.  Higher level functions | 
|---|
| 1448 | use it only with a bounded number of input files.  */ | 
|---|
| 1449 |  | 
|---|
| 1450 | int | 
|---|
| 1451 | merge_direct (char **infiles, int nfiles, char *outfile) | 
|---|
| 1452 | { | 
|---|
| 1453 | struct linebuffer *lb1, *lb2; | 
|---|
| 1454 | struct linebuffer **thisline, **prevline; | 
|---|
| 1455 | FILE **streams; | 
|---|
| 1456 | int i; | 
|---|
| 1457 | int nleft; | 
|---|
| 1458 | int lossage = 0; | 
|---|
| 1459 | int *file_lossage; | 
|---|
| 1460 | struct linebuffer *prev_out = 0; | 
|---|
| 1461 | FILE *ostream = stdout; | 
|---|
| 1462 |  | 
|---|
| 1463 | if (outfile) | 
|---|
| 1464 | { | 
|---|
| 1465 | ostream = fopen (outfile, "w"); | 
|---|
| 1466 | } | 
|---|
| 1467 | if (!ostream) | 
|---|
| 1468 | pfatal_with_name (outfile); | 
|---|
| 1469 |  | 
|---|
| 1470 | init_index (); | 
|---|
| 1471 |  | 
|---|
| 1472 | if (nfiles == 0) | 
|---|
| 1473 | { | 
|---|
| 1474 | if (outfile) | 
|---|
| 1475 | fclose (ostream); | 
|---|
| 1476 | return 0; | 
|---|
| 1477 | } | 
|---|
| 1478 |  | 
|---|
| 1479 | /* For each file, make two line buffers.  Also, for each file, there | 
|---|
| 1480 | is an element of `thisline' which points at any time to one of the | 
|---|
| 1481 | file's two buffers, and an element of `prevline' which points to | 
|---|
| 1482 | the other buffer.  `thisline' is supposed to point to the next | 
|---|
| 1483 | available line from the file, while `prevline' holds the last file | 
|---|
| 1484 | line used, which is remembered so that we can verify that the file | 
|---|
| 1485 | is properly sorted. */ | 
|---|
| 1486 |  | 
|---|
| 1487 | /* lb1 and lb2 contain one buffer each per file. */ | 
|---|
| 1488 | lb1 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer)); | 
|---|
| 1489 | lb2 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer)); | 
|---|
| 1490 |  | 
|---|
| 1491 | /* thisline[i] points to the linebuffer holding the next available | 
|---|
| 1492 | line in file i, or is zero if there are no lines left in that file.  */ | 
|---|
| 1493 | thisline = (struct linebuffer **) | 
|---|
| 1494 | xmalloc (nfiles * sizeof (struct linebuffer *)); | 
|---|
| 1495 | /* prevline[i] points to the linebuffer holding the last used line | 
|---|
| 1496 | from file i.  This is just for verifying that file i is properly | 
|---|
| 1497 | sorted.  */ | 
|---|
| 1498 | prevline = (struct linebuffer **) | 
|---|
| 1499 | xmalloc (nfiles * sizeof (struct linebuffer *)); | 
|---|
| 1500 | /* streams[i] holds the input stream for file i.  */ | 
|---|
| 1501 | streams = (FILE **) xmalloc (nfiles * sizeof (FILE *)); | 
|---|
| 1502 | /* file_lossage[i] is nonzero if we already know file i is not | 
|---|
| 1503 | properly sorted.  */ | 
|---|
| 1504 | file_lossage = (int *) xmalloc (nfiles * sizeof (int)); | 
|---|
| 1505 |  | 
|---|
| 1506 | /* Allocate and initialize all that storage. */ | 
|---|
| 1507 |  | 
|---|
| 1508 | for (i = 0; i < nfiles; i++) | 
|---|
| 1509 | { | 
|---|
| 1510 | initbuffer (&lb1[i]); | 
|---|
| 1511 | initbuffer (&lb2[i]); | 
|---|
| 1512 | thisline[i] = &lb1[i]; | 
|---|
| 1513 | prevline[i] = &lb2[i]; | 
|---|
| 1514 | file_lossage[i] = 0; | 
|---|
| 1515 | streams[i] = fopen (infiles[i], "r"); | 
|---|
| 1516 | if (!streams[i]) | 
|---|
| 1517 | pfatal_with_name (infiles[i]); | 
|---|
| 1518 |  | 
|---|
| 1519 | readline (thisline[i], streams[i]); | 
|---|
| 1520 | } | 
|---|
| 1521 |  | 
|---|
| 1522 | /* Keep count of number of files not at eof. */ | 
|---|
| 1523 | nleft = nfiles; | 
|---|
| 1524 |  | 
|---|
| 1525 | while (nleft) | 
|---|
| 1526 | { | 
|---|
| 1527 | struct linebuffer *best = 0; | 
|---|
| 1528 | struct linebuffer *exch; | 
|---|
| 1529 | int bestfile = -1; | 
|---|
| 1530 | int i; | 
|---|
| 1531 |  | 
|---|
| 1532 | /* Look at the next avail line of each file; choose the least one.  */ | 
|---|
| 1533 |  | 
|---|
| 1534 | for (i = 0; i < nfiles; i++) | 
|---|
| 1535 | { | 
|---|
| 1536 | if (thisline[i] && | 
|---|
| 1537 | (!best || | 
|---|
| 1538 | 0 < compare_general (best->buffer, thisline[i]->buffer, | 
|---|
| 1539 | (long) bestfile, (long) i, num_keyfields))) | 
|---|
| 1540 | { | 
|---|
| 1541 | best = thisline[i]; | 
|---|
| 1542 | bestfile = i; | 
|---|
| 1543 | } | 
|---|
| 1544 | } | 
|---|
| 1545 |  | 
|---|
| 1546 | /* Output that line, unless it matches the previous one and we | 
|---|
| 1547 | don't want duplicates. */ | 
|---|
| 1548 |  | 
|---|
| 1549 | if (!(prev_out && | 
|---|
| 1550 | !compare_general (prev_out->buffer, | 
|---|
| 1551 | best->buffer, 0L, 1L, num_keyfields - 1))) | 
|---|
| 1552 | indexify (best->buffer, ostream); | 
|---|
| 1553 | prev_out = best; | 
|---|
| 1554 |  | 
|---|
| 1555 | /* Now make the line the previous of its file, and fetch a new | 
|---|
| 1556 | line from that file.  */ | 
|---|
| 1557 |  | 
|---|
| 1558 | exch = prevline[bestfile]; | 
|---|
| 1559 | prevline[bestfile] = thisline[bestfile]; | 
|---|
| 1560 | thisline[bestfile] = exch; | 
|---|
| 1561 |  | 
|---|
| 1562 | while (1) | 
|---|
| 1563 | { | 
|---|
| 1564 | /* If the file has no more, mark it empty. */ | 
|---|
| 1565 |  | 
|---|
| 1566 | if (feof (streams[bestfile])) | 
|---|
| 1567 | { | 
|---|
| 1568 | thisline[bestfile] = 0; | 
|---|
| 1569 | /* Update the number of files still not empty. */ | 
|---|
| 1570 | nleft--; | 
|---|
| 1571 | break; | 
|---|
| 1572 | } | 
|---|
| 1573 | readline (thisline[bestfile], streams[bestfile]); | 
|---|
| 1574 | if (thisline[bestfile]->buffer[0] || !feof (streams[bestfile])) | 
|---|
| 1575 | break; | 
|---|
| 1576 | } | 
|---|
| 1577 | } | 
|---|
| 1578 |  | 
|---|
| 1579 | finish_index (ostream); | 
|---|
| 1580 |  | 
|---|
| 1581 | /* Free all storage and close all input streams. */ | 
|---|
| 1582 |  | 
|---|
| 1583 | for (i = 0; i < nfiles; i++) | 
|---|
| 1584 | { | 
|---|
| 1585 | fclose (streams[i]); | 
|---|
| 1586 | free (lb1[i].buffer); | 
|---|
| 1587 | free (lb2[i].buffer); | 
|---|
| 1588 | } | 
|---|
| 1589 | free (file_lossage); | 
|---|
| 1590 | free (lb1); | 
|---|
| 1591 | free (lb2); | 
|---|
| 1592 | free (thisline); | 
|---|
| 1593 | free (prevline); | 
|---|
| 1594 | free (streams); | 
|---|
| 1595 |  | 
|---|
| 1596 | if (outfile) | 
|---|
| 1597 | fclose (ostream); | 
|---|
| 1598 |  | 
|---|
| 1599 | return lossage; | 
|---|
| 1600 | } | 
|---|
| 1601 |  | 
|---|
| 1602 |  | 
|---|
| 1603 | /* Print error message and exit.  */ | 
|---|
| 1604 |  | 
|---|
| 1605 | void | 
|---|
| 1606 | fatal (const char *format, const char *arg) | 
|---|
| 1607 | { | 
|---|
| 1608 | error (format, arg); | 
|---|
| 1609 | xexit (1); | 
|---|
| 1610 | } | 
|---|
| 1611 |  | 
|---|
| 1612 | /* Print error message.  FORMAT is printf control string, ARG is arg for it. */ | 
|---|
| 1613 | void | 
|---|
| 1614 | error (const char *format, const char *arg) | 
|---|
| 1615 | { | 
|---|
| 1616 | printf ("%s: ", program_name); | 
|---|
| 1617 | printf (format, arg); | 
|---|
| 1618 | if (format[strlen (format) -1] != '\n') | 
|---|
| 1619 | printf ("\n"); | 
|---|
| 1620 | } | 
|---|
| 1621 |  | 
|---|
| 1622 | void | 
|---|
| 1623 | perror_with_name (const char *name) | 
|---|
| 1624 | { | 
|---|
| 1625 | fprintf (stderr, "%s: ", program_name); | 
|---|
| 1626 | perror (name); | 
|---|
| 1627 | } | 
|---|
| 1628 |  | 
|---|
| 1629 | void | 
|---|
| 1630 | pfatal_with_name (const char *name) | 
|---|
| 1631 | { | 
|---|
| 1632 | perror_with_name (name); | 
|---|
| 1633 | xexit (1); | 
|---|
| 1634 | } | 
|---|
| 1635 |  | 
|---|
| 1636 |  | 
|---|
| 1637 |  | 
|---|
| 1638 | /* Return a newly-allocated string concatenating S1 and S2.  */ | 
|---|
| 1639 |  | 
|---|
| 1640 | char * | 
|---|
| 1641 | concat (char *s1, char *s2) | 
|---|
| 1642 | { | 
|---|
| 1643 | int len1 = strlen (s1), len2 = strlen (s2); | 
|---|
| 1644 | char *result = (char *) xmalloc (len1 + len2 + 1); | 
|---|
| 1645 |  | 
|---|
| 1646 | strcpy (result, s1); | 
|---|
| 1647 | strcpy (result + len1, s2); | 
|---|
| 1648 | *(result + len1 + len2) = 0; | 
|---|
| 1649 |  | 
|---|
| 1650 | return result; | 
|---|
| 1651 | } | 
|---|