| 1 | /* texindex -- sort TeX index dribble output into an actual index.
|
|---|
| 2 | $Id: texindex.c,v 1.11 2004/04/11 17:56:47 karl Exp $
|
|---|
| 3 |
|
|---|
| 4 | Copyright (C) 1987, 1991, 1992, 1996, 1997, 1998, 1999, 2000, 2001,
|
|---|
| 5 | 2002, 2003, 2004 Free Software Foundation, Inc.
|
|---|
| 6 |
|
|---|
| 7 | This program is free software; you can redistribute it and/or modify
|
|---|
| 8 | it under the terms of the GNU General Public License as published by
|
|---|
| 9 | the Free Software Foundation; either version 2, or (at your option)
|
|---|
| 10 | any later version.
|
|---|
| 11 |
|
|---|
| 12 | This program is distributed in the hope that it will be useful,
|
|---|
| 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|---|
| 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|---|
| 15 | GNU General Public License for more details.
|
|---|
| 16 |
|
|---|
| 17 | You should have received a copy of the GNU General Public License
|
|---|
| 18 | along with this program; if not, write to the Free Software
|
|---|
| 19 | Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307. */
|
|---|
| 20 |
|
|---|
| 21 | #include "system.h"
|
|---|
| 22 | #include <getopt.h>
|
|---|
| 23 |
|
|---|
| 24 | #ifdef __EMX__
|
|---|
| 25 | # include <stdlib.h>
|
|---|
| 26 | #endif
|
|---|
| 27 |
|
|---|
| 28 | static char *program_name = "texindex";
|
|---|
| 29 |
|
|---|
| 30 | #if defined (emacs)
|
|---|
| 31 | # include "../src/config.h"
|
|---|
| 32 | /* Some s/os.h files redefine these. */
|
|---|
| 33 | # undef read
|
|---|
| 34 | # undef close
|
|---|
| 35 | # undef write
|
|---|
| 36 | # undef open
|
|---|
| 37 | #endif
|
|---|
| 38 |
|
|---|
| 39 | #if !defined (HAVE_MEMSET)
|
|---|
| 40 | #undef memset
|
|---|
| 41 | #define memset(ptr, ignore, count) bzero (ptr, count)
|
|---|
| 42 | #endif
|
|---|
| 43 |
|
|---|
| 44 | char *mktemp (char *);
|
|---|
| 45 |
|
|---|
| 46 | #if !defined (SEEK_SET)
|
|---|
| 47 | # define SEEK_SET 0
|
|---|
| 48 | # define SEEK_CUR 1
|
|---|
| 49 | # define SEEK_END 2
|
|---|
| 50 | #endif /* !SEEK_SET */
|
|---|
| 51 |
|
|---|
| 52 | struct linebuffer;
|
|---|
| 53 |
|
|---|
| 54 | /* When sorting in core, this structure describes one line
|
|---|
| 55 | and the position and length of its first keyfield. */
|
|---|
| 56 | struct lineinfo
|
|---|
| 57 | {
|
|---|
| 58 | char *text; /* The actual text of the line. */
|
|---|
| 59 | union {
|
|---|
| 60 | char *text; /* The start of the key (for textual comparison). */
|
|---|
| 61 | long number; /* The numeric value (for numeric comparison). */
|
|---|
| 62 | } key;
|
|---|
| 63 | long keylen; /* Length of KEY field. */
|
|---|
| 64 | };
|
|---|
| 65 |
|
|---|
| 66 | /* This structure describes a field to use as a sort key. */
|
|---|
| 67 | struct keyfield
|
|---|
| 68 | {
|
|---|
| 69 | int startwords; /* Number of words to skip. */
|
|---|
| 70 | int startchars; /* Number of additional chars to skip. */
|
|---|
| 71 | int endwords; /* Number of words to ignore at end. */
|
|---|
| 72 | int endchars; /* Ditto for characters of last word. */
|
|---|
| 73 | char ignore_blanks; /* Non-zero means ignore spaces and tabs. */
|
|---|
| 74 | char fold_case; /* Non-zero means case doesn't matter. */
|
|---|
| 75 | char reverse; /* Non-zero means compare in reverse order. */
|
|---|
| 76 | char numeric; /* Non-zeros means field is ASCII numeric. */
|
|---|
| 77 | char positional; /* Sort according to file position. */
|
|---|
| 78 | char braced; /* Count balanced-braced groupings as fields. */
|
|---|
| 79 | };
|
|---|
| 80 |
|
|---|
| 81 | /* Vector of keyfields to use. */
|
|---|
| 82 | struct keyfield keyfields[3];
|
|---|
| 83 |
|
|---|
| 84 | /* Number of keyfields stored in that vector. */
|
|---|
| 85 | int num_keyfields = 3;
|
|---|
| 86 |
|
|---|
| 87 | /* Vector of input file names, terminated with a null pointer. */
|
|---|
| 88 | char **infiles;
|
|---|
| 89 |
|
|---|
| 90 | /* Vector of corresponding output file names, or NULL, meaning default it
|
|---|
| 91 | (add an `s' to the end). */
|
|---|
| 92 | char **outfiles;
|
|---|
| 93 |
|
|---|
| 94 | /* Length of `infiles'. */
|
|---|
| 95 | int num_infiles;
|
|---|
| 96 |
|
|---|
| 97 | /* Pointer to the array of pointers to lines being sorted. */
|
|---|
| 98 | char **linearray;
|
|---|
| 99 |
|
|---|
| 100 | /* The allocated length of `linearray'. */
|
|---|
| 101 | long nlines;
|
|---|
| 102 |
|
|---|
| 103 | /* Directory to use for temporary files. On Unix, it ends with a slash. */
|
|---|
| 104 | char *tempdir;
|
|---|
| 105 |
|
|---|
| 106 | /* Number of last temporary file. */
|
|---|
| 107 | int tempcount;
|
|---|
| 108 |
|
|---|
| 109 | /* Number of last temporary file already deleted.
|
|---|
| 110 | Temporary files are deleted by `flush_tempfiles' in order of creation. */
|
|---|
| 111 | int last_deleted_tempcount;
|
|---|
| 112 |
|
|---|
| 113 | /* During in-core sort, this points to the base of the data block
|
|---|
| 114 | which contains all the lines of data. */
|
|---|
| 115 | char *text_base;
|
|---|
| 116 |
|
|---|
| 117 | /* Initially 0; changed to 1 if we want initials in this index. */
|
|---|
| 118 | int need_initials;
|
|---|
| 119 |
|
|---|
| 120 | /* Remembers the first initial letter seen in this index, so we can
|
|---|
| 121 | determine whether we need initials in the sorted form. */
|
|---|
| 122 | char first_initial;
|
|---|
| 123 |
|
|---|
| 124 | /* Additional command switches .*/
|
|---|
| 125 |
|
|---|
| 126 | /* Nonzero means do not delete tempfiles -- for debugging. */
|
|---|
| 127 | int keep_tempfiles;
|
|---|
| 128 |
|
|---|
| 129 | /* Forward declarations of functions in this file. */
|
|---|
| 130 | void decode_command (int argc, char **argv);
|
|---|
| 131 | void sort_in_core (char *infile, int total, char *outfile);
|
|---|
| 132 | void sort_offline (char *infile, off_t total, char *outfile);
|
|---|
| 133 | char **parsefile (char *filename, char **nextline, char *data, long int size);
|
|---|
| 134 | char *find_field (struct keyfield *keyfield, char *str, long int *lengthptr);
|
|---|
| 135 | char *find_pos (char *str, int words, int chars, int ignore_blanks);
|
|---|
| 136 | long find_value (char *start, long int length);
|
|---|
| 137 | char *find_braced_pos (char *str, int words, int chars, int ignore_blanks);
|
|---|
| 138 | char *find_braced_end (char *str);
|
|---|
| 139 | void writelines (char **linearray, int nlines, FILE *ostream);
|
|---|
| 140 | int compare_field (struct keyfield *keyfield, char *start1,
|
|---|
| 141 | long int length1, long int pos1, char *start2,
|
|---|
| 142 | long int length2, long int pos2);
|
|---|
| 143 | int compare_full (const void *, const void *);
|
|---|
| 144 | long readline (struct linebuffer *linebuffer, FILE *stream);
|
|---|
| 145 | int merge_files (char **infiles, int nfiles, char *outfile);
|
|---|
| 146 | int merge_direct (char **infiles, int nfiles, char *outfile);
|
|---|
| 147 | void pfatal_with_name (const char *name);
|
|---|
| 148 | void fatal (const char *format, const char *arg);
|
|---|
| 149 | void error (const char *format, const char *arg);
|
|---|
| 150 | void *xmalloc (), *xrealloc ();
|
|---|
| 151 | char *concat (char *s1, char *s2);
|
|---|
| 152 | void flush_tempfiles (int to_count);
|
|---|
| 153 | |
|---|
| 154 |
|
|---|
| 155 | #define MAX_IN_CORE_SORT 500000
|
|---|
| 156 |
|
|---|
| 157 | int
|
|---|
| 158 | main (int argc, char **argv)
|
|---|
| 159 | {
|
|---|
| 160 | int i;
|
|---|
| 161 |
|
|---|
| 162 | tempcount = 0;
|
|---|
| 163 | last_deleted_tempcount = 0;
|
|---|
| 164 |
|
|---|
| 165 | #ifdef __EMX__
|
|---|
| 166 | _response(&argc, &argv);
|
|---|
| 167 | _wildcard(&argc, &argv);
|
|---|
| 168 | #endif
|
|---|
| 169 |
|
|---|
| 170 | #ifdef HAVE_SETLOCALE
|
|---|
| 171 | /* Set locale via LC_ALL. */
|
|---|
| 172 | setlocale (LC_ALL, "");
|
|---|
| 173 | #endif
|
|---|
| 174 |
|
|---|
| 175 | /* Set the text message domain. */
|
|---|
| 176 | bindtextdomain (PACKAGE, LOCALEDIR);
|
|---|
| 177 | textdomain (PACKAGE);
|
|---|
| 178 |
|
|---|
| 179 | /* In case we write to a redirected stdout that fails. */
|
|---|
| 180 | /* not ready atexit (close_stdout); */
|
|---|
| 181 |
|
|---|
| 182 | /* Describe the kind of sorting to do. */
|
|---|
| 183 | /* The first keyfield uses the first braced field and folds case. */
|
|---|
| 184 | keyfields[0].braced = 1;
|
|---|
| 185 | keyfields[0].fold_case = 1;
|
|---|
| 186 | keyfields[0].endwords = -1;
|
|---|
| 187 | keyfields[0].endchars = -1;
|
|---|
| 188 |
|
|---|
| 189 | /* The second keyfield uses the second braced field, numerically. */
|
|---|
| 190 | keyfields[1].braced = 1;
|
|---|
| 191 | keyfields[1].numeric = 1;
|
|---|
| 192 | keyfields[1].startwords = 1;
|
|---|
| 193 | keyfields[1].endwords = -1;
|
|---|
| 194 | keyfields[1].endchars = -1;
|
|---|
| 195 |
|
|---|
| 196 | /* The third keyfield (which is ignored while discarding duplicates)
|
|---|
| 197 | compares the whole line. */
|
|---|
| 198 | keyfields[2].endwords = -1;
|
|---|
| 199 | keyfields[2].endchars = -1;
|
|---|
| 200 |
|
|---|
| 201 | decode_command (argc, argv);
|
|---|
| 202 |
|
|---|
| 203 | /* Process input files completely, one by one. */
|
|---|
| 204 |
|
|---|
| 205 | for (i = 0; i < num_infiles; i++)
|
|---|
| 206 | {
|
|---|
| 207 | int desc;
|
|---|
| 208 | off_t ptr;
|
|---|
| 209 | char *outfile;
|
|---|
| 210 | struct stat instat;
|
|---|
| 211 |
|
|---|
| 212 | desc = open (infiles[i], O_RDONLY, 0);
|
|---|
| 213 | if (desc < 0)
|
|---|
| 214 | pfatal_with_name (infiles[i]);
|
|---|
| 215 |
|
|---|
| 216 | if (stat (infiles[i], &instat))
|
|---|
| 217 | pfatal_with_name (infiles[i]);
|
|---|
| 218 | if (S_ISDIR (instat.st_mode))
|
|---|
| 219 | {
|
|---|
| 220 | #ifdef EISDIR
|
|---|
| 221 | errno = EISDIR;
|
|---|
| 222 | #endif
|
|---|
| 223 | pfatal_with_name (infiles[i]);
|
|---|
| 224 | }
|
|---|
| 225 |
|
|---|
| 226 | lseek (desc, (off_t) 0, SEEK_END);
|
|---|
| 227 | ptr = (off_t) lseek (desc, (off_t) 0, SEEK_CUR);
|
|---|
| 228 |
|
|---|
| 229 | close (desc);
|
|---|
| 230 |
|
|---|
| 231 | outfile = outfiles[i];
|
|---|
| 232 | if (!outfile)
|
|---|
| 233 | outfile = concat (infiles[i], "s");
|
|---|
| 234 |
|
|---|
| 235 | need_initials = 0;
|
|---|
| 236 | first_initial = '\0';
|
|---|
| 237 |
|
|---|
| 238 | if (ptr < MAX_IN_CORE_SORT)
|
|---|
| 239 | /* Sort a small amount of data. */
|
|---|
| 240 | sort_in_core (infiles[i], (int)ptr, outfile);
|
|---|
| 241 | else
|
|---|
| 242 | sort_offline (infiles[i], ptr, outfile);
|
|---|
| 243 | }
|
|---|
| 244 |
|
|---|
| 245 | flush_tempfiles (tempcount);
|
|---|
| 246 | xexit (0);
|
|---|
| 247 | return 0; /* Avoid bogus warnings. */
|
|---|
| 248 | }
|
|---|
| 249 | |
|---|
| 250 |
|
|---|
| 251 | typedef struct
|
|---|
| 252 | {
|
|---|
| 253 | char *long_name;
|
|---|
| 254 | char *short_name;
|
|---|
| 255 | int *variable_ref;
|
|---|
| 256 | int variable_value;
|
|---|
| 257 | char *arg_name;
|
|---|
| 258 | char *doc_string;
|
|---|
| 259 | } TEXINDEX_OPTION;
|
|---|
| 260 |
|
|---|
| 261 | TEXINDEX_OPTION texindex_options[] = {
|
|---|
| 262 | { "--help", "-h", (int *)NULL, 0, (char *)NULL,
|
|---|
| 263 | N_("display this help and exit") },
|
|---|
| 264 | { "--keep", "-k", &keep_tempfiles, 1, (char *)NULL,
|
|---|
| 265 | N_("keep temporary files around after processing") },
|
|---|
| 266 | { "--no-keep", 0, &keep_tempfiles, 0, (char *)NULL,
|
|---|
| 267 | N_("do not keep temporary files around after processing (default)") },
|
|---|
| 268 | { "--output", "-o", (int *)NULL, 0, "FILE",
|
|---|
| 269 | N_("send output to FILE") },
|
|---|
| 270 | { "--version", (char *)NULL, (int *)NULL, 0, (char *)NULL,
|
|---|
| 271 | N_("display version information and exit") },
|
|---|
| 272 | { (char *)NULL, (char *)NULL, (int *)NULL, 0, (char *)NULL }
|
|---|
| 273 | };
|
|---|
| 274 |
|
|---|
| 275 | void
|
|---|
| 276 | usage (int result_value)
|
|---|
| 277 | {
|
|---|
| 278 | register int i;
|
|---|
| 279 | FILE *f = result_value ? stderr : stdout;
|
|---|
| 280 |
|
|---|
| 281 | fprintf (f, _("Usage: %s [OPTION]... FILE...\n"), program_name);
|
|---|
| 282 | fprintf (f, _("Generate a sorted index for each TeX output FILE.\n"));
|
|---|
| 283 | /* Avoid trigraph nonsense. */
|
|---|
| 284 | fprintf (f,
|
|---|
| 285 | _("Usually FILE... is specified as `foo.%c%c\' for a document `foo.texi'.\n"),
|
|---|
| 286 | '?', '?'); /* avoid trigraph in cat-id-tbl.c */
|
|---|
| 287 | fprintf (f, _("\nOptions:\n"));
|
|---|
| 288 |
|
|---|
| 289 | for (i = 0; texindex_options[i].long_name; i++)
|
|---|
| 290 | {
|
|---|
| 291 | putc (' ', f);
|
|---|
| 292 |
|
|---|
| 293 | if (texindex_options[i].short_name)
|
|---|
| 294 | fprintf (f, "%s, ", texindex_options[i].short_name);
|
|---|
| 295 |
|
|---|
| 296 | fprintf (f, "%s %s",
|
|---|
| 297 | texindex_options[i].long_name,
|
|---|
| 298 | texindex_options[i].arg_name
|
|---|
| 299 | ? texindex_options[i].arg_name : "");
|
|---|
| 300 |
|
|---|
| 301 | fprintf (f, "\t%s\n", _(texindex_options[i].doc_string));
|
|---|
| 302 | }
|
|---|
| 303 | fputs (_("\n\
|
|---|
| 304 | Email bug reports to bug-texinfo@gnu.org,\n\
|
|---|
| 305 | general questions and discussion to help-texinfo@gnu.org.\n\
|
|---|
| 306 | Texinfo home page: http://www.gnu.org/software/texinfo/"), f);
|
|---|
| 307 | fputs ("\n", f);
|
|---|
| 308 |
|
|---|
| 309 | xexit (result_value);
|
|---|
| 310 | }
|
|---|
| 311 |
|
|---|
| 312 | /* Decode the command line arguments to set the parameter variables
|
|---|
| 313 | and set up the vector of keyfields and the vector of input files. */
|
|---|
| 314 |
|
|---|
| 315 | void
|
|---|
| 316 | decode_command (int argc, char **argv)
|
|---|
| 317 | {
|
|---|
| 318 | int arg_index = 1;
|
|---|
| 319 | char **ip;
|
|---|
| 320 | char **op;
|
|---|
| 321 |
|
|---|
| 322 | /* Store default values into parameter variables. */
|
|---|
| 323 |
|
|---|
| 324 | tempdir = getenv ("TMPDIR");
|
|---|
| 325 | if (tempdir == NULL)
|
|---|
| 326 | tempdir = getenv ("TEMP");
|
|---|
| 327 | if (tempdir == NULL)
|
|---|
| 328 | tempdir = getenv ("TMP");
|
|---|
| 329 | if (tempdir == NULL)
|
|---|
| 330 | tempdir = DEFAULT_TMPDIR;
|
|---|
| 331 | else
|
|---|
| 332 | tempdir = concat (tempdir, "/");
|
|---|
| 333 |
|
|---|
| 334 | keep_tempfiles = 0;
|
|---|
| 335 |
|
|---|
| 336 | /* Allocate ARGC input files, which must be enough. */
|
|---|
| 337 |
|
|---|
| 338 | infiles = (char **) xmalloc (argc * sizeof (char *));
|
|---|
| 339 | outfiles = (char **) xmalloc (argc * sizeof (char *));
|
|---|
| 340 | ip = infiles;
|
|---|
| 341 | op = outfiles;
|
|---|
| 342 |
|
|---|
| 343 | while (arg_index < argc)
|
|---|
| 344 | {
|
|---|
| 345 | char *arg = argv[arg_index++];
|
|---|
| 346 |
|
|---|
| 347 | if (*arg == '-')
|
|---|
| 348 | {
|
|---|
| 349 | if (strcmp (arg, "--version") == 0)
|
|---|
| 350 | {
|
|---|
| 351 | printf ("texindex (GNU %s) %s\n", PACKAGE, VERSION);
|
|---|
| 352 | puts ("");
|
|---|
| 353 | puts ("Copyright (C) 2004 Free Software Foundation, Inc.");
|
|---|
| 354 | printf (_("There is NO warranty. You may redistribute this software\n\
|
|---|
| 355 | under the terms of the GNU General Public License.\n\
|
|---|
| 356 | For more information about these matters, see the files named COPYING.\n"));
|
|---|
| 357 | xexit (0);
|
|---|
| 358 | }
|
|---|
| 359 | else if ((strcmp (arg, "--keep") == 0) ||
|
|---|
| 360 | (strcmp (arg, "-k") == 0))
|
|---|
| 361 | {
|
|---|
| 362 | keep_tempfiles = 1;
|
|---|
| 363 | }
|
|---|
| 364 | else if ((strcmp (arg, "--help") == 0) ||
|
|---|
| 365 | (strcmp (arg, "-h") == 0))
|
|---|
| 366 | {
|
|---|
| 367 | usage (0);
|
|---|
| 368 | }
|
|---|
| 369 | else if ((strcmp (arg, "--output") == 0) ||
|
|---|
| 370 | (strcmp (arg, "-o") == 0))
|
|---|
| 371 | {
|
|---|
| 372 | if (argv[arg_index] != (char *)NULL)
|
|---|
| 373 | {
|
|---|
| 374 | arg_index++;
|
|---|
| 375 | if (op > outfiles)
|
|---|
| 376 | *(op - 1) = argv[arg_index];
|
|---|
| 377 | }
|
|---|
| 378 | else
|
|---|
| 379 | usage (1);
|
|---|
| 380 | }
|
|---|
| 381 | else
|
|---|
| 382 | usage (1);
|
|---|
| 383 | }
|
|---|
| 384 | else
|
|---|
| 385 | {
|
|---|
| 386 | *ip++ = arg;
|
|---|
| 387 | *op++ = (char *)NULL;
|
|---|
| 388 | }
|
|---|
| 389 | }
|
|---|
| 390 |
|
|---|
| 391 | /* Record number of keyfields and terminate list of filenames. */
|
|---|
| 392 | num_infiles = ip - infiles;
|
|---|
| 393 | *ip = (char *)NULL;
|
|---|
| 394 | if (num_infiles == 0)
|
|---|
| 395 | usage (1);
|
|---|
| 396 | }
|
|---|
| 397 | |
|---|
| 398 |
|
|---|
| 399 | /* Return a name for temporary file COUNT. */
|
|---|
| 400 |
|
|---|
| 401 | static char *
|
|---|
| 402 | maketempname (int count)
|
|---|
| 403 | {
|
|---|
| 404 | static char *tempbase = NULL;
|
|---|
| 405 | char tempsuffix[10];
|
|---|
| 406 |
|
|---|
| 407 | if (!tempbase)
|
|---|
| 408 | {
|
|---|
| 409 | int fd;
|
|---|
| 410 | tempbase = concat (tempdir, "txidxXXXXXX");
|
|---|
| 411 |
|
|---|
| 412 | fd = mkstemp (tempbase);
|
|---|
| 413 | if (fd == -1)
|
|---|
| 414 | pfatal_with_name (tempbase);
|
|---|
| 415 | }
|
|---|
| 416 |
|
|---|
| 417 | sprintf (tempsuffix, ".%d", count);
|
|---|
| 418 | return concat (tempbase, tempsuffix);
|
|---|
| 419 | }
|
|---|
| 420 |
|
|---|
| 421 |
|
|---|
| 422 | /* Delete all temporary files up to TO_COUNT. */
|
|---|
| 423 |
|
|---|
| 424 | void
|
|---|
| 425 | flush_tempfiles (int to_count)
|
|---|
| 426 | {
|
|---|
| 427 | if (keep_tempfiles)
|
|---|
| 428 | return;
|
|---|
| 429 | while (last_deleted_tempcount < to_count)
|
|---|
| 430 | unlink (maketempname (++last_deleted_tempcount));
|
|---|
| 431 | }
|
|---|
| 432 |
|
|---|
| 433 | |
|---|
| 434 |
|
|---|
| 435 | /* Compare LINE1 and LINE2 according to the specified set of keyfields. */
|
|---|
| 436 |
|
|---|
| 437 | int
|
|---|
| 438 | compare_full (const void *p1, const void *p2)
|
|---|
| 439 | {
|
|---|
| 440 | char **line1 = (char **) p1;
|
|---|
| 441 | char **line2 = (char **) p2;
|
|---|
| 442 | int i;
|
|---|
| 443 |
|
|---|
| 444 | /* Compare using the first keyfield;
|
|---|
| 445 | if that does not distinguish the lines, try the second keyfield;
|
|---|
| 446 | and so on. */
|
|---|
| 447 |
|
|---|
| 448 | for (i = 0; i < num_keyfields; i++)
|
|---|
| 449 | {
|
|---|
| 450 | long length1, length2;
|
|---|
| 451 | char *start1 = find_field (&keyfields[i], *line1, &length1);
|
|---|
| 452 | char *start2 = find_field (&keyfields[i], *line2, &length2);
|
|---|
| 453 | int tem = compare_field (&keyfields[i], start1, length1,
|
|---|
| 454 | *line1 - text_base,
|
|---|
| 455 | start2, length2, *line2 - text_base);
|
|---|
| 456 | if (tem)
|
|---|
| 457 | {
|
|---|
| 458 | if (keyfields[i].reverse)
|
|---|
| 459 | return -tem;
|
|---|
| 460 | return tem;
|
|---|
| 461 | }
|
|---|
| 462 | }
|
|---|
| 463 |
|
|---|
| 464 | return 0; /* Lines match exactly. */
|
|---|
| 465 | }
|
|---|
| 466 |
|
|---|
| 467 | /* Compare LINE1 and LINE2, described by structures
|
|---|
| 468 | in which the first keyfield is identified in advance.
|
|---|
| 469 | For positional sorting, assumes that the order of the lines in core
|
|---|
| 470 | reflects their nominal order. */
|
|---|
| 471 | int
|
|---|
| 472 | compare_prepared (const void *p1, const void *p2)
|
|---|
| 473 | {
|
|---|
| 474 | struct lineinfo *line1 = (struct lineinfo *) p1;
|
|---|
| 475 | struct lineinfo *line2 = (struct lineinfo *) p2;
|
|---|
| 476 | int i;
|
|---|
| 477 | int tem;
|
|---|
| 478 | char *text1, *text2;
|
|---|
| 479 |
|
|---|
| 480 | /* Compare using the first keyfield, which has been found for us already. */
|
|---|
| 481 | if (keyfields->positional)
|
|---|
| 482 | {
|
|---|
| 483 | if (line1->text - text_base > line2->text - text_base)
|
|---|
| 484 | tem = 1;
|
|---|
| 485 | else
|
|---|
| 486 | tem = -1;
|
|---|
| 487 | }
|
|---|
| 488 | else if (keyfields->numeric)
|
|---|
| 489 | tem = line1->key.number - line2->key.number;
|
|---|
| 490 | else
|
|---|
| 491 | tem = compare_field (keyfields, line1->key.text, line1->keylen, 0,
|
|---|
| 492 | line2->key.text, line2->keylen, 0);
|
|---|
| 493 | if (tem)
|
|---|
| 494 | {
|
|---|
| 495 | if (keyfields->reverse)
|
|---|
| 496 | return -tem;
|
|---|
| 497 | return tem;
|
|---|
| 498 | }
|
|---|
| 499 |
|
|---|
| 500 | text1 = line1->text;
|
|---|
| 501 | text2 = line2->text;
|
|---|
| 502 |
|
|---|
| 503 | /* Compare using the second keyfield;
|
|---|
| 504 | if that does not distinguish the lines, try the third keyfield;
|
|---|
| 505 | and so on. */
|
|---|
| 506 |
|
|---|
| 507 | for (i = 1; i < num_keyfields; i++)
|
|---|
| 508 | {
|
|---|
| 509 | long length1, length2;
|
|---|
| 510 | char *start1 = find_field (&keyfields[i], text1, &length1);
|
|---|
| 511 | char *start2 = find_field (&keyfields[i], text2, &length2);
|
|---|
| 512 | int tem = compare_field (&keyfields[i], start1, length1,
|
|---|
| 513 | text1 - text_base,
|
|---|
| 514 | start2, length2, text2 - text_base);
|
|---|
| 515 | if (tem)
|
|---|
| 516 | {
|
|---|
| 517 | if (keyfields[i].reverse)
|
|---|
| 518 | return -tem;
|
|---|
| 519 | return tem;
|
|---|
| 520 | }
|
|---|
| 521 | }
|
|---|
| 522 |
|
|---|
| 523 | return 0; /* Lines match exactly. */
|
|---|
| 524 | }
|
|---|
| 525 |
|
|---|
| 526 | /* Like compare_full but more general.
|
|---|
| 527 | You can pass any strings, and you can say how many keyfields to use.
|
|---|
| 528 | POS1 and POS2 should indicate the nominal positional ordering of
|
|---|
| 529 | the two lines in the input. */
|
|---|
| 530 |
|
|---|
| 531 | int
|
|---|
| 532 | compare_general (char *str1, char *str2, long int pos1, long int pos2, int use_keyfields)
|
|---|
| 533 | {
|
|---|
| 534 | int i;
|
|---|
| 535 |
|
|---|
| 536 | /* Compare using the first keyfield;
|
|---|
| 537 | if that does not distinguish the lines, try the second keyfield;
|
|---|
| 538 | and so on. */
|
|---|
| 539 |
|
|---|
| 540 | for (i = 0; i < use_keyfields; i++)
|
|---|
| 541 | {
|
|---|
| 542 | long length1, length2;
|
|---|
| 543 | char *start1 = find_field (&keyfields[i], str1, &length1);
|
|---|
| 544 | char *start2 = find_field (&keyfields[i], str2, &length2);
|
|---|
| 545 | int tem = compare_field (&keyfields[i], start1, length1, pos1,
|
|---|
| 546 | start2, length2, pos2);
|
|---|
| 547 | if (tem)
|
|---|
| 548 | {
|
|---|
| 549 | if (keyfields[i].reverse)
|
|---|
| 550 | return -tem;
|
|---|
| 551 | return tem;
|
|---|
| 552 | }
|
|---|
| 553 | }
|
|---|
| 554 |
|
|---|
| 555 | return 0; /* Lines match exactly. */
|
|---|
| 556 | }
|
|---|
| 557 |
|
|---|
| 558 | /* Find the start and length of a field in STR according to KEYFIELD.
|
|---|
| 559 | A pointer to the starting character is returned, and the length
|
|---|
| 560 | is stored into the int that LENGTHPTR points to. */
|
|---|
| 561 |
|
|---|
| 562 | char *
|
|---|
| 563 | find_field (struct keyfield *keyfield, char *str, long int *lengthptr)
|
|---|
| 564 | {
|
|---|
| 565 | char *start;
|
|---|
| 566 | char *end;
|
|---|
| 567 | char *(*fun) ();
|
|---|
| 568 |
|
|---|
| 569 | if (keyfield->braced)
|
|---|
| 570 | fun = find_braced_pos;
|
|---|
| 571 | else
|
|---|
| 572 | fun = find_pos;
|
|---|
| 573 |
|
|---|
| 574 | start = (*fun) (str, keyfield->startwords, keyfield->startchars,
|
|---|
| 575 | keyfield->ignore_blanks);
|
|---|
| 576 | if (keyfield->endwords < 0)
|
|---|
| 577 | {
|
|---|
| 578 | if (keyfield->braced)
|
|---|
| 579 | end = find_braced_end (start);
|
|---|
| 580 | else
|
|---|
| 581 | {
|
|---|
| 582 | end = start;
|
|---|
| 583 | while (*end && *end != '\n')
|
|---|
| 584 | end++;
|
|---|
| 585 | }
|
|---|
| 586 | }
|
|---|
| 587 | else
|
|---|
| 588 | {
|
|---|
| 589 | end = (*fun) (str, keyfield->endwords, keyfield->endchars, 0);
|
|---|
| 590 | if (end - str < start - str)
|
|---|
| 591 | end = start;
|
|---|
| 592 | }
|
|---|
| 593 | *lengthptr = end - start;
|
|---|
| 594 | return start;
|
|---|
| 595 | }
|
|---|
| 596 |
|
|---|
| 597 | /* Return a pointer to a specified place within STR,
|
|---|
| 598 | skipping (from the beginning) WORDS words and then CHARS chars.
|
|---|
| 599 | If IGNORE_BLANKS is nonzero, we skip all blanks
|
|---|
| 600 | after finding the specified word. */
|
|---|
| 601 |
|
|---|
| 602 | char *
|
|---|
| 603 | find_pos (char *str, int words, int chars, int ignore_blanks)
|
|---|
| 604 | {
|
|---|
| 605 | int i;
|
|---|
| 606 | char *p = str;
|
|---|
| 607 |
|
|---|
| 608 | for (i = 0; i < words; i++)
|
|---|
| 609 | {
|
|---|
| 610 | char c;
|
|---|
| 611 | /* Find next bunch of nonblanks and skip them. */
|
|---|
| 612 | while ((c = *p) == ' ' || c == '\t')
|
|---|
| 613 | p++;
|
|---|
| 614 | while ((c = *p) && c != '\n' && !(c == ' ' || c == '\t'))
|
|---|
| 615 | p++;
|
|---|
| 616 | if (!*p || *p == '\n')
|
|---|
| 617 | return p;
|
|---|
| 618 | }
|
|---|
| 619 |
|
|---|
| 620 | while (*p == ' ' || *p == '\t')
|
|---|
| 621 | p++;
|
|---|
| 622 |
|
|---|
| 623 | for (i = 0; i < chars; i++)
|
|---|
| 624 | {
|
|---|
| 625 | if (!*p || *p == '\n')
|
|---|
| 626 | break;
|
|---|
| 627 | p++;
|
|---|
| 628 | }
|
|---|
| 629 | return p;
|
|---|
| 630 | }
|
|---|
| 631 |
|
|---|
| 632 | /* Like find_pos but assumes that each field is surrounded by braces
|
|---|
| 633 | and that braces within fields are balanced. */
|
|---|
| 634 |
|
|---|
| 635 | char *
|
|---|
| 636 | find_braced_pos (char *str, int words, int chars, int ignore_blanks)
|
|---|
| 637 | {
|
|---|
| 638 | int i;
|
|---|
| 639 | int bracelevel;
|
|---|
| 640 | char *p = str;
|
|---|
| 641 | char c;
|
|---|
| 642 |
|
|---|
| 643 | for (i = 0; i < words; i++)
|
|---|
| 644 | {
|
|---|
| 645 | bracelevel = 1;
|
|---|
| 646 | while ((c = *p++) != '{' && c != '\n' && c)
|
|---|
| 647 | /* Do nothing. */ ;
|
|---|
| 648 | if (c != '{')
|
|---|
| 649 | return p - 1;
|
|---|
| 650 | while (bracelevel)
|
|---|
| 651 | {
|
|---|
| 652 | c = *p++;
|
|---|
| 653 | if (c == '{')
|
|---|
| 654 | bracelevel++;
|
|---|
| 655 | if (c == '}')
|
|---|
| 656 | bracelevel--;
|
|---|
| 657 | if (c == 0 || c == '\n')
|
|---|
| 658 | return p - 1;
|
|---|
| 659 | }
|
|---|
| 660 | }
|
|---|
| 661 |
|
|---|
| 662 | while ((c = *p++) != '{' && c != '\n' && c)
|
|---|
| 663 | /* Do nothing. */ ;
|
|---|
| 664 |
|
|---|
| 665 | if (c != '{')
|
|---|
| 666 | return p - 1;
|
|---|
| 667 |
|
|---|
| 668 | if (ignore_blanks)
|
|---|
| 669 | while ((c = *p) == ' ' || c == '\t')
|
|---|
| 670 | p++;
|
|---|
| 671 |
|
|---|
| 672 | for (i = 0; i < chars; i++)
|
|---|
| 673 | {
|
|---|
| 674 | if (!*p || *p == '\n')
|
|---|
| 675 | break;
|
|---|
| 676 | p++;
|
|---|
| 677 | }
|
|---|
| 678 | return p;
|
|---|
| 679 | }
|
|---|
| 680 |
|
|---|
| 681 | /* Find the end of the balanced-brace field which starts at STR.
|
|---|
| 682 | The position returned is just before the closing brace. */
|
|---|
| 683 |
|
|---|
| 684 | char *
|
|---|
| 685 | find_braced_end (char *str)
|
|---|
| 686 | {
|
|---|
| 687 | int bracelevel;
|
|---|
| 688 | char *p = str;
|
|---|
| 689 | char c;
|
|---|
| 690 |
|
|---|
| 691 | bracelevel = 1;
|
|---|
| 692 | while (bracelevel)
|
|---|
| 693 | {
|
|---|
| 694 | c = *p++;
|
|---|
| 695 | if (c == '{')
|
|---|
| 696 | bracelevel++;
|
|---|
| 697 | if (c == '}')
|
|---|
| 698 | bracelevel--;
|
|---|
| 699 | if (c == 0 || c == '\n')
|
|---|
| 700 | return p - 1;
|
|---|
| 701 | }
|
|---|
| 702 | return p - 1;
|
|---|
| 703 | }
|
|---|
| 704 |
|
|---|
| 705 | long
|
|---|
| 706 | find_value (char *start, long int length)
|
|---|
| 707 | {
|
|---|
| 708 | while (length != 0L)
|
|---|
| 709 | {
|
|---|
| 710 | if (isdigit (*start))
|
|---|
| 711 | return atol (start);
|
|---|
| 712 | length--;
|
|---|
| 713 | start++;
|
|---|
| 714 | }
|
|---|
| 715 | return 0l;
|
|---|
| 716 | }
|
|---|
| 717 |
|
|---|
| 718 | /* Vector used to translate characters for comparison.
|
|---|
| 719 | This is how we make all alphanumerics follow all else,
|
|---|
| 720 | and ignore case in the first sorting. */
|
|---|
| 721 | int char_order[256];
|
|---|
| 722 |
|
|---|
| 723 | void
|
|---|
| 724 | init_char_order (void)
|
|---|
| 725 | {
|
|---|
| 726 | int i;
|
|---|
| 727 | for (i = 1; i < 256; i++)
|
|---|
| 728 | char_order[i] = i;
|
|---|
| 729 |
|
|---|
| 730 | for (i = '0'; i <= '9'; i++)
|
|---|
| 731 | char_order[i] += 512;
|
|---|
| 732 |
|
|---|
| 733 | for (i = 'a'; i <= 'z'; i++)
|
|---|
| 734 | {
|
|---|
| 735 | char_order[i] = 512 + i;
|
|---|
| 736 | char_order[i + 'A' - 'a'] = 512 + i;
|
|---|
| 737 | }
|
|---|
| 738 | }
|
|---|
| 739 |
|
|---|
| 740 | /* Compare two fields (each specified as a start pointer and a character count)
|
|---|
| 741 | according to KEYFIELD.
|
|---|
| 742 | The sign of the value reports the relation between the fields. */
|
|---|
| 743 |
|
|---|
| 744 | int
|
|---|
| 745 | compare_field (struct keyfield *keyfield, char *start1, long int length1,
|
|---|
| 746 | long int pos1, char *start2, long int length2, long int pos2)
|
|---|
| 747 | {
|
|---|
| 748 | if (keyfields->positional)
|
|---|
| 749 | {
|
|---|
| 750 | if (pos1 > pos2)
|
|---|
| 751 | return 1;
|
|---|
| 752 | else
|
|---|
| 753 | return -1;
|
|---|
| 754 | }
|
|---|
| 755 | if (keyfield->numeric)
|
|---|
| 756 | {
|
|---|
| 757 | long value = find_value (start1, length1) - find_value (start2, length2);
|
|---|
| 758 | if (value > 0)
|
|---|
| 759 | return 1;
|
|---|
| 760 | if (value < 0)
|
|---|
| 761 | return -1;
|
|---|
| 762 | return 0;
|
|---|
| 763 | }
|
|---|
| 764 | else
|
|---|
| 765 | {
|
|---|
| 766 | char *p1 = start1;
|
|---|
| 767 | char *p2 = start2;
|
|---|
| 768 | char *e1 = start1 + length1;
|
|---|
| 769 | char *e2 = start2 + length2;
|
|---|
| 770 |
|
|---|
| 771 | while (1)
|
|---|
| 772 | {
|
|---|
| 773 | int c1, c2;
|
|---|
| 774 |
|
|---|
| 775 | if (p1 == e1)
|
|---|
| 776 | c1 = 0;
|
|---|
| 777 | else
|
|---|
| 778 | c1 = *p1++;
|
|---|
| 779 | if (p2 == e2)
|
|---|
| 780 | c2 = 0;
|
|---|
| 781 | else
|
|---|
| 782 | c2 = *p2++;
|
|---|
| 783 |
|
|---|
| 784 | if (char_order[c1] != char_order[c2])
|
|---|
| 785 | return char_order[c1] - char_order[c2];
|
|---|
| 786 | if (!c1)
|
|---|
| 787 | break;
|
|---|
| 788 | }
|
|---|
| 789 |
|
|---|
| 790 | /* Strings are equal except possibly for case. */
|
|---|
| 791 | p1 = start1;
|
|---|
| 792 | p2 = start2;
|
|---|
| 793 | while (1)
|
|---|
| 794 | {
|
|---|
| 795 | int c1, c2;
|
|---|
| 796 |
|
|---|
| 797 | if (p1 == e1)
|
|---|
| 798 | c1 = 0;
|
|---|
| 799 | else
|
|---|
| 800 | c1 = *p1++;
|
|---|
| 801 | if (p2 == e2)
|
|---|
| 802 | c2 = 0;
|
|---|
| 803 | else
|
|---|
| 804 | c2 = *p2++;
|
|---|
| 805 |
|
|---|
| 806 | if (c1 != c2)
|
|---|
| 807 | /* Reverse sign here so upper case comes out last. */
|
|---|
| 808 | return c2 - c1;
|
|---|
| 809 | if (!c1)
|
|---|
| 810 | break;
|
|---|
| 811 | }
|
|---|
| 812 |
|
|---|
| 813 | return 0;
|
|---|
| 814 | }
|
|---|
| 815 | }
|
|---|
| 816 | |
|---|
| 817 |
|
|---|
| 818 | /* A `struct linebuffer' is a structure which holds a line of text.
|
|---|
| 819 | `readline' reads a line from a stream into a linebuffer
|
|---|
| 820 | and works regardless of the length of the line. */
|
|---|
| 821 |
|
|---|
| 822 | struct linebuffer
|
|---|
| 823 | {
|
|---|
| 824 | long size;
|
|---|
| 825 | char *buffer;
|
|---|
| 826 | };
|
|---|
| 827 |
|
|---|
| 828 | /* Initialize LINEBUFFER for use. */
|
|---|
| 829 |
|
|---|
| 830 | void
|
|---|
| 831 | initbuffer (struct linebuffer *linebuffer)
|
|---|
| 832 | {
|
|---|
| 833 | linebuffer->size = 200;
|
|---|
| 834 | linebuffer->buffer = (char *) xmalloc (200);
|
|---|
| 835 | }
|
|---|
| 836 |
|
|---|
| 837 | /* Read a line of text from STREAM into LINEBUFFER.
|
|---|
| 838 | Return the length of the line. */
|
|---|
| 839 |
|
|---|
| 840 | long
|
|---|
| 841 | readline (struct linebuffer *linebuffer, FILE *stream)
|
|---|
| 842 | {
|
|---|
| 843 | char *buffer = linebuffer->buffer;
|
|---|
| 844 | char *p = linebuffer->buffer;
|
|---|
| 845 | char *end = p + linebuffer->size;
|
|---|
| 846 |
|
|---|
| 847 | while (1)
|
|---|
| 848 | {
|
|---|
| 849 | int c = getc (stream);
|
|---|
| 850 | if (p == end)
|
|---|
| 851 | {
|
|---|
| 852 | buffer = (char *) xrealloc (buffer, linebuffer->size *= 2);
|
|---|
| 853 | p += buffer - linebuffer->buffer;
|
|---|
| 854 | end += buffer - linebuffer->buffer;
|
|---|
| 855 | linebuffer->buffer = buffer;
|
|---|
| 856 | }
|
|---|
| 857 | if (c < 0 || c == '\n')
|
|---|
| 858 | {
|
|---|
| 859 | *p = 0;
|
|---|
| 860 | break;
|
|---|
| 861 | }
|
|---|
| 862 | *p++ = c;
|
|---|
| 863 | }
|
|---|
| 864 |
|
|---|
| 865 | return p - buffer;
|
|---|
| 866 | }
|
|---|
| 867 | |
|---|
| 868 |
|
|---|
| 869 | /* Sort an input file too big to sort in core. */
|
|---|
| 870 |
|
|---|
| 871 | void
|
|---|
| 872 | sort_offline (char *infile, off_t total, char *outfile)
|
|---|
| 873 | {
|
|---|
| 874 | /* More than enough. */
|
|---|
| 875 | int ntemps = 2 * (total + MAX_IN_CORE_SORT - 1) / MAX_IN_CORE_SORT;
|
|---|
| 876 | char **tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
|
|---|
| 877 | FILE *istream = fopen (infile, "r");
|
|---|
| 878 | int i;
|
|---|
| 879 | struct linebuffer lb;
|
|---|
| 880 | long linelength;
|
|---|
| 881 | int failure = 0;
|
|---|
| 882 |
|
|---|
| 883 | initbuffer (&lb);
|
|---|
| 884 |
|
|---|
| 885 | /* Read in one line of input data. */
|
|---|
| 886 |
|
|---|
| 887 | linelength = readline (&lb, istream);
|
|---|
| 888 |
|
|---|
| 889 | if (lb.buffer[0] != '\\' && lb.buffer[0] != '@')
|
|---|
| 890 | {
|
|---|
| 891 | error (_("%s: not a texinfo index file"), infile);
|
|---|
| 892 | return;
|
|---|
| 893 | }
|
|---|
| 894 |
|
|---|
| 895 | /* Split up the input into `ntemps' temporary files, or maybe fewer,
|
|---|
| 896 | and put the new files' names into `tempfiles' */
|
|---|
| 897 |
|
|---|
| 898 | for (i = 0; i < ntemps; i++)
|
|---|
| 899 | {
|
|---|
| 900 | char *outname = maketempname (++tempcount);
|
|---|
| 901 | FILE *ostream = fopen (outname, "w");
|
|---|
| 902 | long tempsize = 0;
|
|---|
| 903 |
|
|---|
| 904 | if (!ostream)
|
|---|
| 905 | pfatal_with_name (outname);
|
|---|
| 906 | tempfiles[i] = outname;
|
|---|
| 907 |
|
|---|
| 908 | /* Copy lines into this temp file as long as it does not make file
|
|---|
| 909 | "too big" or until there are no more lines. */
|
|---|
| 910 |
|
|---|
| 911 | while (tempsize + linelength + 1 <= MAX_IN_CORE_SORT)
|
|---|
| 912 | {
|
|---|
| 913 | tempsize += linelength + 1;
|
|---|
| 914 | fputs (lb.buffer, ostream);
|
|---|
| 915 | putc ('\n', ostream);
|
|---|
| 916 |
|
|---|
| 917 | /* Read another line of input data. */
|
|---|
| 918 |
|
|---|
| 919 | linelength = readline (&lb, istream);
|
|---|
| 920 | if (!linelength && feof (istream))
|
|---|
| 921 | break;
|
|---|
| 922 |
|
|---|
| 923 | if (lb.buffer[0] != '\\' && lb.buffer[0] != '@')
|
|---|
| 924 | {
|
|---|
| 925 | error (_("%s: not a texinfo index file"), infile);
|
|---|
| 926 | failure = 1;
|
|---|
| 927 | goto fail;
|
|---|
| 928 | }
|
|---|
| 929 | }
|
|---|
| 930 | fclose (ostream);
|
|---|
| 931 | if (feof (istream))
|
|---|
| 932 | break;
|
|---|
| 933 | }
|
|---|
| 934 |
|
|---|
| 935 | free (lb.buffer);
|
|---|
| 936 |
|
|---|
| 937 | fail:
|
|---|
| 938 | /* Record number of temp files we actually needed. */
|
|---|
| 939 |
|
|---|
| 940 | ntemps = i;
|
|---|
| 941 |
|
|---|
| 942 | /* Sort each tempfile into another tempfile.
|
|---|
| 943 | Delete the first set of tempfiles and put the names of the second
|
|---|
| 944 | into `tempfiles'. */
|
|---|
| 945 |
|
|---|
| 946 | for (i = 0; i < ntemps; i++)
|
|---|
| 947 | {
|
|---|
| 948 | char *newtemp = maketempname (++tempcount);
|
|---|
| 949 | sort_in_core (tempfiles[i], MAX_IN_CORE_SORT, newtemp);
|
|---|
| 950 | if (!keep_tempfiles)
|
|---|
| 951 | unlink (tempfiles[i]);
|
|---|
| 952 | tempfiles[i] = newtemp;
|
|---|
| 953 | }
|
|---|
| 954 |
|
|---|
| 955 | if (failure)
|
|---|
| 956 | return;
|
|---|
| 957 |
|
|---|
| 958 | /* Merge the tempfiles together and indexify. */
|
|---|
| 959 |
|
|---|
| 960 | merge_files (tempfiles, ntemps, outfile);
|
|---|
| 961 | }
|
|---|
| 962 | |
|---|
| 963 |
|
|---|
| 964 | /* Sort INFILE, whose size is TOTAL,
|
|---|
| 965 | assuming that is small enough to be done in-core,
|
|---|
| 966 | then indexify it and send the output to OUTFILE (or to stdout). */
|
|---|
| 967 |
|
|---|
| 968 | void
|
|---|
| 969 | sort_in_core (char *infile, int total, char *outfile)
|
|---|
| 970 | {
|
|---|
| 971 | char **nextline;
|
|---|
| 972 | char *data = (char *) xmalloc (total + 1);
|
|---|
| 973 | char *file_data;
|
|---|
| 974 | long file_size;
|
|---|
| 975 | int i;
|
|---|
| 976 | FILE *ostream = stdout;
|
|---|
| 977 | struct lineinfo *lineinfo;
|
|---|
| 978 |
|
|---|
| 979 | /* Read the contents of the file into the moby array `data'. */
|
|---|
| 980 |
|
|---|
| 981 | int desc = open (infile, O_RDONLY, 0);
|
|---|
| 982 |
|
|---|
| 983 | if (desc < 0)
|
|---|
| 984 | fatal (_("failure reopening %s"), infile);
|
|---|
| 985 | for (file_size = 0;;)
|
|---|
| 986 | {
|
|---|
| 987 | i = read (desc, data + file_size, total - file_size);
|
|---|
| 988 | if (i <= 0)
|
|---|
| 989 | break;
|
|---|
| 990 | file_size += i;
|
|---|
| 991 | }
|
|---|
| 992 | file_data = data;
|
|---|
| 993 | data[file_size] = 0;
|
|---|
| 994 |
|
|---|
| 995 | close (desc);
|
|---|
| 996 |
|
|---|
| 997 | if (file_size > 0 && data[0] != '\\' && data[0] != '@')
|
|---|
| 998 | {
|
|---|
| 999 | error (_("%s: not a texinfo index file"), infile);
|
|---|
| 1000 | return;
|
|---|
| 1001 | }
|
|---|
| 1002 |
|
|---|
| 1003 | init_char_order ();
|
|---|
| 1004 |
|
|---|
| 1005 | /* Sort routines want to know this address. */
|
|---|
| 1006 |
|
|---|
| 1007 | text_base = data;
|
|---|
| 1008 |
|
|---|
| 1009 | /* Create the array of pointers to lines, with a default size
|
|---|
| 1010 | frequently enough. */
|
|---|
| 1011 |
|
|---|
| 1012 | nlines = total / 50;
|
|---|
| 1013 | if (!nlines)
|
|---|
| 1014 | nlines = 2;
|
|---|
| 1015 | linearray = (char **) xmalloc (nlines * sizeof (char *));
|
|---|
| 1016 |
|
|---|
| 1017 | /* `nextline' points to the next free slot in this array.
|
|---|
| 1018 | `nlines' is the allocated size. */
|
|---|
| 1019 |
|
|---|
| 1020 | nextline = linearray;
|
|---|
| 1021 |
|
|---|
| 1022 | /* Parse the input file's data, and make entries for the lines. */
|
|---|
| 1023 |
|
|---|
| 1024 | nextline = parsefile (infile, nextline, file_data, file_size);
|
|---|
| 1025 | if (nextline == 0)
|
|---|
| 1026 | {
|
|---|
| 1027 | error (_("%s: not a texinfo index file"), infile);
|
|---|
| 1028 | return;
|
|---|
| 1029 | }
|
|---|
| 1030 |
|
|---|
| 1031 | /* Sort the lines. */
|
|---|
| 1032 |
|
|---|
| 1033 | /* If we have enough space, find the first keyfield of each line in advance.
|
|---|
| 1034 | Make a `struct lineinfo' for each line, which records the keyfield
|
|---|
| 1035 | as well as the line, and sort them. */
|
|---|
| 1036 |
|
|---|
| 1037 | lineinfo = malloc ((nextline - linearray) * sizeof (struct lineinfo));
|
|---|
| 1038 |
|
|---|
| 1039 | if (lineinfo)
|
|---|
| 1040 | {
|
|---|
| 1041 | struct lineinfo *lp;
|
|---|
| 1042 | char **p;
|
|---|
| 1043 |
|
|---|
| 1044 | for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
|
|---|
| 1045 | {
|
|---|
| 1046 | lp->text = *p;
|
|---|
| 1047 | lp->key.text = find_field (keyfields, *p, &lp->keylen);
|
|---|
| 1048 | if (keyfields->numeric)
|
|---|
| 1049 | lp->key.number = find_value (lp->key.text, lp->keylen);
|
|---|
| 1050 | }
|
|---|
| 1051 |
|
|---|
| 1052 | qsort (lineinfo, nextline - linearray, sizeof (struct lineinfo),
|
|---|
| 1053 | compare_prepared);
|
|---|
| 1054 |
|
|---|
| 1055 | for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
|
|---|
| 1056 | *p = lp->text;
|
|---|
| 1057 |
|
|---|
| 1058 | free (lineinfo);
|
|---|
| 1059 | }
|
|---|
| 1060 | else
|
|---|
| 1061 | qsort (linearray, nextline - linearray, sizeof (char *), compare_full);
|
|---|
| 1062 |
|
|---|
| 1063 | /* Open the output file. */
|
|---|
| 1064 |
|
|---|
| 1065 | if (outfile)
|
|---|
| 1066 | {
|
|---|
| 1067 | ostream = fopen (outfile, "w");
|
|---|
| 1068 | if (!ostream)
|
|---|
| 1069 | pfatal_with_name (outfile);
|
|---|
| 1070 | }
|
|---|
| 1071 |
|
|---|
| 1072 | writelines (linearray, nextline - linearray, ostream);
|
|---|
| 1073 | if (outfile)
|
|---|
| 1074 | fclose (ostream);
|
|---|
| 1075 |
|
|---|
| 1076 | free (linearray);
|
|---|
| 1077 | free (data);
|
|---|
| 1078 | }
|
|---|
| 1079 | |
|---|
| 1080 |
|
|---|
| 1081 | /* Parse an input string in core into lines.
|
|---|
| 1082 | DATA is the input string, and SIZE is its length.
|
|---|
| 1083 | Data goes in LINEARRAY starting at NEXTLINE.
|
|---|
| 1084 | The value returned is the first entry in LINEARRAY still unused.
|
|---|
| 1085 | Value 0 means input file contents are invalid. */
|
|---|
| 1086 |
|
|---|
| 1087 | char **
|
|---|
| 1088 | parsefile (char *filename, char **nextline, char *data, long int size)
|
|---|
| 1089 | {
|
|---|
| 1090 | char *p, *end;
|
|---|
| 1091 | char **line = nextline;
|
|---|
| 1092 |
|
|---|
| 1093 | p = data;
|
|---|
| 1094 | end = p + size;
|
|---|
| 1095 | *end = 0;
|
|---|
| 1096 |
|
|---|
| 1097 | while (p != end)
|
|---|
| 1098 | {
|
|---|
| 1099 | if (p[0] != '\\' && p[0] != '@')
|
|---|
| 1100 | return 0;
|
|---|
| 1101 |
|
|---|
| 1102 | *line = p;
|
|---|
| 1103 |
|
|---|
| 1104 | /* Find the first letter of the first field of this line. If it
|
|---|
| 1105 | is different from the first letter of the first field of the
|
|---|
| 1106 | first line, we need initial headers in the output index. */
|
|---|
| 1107 | while (*p && *p != '{')
|
|---|
| 1108 | p++;
|
|---|
| 1109 | if (p == end)
|
|---|
| 1110 | return 0;
|
|---|
| 1111 | p++;
|
|---|
| 1112 | if (first_initial)
|
|---|
| 1113 | {
|
|---|
| 1114 | if (first_initial != toupper (*p))
|
|---|
| 1115 | need_initials = 1;
|
|---|
| 1116 | }
|
|---|
| 1117 | else
|
|---|
| 1118 | first_initial = toupper (*p);
|
|---|
| 1119 |
|
|---|
| 1120 | while (*p && *p != '\n')
|
|---|
| 1121 | p++;
|
|---|
| 1122 | if (p != end)
|
|---|
| 1123 | p++;
|
|---|
| 1124 |
|
|---|
| 1125 | line++;
|
|---|
| 1126 | if (line == linearray + nlines)
|
|---|
| 1127 | {
|
|---|
| 1128 | char **old = linearray;
|
|---|
| 1129 | linearray = xrealloc (linearray, sizeof (char *) * (nlines *= 4));
|
|---|
| 1130 | line += linearray - old;
|
|---|
| 1131 | }
|
|---|
| 1132 | }
|
|---|
| 1133 |
|
|---|
| 1134 | return line;
|
|---|
| 1135 | }
|
|---|
| 1136 | |
|---|
| 1137 |
|
|---|
| 1138 | /* Indexification is a filter applied to the sorted lines
|
|---|
| 1139 | as they are being written to the output file.
|
|---|
| 1140 | Multiple entries for the same name, with different page numbers,
|
|---|
| 1141 | get combined into a single entry with multiple page numbers.
|
|---|
| 1142 | The first braced field, which is used for sorting, is discarded.
|
|---|
| 1143 | However, its first character is examined, folded to lower case,
|
|---|
| 1144 | and if it is different from that in the previous line fed to us
|
|---|
| 1145 | a \initial line is written with one argument, the new initial.
|
|---|
| 1146 |
|
|---|
| 1147 | If an entry has four braced fields, then the second and third
|
|---|
| 1148 | constitute primary and secondary names.
|
|---|
| 1149 | In this case, each change of primary name
|
|---|
| 1150 | generates a \primary line which contains only the primary name,
|
|---|
| 1151 | and in between these are \secondary lines which contain
|
|---|
| 1152 | just a secondary name and page numbers. */
|
|---|
| 1153 |
|
|---|
| 1154 | /* The last primary name we wrote a \primary entry for.
|
|---|
| 1155 | If only one level of indexing is being done, this is the last name seen. */
|
|---|
| 1156 | char *lastprimary;
|
|---|
| 1157 | /* Length of storage allocated for lastprimary. */
|
|---|
| 1158 | int lastprimarylength;
|
|---|
| 1159 |
|
|---|
| 1160 | /* Similar, for the secondary name. */
|
|---|
| 1161 | char *lastsecondary;
|
|---|
| 1162 | int lastsecondarylength;
|
|---|
| 1163 |
|
|---|
| 1164 | /* Zero if we are not in the middle of writing an entry.
|
|---|
| 1165 | One if we have written the beginning of an entry but have not
|
|---|
| 1166 | yet written any page numbers into it.
|
|---|
| 1167 | Greater than one if we have written the beginning of an entry
|
|---|
| 1168 | plus at least one page number. */
|
|---|
| 1169 | int pending;
|
|---|
| 1170 |
|
|---|
| 1171 | /* The initial (for sorting purposes) of the last primary entry written.
|
|---|
| 1172 | When this changes, a \initial {c} line is written */
|
|---|
| 1173 |
|
|---|
| 1174 | char *lastinitial;
|
|---|
| 1175 |
|
|---|
| 1176 | int lastinitiallength;
|
|---|
| 1177 |
|
|---|
| 1178 | /* When we need a string of length 1 for the value of lastinitial,
|
|---|
| 1179 | store it here. */
|
|---|
| 1180 |
|
|---|
| 1181 | char lastinitial1[2];
|
|---|
| 1182 |
|
|---|
| 1183 | /* Initialize static storage for writing an index. */
|
|---|
| 1184 |
|
|---|
| 1185 | void
|
|---|
| 1186 | init_index (void)
|
|---|
| 1187 | {
|
|---|
| 1188 | pending = 0;
|
|---|
| 1189 | lastinitial = lastinitial1;
|
|---|
| 1190 | lastinitial1[0] = 0;
|
|---|
| 1191 | lastinitial1[1] = 0;
|
|---|
| 1192 | lastinitiallength = 0;
|
|---|
| 1193 | lastprimarylength = 100;
|
|---|
| 1194 | lastprimary = (char *) xmalloc (lastprimarylength + 1);
|
|---|
| 1195 | memset (lastprimary, '\0', lastprimarylength + 1);
|
|---|
| 1196 | lastsecondarylength = 100;
|
|---|
| 1197 | lastsecondary = (char *) xmalloc (lastsecondarylength + 1);
|
|---|
| 1198 | memset (lastsecondary, '\0', lastsecondarylength + 1);
|
|---|
| 1199 | }
|
|---|
| 1200 |
|
|---|
| 1201 | /* Indexify. Merge entries for the same name,
|
|---|
| 1202 | insert headers for each initial character, etc. */
|
|---|
| 1203 |
|
|---|
| 1204 | void
|
|---|
| 1205 | indexify (char *line, FILE *ostream)
|
|---|
| 1206 | {
|
|---|
| 1207 | char *primary, *secondary, *pagenumber;
|
|---|
| 1208 | int primarylength, secondarylength = 0, pagelength;
|
|---|
| 1209 | int nosecondary;
|
|---|
| 1210 | int initiallength;
|
|---|
| 1211 | char *initial;
|
|---|
| 1212 | char initial1[2];
|
|---|
| 1213 | register char *p;
|
|---|
| 1214 |
|
|---|
| 1215 | /* First, analyze the parts of the entry fed to us this time. */
|
|---|
| 1216 |
|
|---|
| 1217 | p = find_braced_pos (line, 0, 0, 0);
|
|---|
| 1218 | if (*p == '{')
|
|---|
| 1219 | {
|
|---|
| 1220 | initial = p;
|
|---|
| 1221 | /* Get length of inner pair of braces starting at `p',
|
|---|
| 1222 | including that inner pair of braces. */
|
|---|
| 1223 | initiallength = find_braced_end (p + 1) + 1 - p;
|
|---|
| 1224 | }
|
|---|
| 1225 | else
|
|---|
| 1226 | {
|
|---|
| 1227 | initial = initial1;
|
|---|
| 1228 | initial1[0] = toupper (*p);
|
|---|
| 1229 | initial1[1] = 0;
|
|---|
| 1230 | initiallength = 1;
|
|---|
| 1231 | }
|
|---|
| 1232 |
|
|---|
| 1233 | pagenumber = find_braced_pos (line, 1, 0, 0);
|
|---|
| 1234 | pagelength = find_braced_end (pagenumber) - pagenumber;
|
|---|
| 1235 | if (pagelength == 0)
|
|---|
| 1236 | fatal (_("No page number in %s"), line);
|
|---|
| 1237 |
|
|---|
| 1238 | primary = find_braced_pos (line, 2, 0, 0);
|
|---|
| 1239 | primarylength = find_braced_end (primary) - primary;
|
|---|
| 1240 |
|
|---|
| 1241 | secondary = find_braced_pos (line, 3, 0, 0);
|
|---|
| 1242 | nosecondary = !*secondary;
|
|---|
| 1243 | if (!nosecondary)
|
|---|
| 1244 | secondarylength = find_braced_end (secondary) - secondary;
|
|---|
| 1245 |
|
|---|
| 1246 | /* If the primary is different from before, make a new primary entry. */
|
|---|
| 1247 | if (strncmp (primary, lastprimary, primarylength))
|
|---|
| 1248 | {
|
|---|
| 1249 | /* Close off current secondary entry first, if one is open. */
|
|---|
| 1250 | if (pending)
|
|---|
| 1251 | {
|
|---|
| 1252 | fputs ("}\n", ostream);
|
|---|
| 1253 | pending = 0;
|
|---|
| 1254 | }
|
|---|
| 1255 |
|
|---|
| 1256 | /* If this primary has a different initial, include an entry for
|
|---|
| 1257 | the initial. */
|
|---|
| 1258 | if (need_initials &&
|
|---|
| 1259 | (initiallength != lastinitiallength ||
|
|---|
| 1260 | strncmp (initial, lastinitial, initiallength)))
|
|---|
| 1261 | {
|
|---|
| 1262 | fprintf (ostream, "\\initial {");
|
|---|
| 1263 | fwrite (initial, 1, initiallength, ostream);
|
|---|
| 1264 | fputs ("}\n", ostream);
|
|---|
| 1265 | if (initial == initial1)
|
|---|
| 1266 | {
|
|---|
| 1267 | lastinitial = lastinitial1;
|
|---|
| 1268 | *lastinitial1 = *initial1;
|
|---|
| 1269 | }
|
|---|
| 1270 | else
|
|---|
| 1271 | {
|
|---|
| 1272 | lastinitial = initial;
|
|---|
| 1273 | }
|
|---|
| 1274 | lastinitiallength = initiallength;
|
|---|
| 1275 | }
|
|---|
| 1276 |
|
|---|
| 1277 | /* Make the entry for the primary. */
|
|---|
| 1278 | if (nosecondary)
|
|---|
| 1279 | fputs ("\\entry {", ostream);
|
|---|
| 1280 | else
|
|---|
| 1281 | fputs ("\\primary {", ostream);
|
|---|
| 1282 | fwrite (primary, primarylength, 1, ostream);
|
|---|
| 1283 | if (nosecondary)
|
|---|
| 1284 | {
|
|---|
| 1285 | fputs ("}{", ostream);
|
|---|
| 1286 | pending = 1;
|
|---|
| 1287 | }
|
|---|
| 1288 | else
|
|---|
| 1289 | fputs ("}\n", ostream);
|
|---|
| 1290 |
|
|---|
| 1291 | /* Record name of most recent primary. */
|
|---|
| 1292 | if (lastprimarylength < primarylength)
|
|---|
| 1293 | {
|
|---|
| 1294 | lastprimarylength = primarylength + 100;
|
|---|
| 1295 | lastprimary = (char *) xrealloc (lastprimary,
|
|---|
| 1296 | 1 + lastprimarylength);
|
|---|
| 1297 | }
|
|---|
| 1298 | strncpy (lastprimary, primary, primarylength);
|
|---|
| 1299 | lastprimary[primarylength] = 0;
|
|---|
| 1300 |
|
|---|
| 1301 | /* There is no current secondary within this primary, now. */
|
|---|
| 1302 | lastsecondary[0] = 0;
|
|---|
| 1303 | }
|
|---|
| 1304 |
|
|---|
| 1305 | /* Should not have an entry with no subtopic following one with a
|
|---|
| 1306 | subtopic. */
|
|---|
| 1307 |
|
|---|
| 1308 | if (nosecondary && *lastsecondary)
|
|---|
| 1309 | error (_("entry %s follows an entry with a secondary name"), line);
|
|---|
| 1310 |
|
|---|
| 1311 | /* Start a new secondary entry if necessary. */
|
|---|
| 1312 | if (!nosecondary && strncmp (secondary, lastsecondary, secondarylength))
|
|---|
| 1313 | {
|
|---|
| 1314 | if (pending)
|
|---|
| 1315 | {
|
|---|
| 1316 | fputs ("}\n", ostream);
|
|---|
| 1317 | pending = 0;
|
|---|
| 1318 | }
|
|---|
| 1319 |
|
|---|
| 1320 | /* Write the entry for the secondary. */
|
|---|
| 1321 | fputs ("\\secondary {", ostream);
|
|---|
| 1322 | fwrite (secondary, secondarylength, 1, ostream);
|
|---|
| 1323 | fputs ("}{", ostream);
|
|---|
| 1324 | pending = 1;
|
|---|
| 1325 |
|
|---|
| 1326 | /* Record name of most recent secondary. */
|
|---|
| 1327 | if (lastsecondarylength < secondarylength)
|
|---|
| 1328 | {
|
|---|
| 1329 | lastsecondarylength = secondarylength + 100;
|
|---|
| 1330 | lastsecondary = (char *) xrealloc (lastsecondary,
|
|---|
| 1331 | 1 + lastsecondarylength);
|
|---|
| 1332 | }
|
|---|
| 1333 | strncpy (lastsecondary, secondary, secondarylength);
|
|---|
| 1334 | lastsecondary[secondarylength] = 0;
|
|---|
| 1335 | }
|
|---|
| 1336 |
|
|---|
| 1337 | /* Here to add one more page number to the current entry. */
|
|---|
| 1338 | if (pending++ != 1)
|
|---|
| 1339 | fputs (", ", ostream); /* Punctuate first, if this is not the first. */
|
|---|
| 1340 | fwrite (pagenumber, pagelength, 1, ostream);
|
|---|
| 1341 | }
|
|---|
| 1342 |
|
|---|
| 1343 | /* Close out any unfinished output entry. */
|
|---|
| 1344 |
|
|---|
| 1345 | void
|
|---|
| 1346 | finish_index (FILE *ostream)
|
|---|
| 1347 | {
|
|---|
| 1348 | if (pending)
|
|---|
| 1349 | fputs ("}\n", ostream);
|
|---|
| 1350 | free (lastprimary);
|
|---|
| 1351 | free (lastsecondary);
|
|---|
| 1352 | }
|
|---|
| 1353 | |
|---|
| 1354 |
|
|---|
| 1355 | /* Copy the lines in the sorted order.
|
|---|
| 1356 | Each line is copied out of the input file it was found in. */
|
|---|
| 1357 |
|
|---|
| 1358 | void
|
|---|
| 1359 | writelines (char **linearray, int nlines, FILE *ostream)
|
|---|
| 1360 | {
|
|---|
| 1361 | char **stop_line = linearray + nlines;
|
|---|
| 1362 | char **next_line;
|
|---|
| 1363 |
|
|---|
| 1364 | init_index ();
|
|---|
| 1365 |
|
|---|
| 1366 | /* Output the text of the lines, and free the buffer space. */
|
|---|
| 1367 |
|
|---|
| 1368 | for (next_line = linearray; next_line != stop_line; next_line++)
|
|---|
| 1369 | {
|
|---|
| 1370 | /* If -u was specified, output the line only if distinct from
|
|---|
| 1371 | previous one. */
|
|---|
| 1372 | if (next_line == linearray
|
|---|
| 1373 | /* Compare previous line with this one, using only the
|
|---|
| 1374 | explicitly specd keyfields. */
|
|---|
| 1375 | || compare_general (*(next_line - 1), *next_line, 0L, 0L,
|
|---|
| 1376 | num_keyfields - 1))
|
|---|
| 1377 | {
|
|---|
| 1378 | char *p = *next_line;
|
|---|
| 1379 | char c;
|
|---|
| 1380 |
|
|---|
| 1381 | while ((c = *p++) && c != '\n')
|
|---|
| 1382 | /* Do nothing. */ ;
|
|---|
| 1383 | *(p - 1) = 0;
|
|---|
| 1384 | indexify (*next_line, ostream);
|
|---|
| 1385 | }
|
|---|
| 1386 | }
|
|---|
| 1387 |
|
|---|
| 1388 | finish_index (ostream);
|
|---|
| 1389 | }
|
|---|
| 1390 | |
|---|
| 1391 |
|
|---|
| 1392 | /* Assume (and optionally verify) that each input file is sorted;
|
|---|
| 1393 | merge them and output the result.
|
|---|
| 1394 | Returns nonzero if any input file fails to be sorted.
|
|---|
| 1395 |
|
|---|
| 1396 | This is the high-level interface that can handle an unlimited
|
|---|
| 1397 | number of files. */
|
|---|
| 1398 |
|
|---|
| 1399 | #define MAX_DIRECT_MERGE 10
|
|---|
| 1400 |
|
|---|
| 1401 | int
|
|---|
| 1402 | merge_files (char **infiles, int nfiles, char *outfile)
|
|---|
| 1403 | {
|
|---|
| 1404 | char **tempfiles;
|
|---|
| 1405 | int ntemps;
|
|---|
| 1406 | int i;
|
|---|
| 1407 | int value = 0;
|
|---|
| 1408 | int start_tempcount = tempcount;
|
|---|
| 1409 |
|
|---|
| 1410 | if (nfiles <= MAX_DIRECT_MERGE)
|
|---|
| 1411 | return merge_direct (infiles, nfiles, outfile);
|
|---|
| 1412 |
|
|---|
| 1413 | /* Merge groups of MAX_DIRECT_MERGE input files at a time,
|
|---|
| 1414 | making a temporary file to hold each group's result. */
|
|---|
| 1415 |
|
|---|
| 1416 | ntemps = (nfiles + MAX_DIRECT_MERGE - 1) / MAX_DIRECT_MERGE;
|
|---|
| 1417 | tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
|
|---|
| 1418 | for (i = 0; i < ntemps; i++)
|
|---|
| 1419 | {
|
|---|
| 1420 | int nf = MAX_DIRECT_MERGE;
|
|---|
| 1421 | if (i + 1 == ntemps)
|
|---|
| 1422 | nf = nfiles - i * MAX_DIRECT_MERGE;
|
|---|
| 1423 | tempfiles[i] = maketempname (++tempcount);
|
|---|
| 1424 | value |= merge_direct (&infiles[i * MAX_DIRECT_MERGE], nf, tempfiles[i]);
|
|---|
| 1425 | }
|
|---|
| 1426 |
|
|---|
| 1427 | /* All temporary files that existed before are no longer needed
|
|---|
| 1428 | since their contents have been merged into our new tempfiles.
|
|---|
| 1429 | So delete them. */
|
|---|
| 1430 | flush_tempfiles (start_tempcount);
|
|---|
| 1431 |
|
|---|
| 1432 | /* Now merge the temporary files we created. */
|
|---|
| 1433 |
|
|---|
| 1434 | merge_files (tempfiles, ntemps, outfile);
|
|---|
| 1435 |
|
|---|
| 1436 | free (tempfiles);
|
|---|
| 1437 |
|
|---|
| 1438 | return value;
|
|---|
| 1439 | }
|
|---|
| 1440 | |
|---|
| 1441 |
|
|---|
| 1442 | /* Assume (and optionally verify) that each input file is sorted;
|
|---|
| 1443 | merge them and output the result.
|
|---|
| 1444 | Returns nonzero if any input file fails to be sorted.
|
|---|
| 1445 |
|
|---|
| 1446 | This version of merging will not work if the number of
|
|---|
| 1447 | input files gets too high. Higher level functions
|
|---|
| 1448 | use it only with a bounded number of input files. */
|
|---|
| 1449 |
|
|---|
| 1450 | int
|
|---|
| 1451 | merge_direct (char **infiles, int nfiles, char *outfile)
|
|---|
| 1452 | {
|
|---|
| 1453 | struct linebuffer *lb1, *lb2;
|
|---|
| 1454 | struct linebuffer **thisline, **prevline;
|
|---|
| 1455 | FILE **streams;
|
|---|
| 1456 | int i;
|
|---|
| 1457 | int nleft;
|
|---|
| 1458 | int lossage = 0;
|
|---|
| 1459 | int *file_lossage;
|
|---|
| 1460 | struct linebuffer *prev_out = 0;
|
|---|
| 1461 | FILE *ostream = stdout;
|
|---|
| 1462 |
|
|---|
| 1463 | if (outfile)
|
|---|
| 1464 | {
|
|---|
| 1465 | ostream = fopen (outfile, "w");
|
|---|
| 1466 | }
|
|---|
| 1467 | if (!ostream)
|
|---|
| 1468 | pfatal_with_name (outfile);
|
|---|
| 1469 |
|
|---|
| 1470 | init_index ();
|
|---|
| 1471 |
|
|---|
| 1472 | if (nfiles == 0)
|
|---|
| 1473 | {
|
|---|
| 1474 | if (outfile)
|
|---|
| 1475 | fclose (ostream);
|
|---|
| 1476 | return 0;
|
|---|
| 1477 | }
|
|---|
| 1478 |
|
|---|
| 1479 | /* For each file, make two line buffers. Also, for each file, there
|
|---|
| 1480 | is an element of `thisline' which points at any time to one of the
|
|---|
| 1481 | file's two buffers, and an element of `prevline' which points to
|
|---|
| 1482 | the other buffer. `thisline' is supposed to point to the next
|
|---|
| 1483 | available line from the file, while `prevline' holds the last file
|
|---|
| 1484 | line used, which is remembered so that we can verify that the file
|
|---|
| 1485 | is properly sorted. */
|
|---|
| 1486 |
|
|---|
| 1487 | /* lb1 and lb2 contain one buffer each per file. */
|
|---|
| 1488 | lb1 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));
|
|---|
| 1489 | lb2 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));
|
|---|
| 1490 |
|
|---|
| 1491 | /* thisline[i] points to the linebuffer holding the next available
|
|---|
| 1492 | line in file i, or is zero if there are no lines left in that file. */
|
|---|
| 1493 | thisline = (struct linebuffer **)
|
|---|
| 1494 | xmalloc (nfiles * sizeof (struct linebuffer *));
|
|---|
| 1495 | /* prevline[i] points to the linebuffer holding the last used line
|
|---|
| 1496 | from file i. This is just for verifying that file i is properly
|
|---|
| 1497 | sorted. */
|
|---|
| 1498 | prevline = (struct linebuffer **)
|
|---|
| 1499 | xmalloc (nfiles * sizeof (struct linebuffer *));
|
|---|
| 1500 | /* streams[i] holds the input stream for file i. */
|
|---|
| 1501 | streams = (FILE **) xmalloc (nfiles * sizeof (FILE *));
|
|---|
| 1502 | /* file_lossage[i] is nonzero if we already know file i is not
|
|---|
| 1503 | properly sorted. */
|
|---|
| 1504 | file_lossage = (int *) xmalloc (nfiles * sizeof (int));
|
|---|
| 1505 |
|
|---|
| 1506 | /* Allocate and initialize all that storage. */
|
|---|
| 1507 |
|
|---|
| 1508 | for (i = 0; i < nfiles; i++)
|
|---|
| 1509 | {
|
|---|
| 1510 | initbuffer (&lb1[i]);
|
|---|
| 1511 | initbuffer (&lb2[i]);
|
|---|
| 1512 | thisline[i] = &lb1[i];
|
|---|
| 1513 | prevline[i] = &lb2[i];
|
|---|
| 1514 | file_lossage[i] = 0;
|
|---|
| 1515 | streams[i] = fopen (infiles[i], "r");
|
|---|
| 1516 | if (!streams[i])
|
|---|
| 1517 | pfatal_with_name (infiles[i]);
|
|---|
| 1518 |
|
|---|
| 1519 | readline (thisline[i], streams[i]);
|
|---|
| 1520 | }
|
|---|
| 1521 |
|
|---|
| 1522 | /* Keep count of number of files not at eof. */
|
|---|
| 1523 | nleft = nfiles;
|
|---|
| 1524 |
|
|---|
| 1525 | while (nleft)
|
|---|
| 1526 | {
|
|---|
| 1527 | struct linebuffer *best = 0;
|
|---|
| 1528 | struct linebuffer *exch;
|
|---|
| 1529 | int bestfile = -1;
|
|---|
| 1530 | int i;
|
|---|
| 1531 |
|
|---|
| 1532 | /* Look at the next avail line of each file; choose the least one. */
|
|---|
| 1533 |
|
|---|
| 1534 | for (i = 0; i < nfiles; i++)
|
|---|
| 1535 | {
|
|---|
| 1536 | if (thisline[i] &&
|
|---|
| 1537 | (!best ||
|
|---|
| 1538 | 0 < compare_general (best->buffer, thisline[i]->buffer,
|
|---|
| 1539 | (long) bestfile, (long) i, num_keyfields)))
|
|---|
| 1540 | {
|
|---|
| 1541 | best = thisline[i];
|
|---|
| 1542 | bestfile = i;
|
|---|
| 1543 | }
|
|---|
| 1544 | }
|
|---|
| 1545 |
|
|---|
| 1546 | /* Output that line, unless it matches the previous one and we
|
|---|
| 1547 | don't want duplicates. */
|
|---|
| 1548 |
|
|---|
| 1549 | if (!(prev_out &&
|
|---|
| 1550 | !compare_general (prev_out->buffer,
|
|---|
| 1551 | best->buffer, 0L, 1L, num_keyfields - 1)))
|
|---|
| 1552 | indexify (best->buffer, ostream);
|
|---|
| 1553 | prev_out = best;
|
|---|
| 1554 |
|
|---|
| 1555 | /* Now make the line the previous of its file, and fetch a new
|
|---|
| 1556 | line from that file. */
|
|---|
| 1557 |
|
|---|
| 1558 | exch = prevline[bestfile];
|
|---|
| 1559 | prevline[bestfile] = thisline[bestfile];
|
|---|
| 1560 | thisline[bestfile] = exch;
|
|---|
| 1561 |
|
|---|
| 1562 | while (1)
|
|---|
| 1563 | {
|
|---|
| 1564 | /* If the file has no more, mark it empty. */
|
|---|
| 1565 |
|
|---|
| 1566 | if (feof (streams[bestfile]))
|
|---|
| 1567 | {
|
|---|
| 1568 | thisline[bestfile] = 0;
|
|---|
| 1569 | /* Update the number of files still not empty. */
|
|---|
| 1570 | nleft--;
|
|---|
| 1571 | break;
|
|---|
| 1572 | }
|
|---|
| 1573 | readline (thisline[bestfile], streams[bestfile]);
|
|---|
| 1574 | if (thisline[bestfile]->buffer[0] || !feof (streams[bestfile]))
|
|---|
| 1575 | break;
|
|---|
| 1576 | }
|
|---|
| 1577 | }
|
|---|
| 1578 |
|
|---|
| 1579 | finish_index (ostream);
|
|---|
| 1580 |
|
|---|
| 1581 | /* Free all storage and close all input streams. */
|
|---|
| 1582 |
|
|---|
| 1583 | for (i = 0; i < nfiles; i++)
|
|---|
| 1584 | {
|
|---|
| 1585 | fclose (streams[i]);
|
|---|
| 1586 | free (lb1[i].buffer);
|
|---|
| 1587 | free (lb2[i].buffer);
|
|---|
| 1588 | }
|
|---|
| 1589 | free (file_lossage);
|
|---|
| 1590 | free (lb1);
|
|---|
| 1591 | free (lb2);
|
|---|
| 1592 | free (thisline);
|
|---|
| 1593 | free (prevline);
|
|---|
| 1594 | free (streams);
|
|---|
| 1595 |
|
|---|
| 1596 | if (outfile)
|
|---|
| 1597 | fclose (ostream);
|
|---|
| 1598 |
|
|---|
| 1599 | return lossage;
|
|---|
| 1600 | }
|
|---|
| 1601 | |
|---|
| 1602 |
|
|---|
| 1603 | /* Print error message and exit. */
|
|---|
| 1604 |
|
|---|
| 1605 | void
|
|---|
| 1606 | fatal (const char *format, const char *arg)
|
|---|
| 1607 | {
|
|---|
| 1608 | error (format, arg);
|
|---|
| 1609 | xexit (1);
|
|---|
| 1610 | }
|
|---|
| 1611 |
|
|---|
| 1612 | /* Print error message. FORMAT is printf control string, ARG is arg for it. */
|
|---|
| 1613 | void
|
|---|
| 1614 | error (const char *format, const char *arg)
|
|---|
| 1615 | {
|
|---|
| 1616 | printf ("%s: ", program_name);
|
|---|
| 1617 | printf (format, arg);
|
|---|
| 1618 | if (format[strlen (format) -1] != '\n')
|
|---|
| 1619 | printf ("\n");
|
|---|
| 1620 | }
|
|---|
| 1621 |
|
|---|
| 1622 | void
|
|---|
| 1623 | perror_with_name (const char *name)
|
|---|
| 1624 | {
|
|---|
| 1625 | fprintf (stderr, "%s: ", program_name);
|
|---|
| 1626 | perror (name);
|
|---|
| 1627 | }
|
|---|
| 1628 |
|
|---|
| 1629 | void
|
|---|
| 1630 | pfatal_with_name (const char *name)
|
|---|
| 1631 | {
|
|---|
| 1632 | perror_with_name (name);
|
|---|
| 1633 | xexit (1);
|
|---|
| 1634 | }
|
|---|
| 1635 |
|
|---|
| 1636 | |
|---|
| 1637 |
|
|---|
| 1638 | /* Return a newly-allocated string concatenating S1 and S2. */
|
|---|
| 1639 |
|
|---|
| 1640 | char *
|
|---|
| 1641 | concat (char *s1, char *s2)
|
|---|
| 1642 | {
|
|---|
| 1643 | int len1 = strlen (s1), len2 = strlen (s2);
|
|---|
| 1644 | char *result = (char *) xmalloc (len1 + len2 + 1);
|
|---|
| 1645 |
|
|---|
| 1646 | strcpy (result, s1);
|
|---|
| 1647 | strcpy (result + len1, s2);
|
|---|
| 1648 | *(result + len1 + len2) = 0;
|
|---|
| 1649 |
|
|---|
| 1650 | return result;
|
|---|
| 1651 | }
|
|---|