source: trunk/texinfo/info/search.c@ 2799

Last change on this file since 2799 was 2617, checked in by bird, 20 years ago

GNU Texinfo 4.8

File size: 14.2 KB
Line 
1/* search.c -- searching large bodies of text.
2 $Id: search.c,v 1.3 2004/04/11 17:56:46 karl Exp $
3
4 Copyright (C) 1993, 1997, 1998, 2002, 2004 Free Software Foundation, Inc.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19
20 Written by Brian Fox (bfox@ai.mit.edu). */
21
22#include "info.h"
23
24#include "search.h"
25#include "nodes.h"
26
27/* The search functions take two arguments:
28
29 1) a string to search for, and
30
31 2) a pointer to a SEARCH_BINDING which contains the buffer, start,
32 and end of the search.
33
34 They return a long, which is the offset from the start of the buffer
35 at which the match was found. An offset of -1 indicates failure. */
36
37/* A function which makes a binding with buffer and bounds. */
38SEARCH_BINDING *
39make_binding (char *buffer, long int start, long int end)
40{
41 SEARCH_BINDING *binding;
42
43 binding = (SEARCH_BINDING *)xmalloc (sizeof (SEARCH_BINDING));
44 binding->buffer = buffer;
45 binding->start = start;
46 binding->end = end;
47 binding->flags = 0;
48
49 return (binding);
50}
51
52/* Make a copy of BINDING without duplicating the data. */
53SEARCH_BINDING *
54copy_binding (SEARCH_BINDING *binding)
55{
56 SEARCH_BINDING *copy;
57
58 copy = make_binding (binding->buffer, binding->start, binding->end);
59 copy->flags = binding->flags;
60 return (copy);
61}
62
63
64
65/* **************************************************************** */
66/* */
67/* The Actual Searching Functions */
68/* */
69/* **************************************************************** */
70
71/* Search forwards or backwards for the text delimited by BINDING.
72 The search is forwards if BINDING->start is greater than BINDING->end. */
73long
74search (char *string, SEARCH_BINDING *binding)
75{
76 long result;
77
78 /* If the search is backwards, then search backwards, otherwise forwards. */
79 if (binding->start > binding->end)
80 result = search_backward (string, binding);
81 else
82 result = search_forward (string, binding);
83
84 return (result);
85}
86
87/* Search forwards for STRING through the text delimited in BINDING. */
88long
89search_forward (char *string, SEARCH_BINDING *binding)
90{
91 register int c, i, len;
92 register char *buff, *end;
93 char *alternate = (char *)NULL;
94
95 len = strlen (string);
96
97 /* We match characters in the search buffer against STRING and ALTERNATE.
98 ALTERNATE is a case reversed version of STRING; this is cheaper than
99 case folding each character before comparison. Alternate is only
100 used if the case folding bit is turned on in the passed BINDING. */
101
102 if (binding->flags & S_FoldCase)
103 {
104 alternate = xstrdup (string);
105
106 for (i = 0; i < len; i++)
107 {
108 if (islower (alternate[i]))
109 alternate[i] = toupper (alternate[i]);
110 else if (isupper (alternate[i]))
111 alternate[i] = tolower (alternate[i]);
112 }
113 }
114
115 buff = binding->buffer + binding->start;
116 end = binding->buffer + binding->end + 1;
117
118 while (buff < (end - len))
119 {
120 for (i = 0; i < len; i++)
121 {
122 c = buff[i];
123
124 if ((c != string[i]) && (!alternate || c != alternate[i]))
125 break;
126 }
127
128 if (!string[i])
129 {
130 if (alternate)
131 free (alternate);
132 if (binding->flags & S_SkipDest)
133 buff += len;
134 return ((long) (buff - binding->buffer));
135 }
136
137 buff++;
138 }
139
140 if (alternate)
141 free (alternate);
142
143 return ((long) -1);
144}
145
146/* Search for STRING backwards through the text delimited in BINDING. */
147long
148search_backward (char *input_string, SEARCH_BINDING *binding)
149{
150 register int c, i, len;
151 register char *buff, *end;
152 char *string;
153 char *alternate = (char *)NULL;
154
155 len = strlen (input_string);
156
157 /* Reverse the characters in the search string. */
158 string = (char *)xmalloc (1 + len);
159 for (c = 0, i = len - 1; input_string[c]; c++, i--)
160 string[i] = input_string[c];
161
162 string[c] = '\0';
163
164 /* We match characters in the search buffer against STRING and ALTERNATE.
165 ALTERNATE is a case reversed version of STRING; this is cheaper than
166 case folding each character before comparison. ALTERNATE is only
167 used if the case folding bit is turned on in the passed BINDING. */
168
169 if (binding->flags & S_FoldCase)
170 {
171 alternate = xstrdup (string);
172
173 for (i = 0; i < len; i++)
174 {
175 if (islower (alternate[i]))
176 alternate[i] = toupper (alternate[i]);
177 else if (isupper (alternate[i]))
178 alternate[i] = tolower (alternate[i]);
179 }
180 }
181
182 buff = binding->buffer + binding->start - 1;
183 end = binding->buffer + binding->end;
184
185 while (buff > (end + len))
186 {
187 for (i = 0; i < len; i++)
188 {
189 c = *(buff - i);
190
191 if (c != string[i] && (!alternate || c != alternate[i]))
192 break;
193 }
194
195 if (!string[i])
196 {
197 free (string);
198 if (alternate)
199 free (alternate);
200
201 if (binding->flags & S_SkipDest)
202 buff -= len;
203 return ((long) (1 + (buff - binding->buffer)));
204 }
205
206 buff--;
207 }
208
209 free (string);
210 if (alternate)
211 free (alternate);
212
213 return ((long) -1);
214}
215
216/* Find STRING in LINE, returning the offset of the end of the string.
217 Return an offset of -1 if STRING does not appear in LINE. The search
218 is bound by the end of the line (i.e., either NEWLINE or 0). */
219int
220string_in_line (char *string, char *line)
221{
222 register int end;
223 SEARCH_BINDING binding;
224
225 /* Find the end of the line. */
226 for (end = 0; line[end] && line[end] != '\n'; end++);
227
228 /* Search for STRING within these confines. */
229 binding.buffer = line;
230 binding.start = 0;
231 binding.end = end;
232 binding.flags = S_FoldCase | S_SkipDest;
233
234 return (search_forward (string, &binding));
235}
236
237/* Return non-zero if STRING is the first text to appear at BINDING. */
238int
239looking_at (char *string, SEARCH_BINDING *binding)
240{
241 long search_end;
242
243 search_end = search (string, binding);
244
245 /* If the string was not found, SEARCH_END is -1. If the string was found,
246 but not right away, SEARCH_END is != binding->start. Otherwise, the
247 string was found at binding->start. */
248 return (search_end == binding->start);
249}
250
251
252/* **************************************************************** */
253/* */
254/* Small String Searches */
255/* */
256/* **************************************************************** */
257
258/* Function names that start with "skip" are passed a string, and return
259 an offset from the start of that string. Function names that start
260 with "find" are passed a SEARCH_BINDING, and return an absolute position
261 marker of the item being searched for. "Find" functions return a value
262 of -1 if the item being looked for couldn't be found. */
263
264/* Return the index of the first non-whitespace character in STRING. */
265int
266skip_whitespace (char *string)
267{
268 register int i;
269
270 for (i = 0; string && whitespace (string[i]); i++);
271 return (i);
272}
273
274/* Return the index of the first non-whitespace or newline character in
275 STRING. */
276int
277skip_whitespace_and_newlines (char *string)
278{
279 register int i;
280
281 for (i = 0; string && whitespace_or_newline (string[i]); i++);
282 return (i);
283}
284
285/* Return the index of the first whitespace character in STRING. */
286int
287skip_non_whitespace (char *string)
288{
289 register int i;
290
291 for (i = 0; string && string[i] && !whitespace (string[i]); i++);
292 return (i);
293}
294
295/* Return the index of the first non-node character in STRING. Note that
296 this function contains quite a bit of hair to ignore periods in some
297 special cases. This is because we here at GNU ship some info files which
298 contain nodenames that contain periods. No such nodename can start with
299 a period, or continue with whitespace, newline, or ')' immediately following
300 the period. If second argument NEWLINES_OKAY is non-zero, newlines should
301 be skipped while parsing out the nodename specification. */
302int
303skip_node_characters (char *string, int newlines_okay)
304{
305 register int c, i = 0;
306 int paren_seen = 0;
307 int paren = 0;
308
309 /* Handle special case. This is when another function has parsed out the
310 filename component of the node name, and we just want to parse out the
311 nodename proper. In that case, a period at the start of the nodename
312 indicates an empty nodename. */
313 if (string && *string == '.')
314 return (0);
315
316 if (string && *string == '(')
317 {
318 paren++;
319 paren_seen++;
320 i++;
321 }
322
323 for (; string && (c = string[i]); i++)
324 {
325 if (paren)
326 {
327 if (c == '(')
328 paren++;
329 else if (c == ')')
330 paren--;
331
332 continue;
333 }
334
335 /* If the character following the close paren is a space or period,
336 then this node name has no more characters associated with it. */
337 if (c == '\t' ||
338 c == ',' ||
339 c == INFO_TAGSEP ||
340 ((!newlines_okay) && (c == '\n')) ||
341 ((paren_seen && string[i - 1] == ')') &&
342 (c == ' ' || c == '.')) ||
343 (c == '.' &&
344 (
345#if 0
346/* This test causes a node name ending in a period, like `This.', not to
347 be found. The trailing . is stripped. This occurs in the jargon
348 file (`I see no X here.' is a node name). */
349 (!string[i + 1]) ||
350#endif
351 (whitespace_or_newline (string[i + 1])) ||
352 (string[i + 1] == ')'))))
353 break;
354 }
355 return (i);
356}
357
358
359
360/* **************************************************************** */
361/* */
362/* Searching FILE_BUFFER's */
363/* */
364/* **************************************************************** */
365
366/* Return the absolute position of the first occurence of a node separator in
367 BINDING-buffer. The search starts at BINDING->start. Return -1 if no node
368 separator was found. */
369long
370find_node_separator (SEARCH_BINDING *binding)
371{
372 register long i;
373 char *body;
374
375 body = binding->buffer;
376
377 /* A node is started by [^L]^_[^L]\n. That is to say, the C-l's are
378 optional, but the DELETE and NEWLINE are not. This separator holds
379 true for all separated elements in an Info file, including the tags
380 table (if present) and the indirect tags table (if present). */
381 for (i = binding->start; i < binding->end - 1; i++)
382 if (((body[i] == INFO_FF && body[i + 1] == INFO_COOKIE) &&
383 (body[i + 2] == '\n' ||
384 (body[i + 2] == INFO_FF && body[i + 3] == '\n'))) ||
385 ((body[i] == INFO_COOKIE) &&
386 (body[i + 1] == '\n' ||
387 (body[i + 1] == INFO_FF && body[i + 2] == '\n'))))
388 return (i);
389 return (-1);
390}
391
392/* Return the length of the node separator characters that BODY is
393 currently pointing at. */
394int
395skip_node_separator (char *body)
396{
397 register int i;
398
399 i = 0;
400
401 if (body[i] == INFO_FF)
402 i++;
403
404 if (body[i++] != INFO_COOKIE)
405 return (0);
406
407 if (body[i] == INFO_FF)
408 i++;
409
410 if (body[i++] != '\n')
411 return (0);
412
413 return (i);
414}
415
416/* Return the number of characters from STRING to the start of
417 the next line. */
418int
419skip_line (char *string)
420{
421 register int i;
422
423 for (i = 0; string && string[i] && string[i] != '\n'; i++);
424
425 if (string[i] == '\n')
426 i++;
427
428 return (i);
429}
430
431/* Return the absolute position of the beginning of a tags table in this
432 binding starting the search at binding->start. */
433long
434find_tags_table (SEARCH_BINDING *binding)
435{
436 SEARCH_BINDING tmp_search;
437 long position;
438
439 tmp_search.buffer = binding->buffer;
440 tmp_search.start = binding->start;
441 tmp_search.end = binding->end;
442 tmp_search.flags = S_FoldCase;
443
444 while ((position = find_node_separator (&tmp_search)) != -1 )
445 {
446 tmp_search.start = position;
447 tmp_search.start += skip_node_separator (tmp_search.buffer
448 + tmp_search.start);
449
450 if (looking_at (TAGS_TABLE_BEG_LABEL, &tmp_search))
451 return (position);
452 }
453 return (-1);
454}
455
456/* Return the absolute position of the node named NODENAME in BINDING.
457 This is a brute force search, and we wish to avoid it when possible.
458 This function is called when a tag (indirect or otherwise) doesn't
459 really point to the right node. It returns the absolute position of
460 the separator preceding the node. */
461long
462find_node_in_binding (char *nodename, SEARCH_BINDING *binding)
463{
464 long position;
465 int offset, namelen;
466 SEARCH_BINDING tmp_search;
467
468 namelen = strlen (nodename);
469
470 tmp_search.buffer = binding->buffer;
471 tmp_search.start = binding->start;
472 tmp_search.end = binding->end;
473 tmp_search.flags = 0;
474
475 while ((position = find_node_separator (&tmp_search)) != -1)
476 {
477 tmp_search.start = position;
478 tmp_search.start += skip_node_separator
479 (tmp_search.buffer + tmp_search.start);
480
481 offset = string_in_line
482 (INFO_NODE_LABEL, tmp_search.buffer + tmp_search.start);
483
484 if (offset == -1)
485 continue;
486
487 tmp_search.start += offset;
488 tmp_search.start += skip_whitespace (tmp_search.buffer + tmp_search.start);
489 offset = skip_node_characters
490 (tmp_search.buffer + tmp_search.start, DONT_SKIP_NEWLINES);
491
492 /* Notice that this is an exact match. You cannot grovel through
493 the buffer with this function looking for random nodes. */
494 if ((offset == namelen) &&
495 (tmp_search.buffer[tmp_search.start] == nodename[0]) &&
496 (strncmp (tmp_search.buffer + tmp_search.start, nodename, offset) == 0))
497 return (position);
498 }
499 return (-1);
500}
Note: See TracBrowser for help on using the repository browser.