/* bigram -- list bigrams for locate
   Copyright (C) 1994 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
   USA.
*/
19 |
|
---|
/* Usage: bigram < text > bigrams
   Use `code' to encode a file using this output.

   Read a file from stdin and write out the bigrams (pairs of
   adjacent characters), one bigram per line, to stdout. To reduce
   needless duplication in the output, it starts finding the
   bigrams on each input line at the character where that line
   first differs from the previous line (i.e., in the ASCII
   remainder). Therefore, the input should be sorted in order to
   get the least redundant output.

   Written by James A. Woods <jwoods@adobe.com>.
   Modified by David MacKenzie <djm@gnu.ai.mit.edu>. */
33 |
|
---|
34 | #include <config.h>
|
---|
35 | #include <stdio.h>
|
---|
36 |
|
---|
37 | #if defined(HAVE_STRING_H) || defined(STDC_HEADERS)
|
---|
38 | #include <string.h>
|
---|
39 | #else
|
---|
40 | #include <strings.h>
|
---|
41 | #endif
|
---|
42 |
|
---|
43 | #ifdef STDC_HEADERS
|
---|
44 | #include <stdlib.h>
|
---|
45 | #endif
|
---|
46 | #include <sys/types.h>
|
---|
47 |
|
---|
48 | #include <getline.h>
|
---|
49 | #include <xalloc.h>
|
---|
50 | #include "closeout.h"
|
---|
51 |
|
---|
/* The name this program was run with; main stores argv[0] here.
   NOTE(review): this has external linkage, presumably so that library
   error-reporting code can read it -- confirm against the callers. */
char *program_name;
54 |
|
---|
/* Count the leading characters that strings S1 and S2 have in common.
   Scanning stops at the first position where the two strings differ or
   where S1 ends; the number of characters passed over is returned. */

static int
prefix_length (char *s1, char *s2)
{
  char *cursor1 = s1;
  char *cursor2 = s2;

  /* Advance both cursors in lockstep while the characters agree. */
  while (*cursor1 != '\0' && *cursor1 == *cursor2)
    {
      cursor1++;
      cursor2++;
    }

  /* The distance travelled in S1 is the shared prefix length. */
  return cursor1 - s1;
}
66 |
|
---|
67 | int
|
---|
68 | main (int argc, char **argv)
|
---|
69 | {
|
---|
70 | char *path; /* The current input entry. */
|
---|
71 | char *oldpath; /* The previous input entry. */
|
---|
72 | size_t pathsize, oldpathsize; /* Amounts allocated for them. */
|
---|
73 | int line_len; /* Length of input line. */
|
---|
74 |
|
---|
75 | program_name = argv[0];
|
---|
76 | (void) argc;
|
---|
77 | atexit (close_stdout);
|
---|
78 |
|
---|
79 | pathsize = oldpathsize = 1026; /* Increased as necessary by getline. */
|
---|
80 | path = xmalloc (pathsize);
|
---|
81 | oldpath = xmalloc (oldpathsize);
|
---|
82 |
|
---|
83 | /* Set to anything not starting with a slash, to force the first
|
---|
84 | prefix count to 0. */
|
---|
85 | strcpy (oldpath, " ");
|
---|
86 |
|
---|
87 | while ((line_len = getline (&path, &pathsize, stdin)) > 0)
|
---|
88 | {
|
---|
89 | register int count; /* The prefix length. */
|
---|
90 | register int j; /* Index into input line. */
|
---|
91 |
|
---|
92 | path[line_len - 1] = '\0'; /* Remove the newline. */
|
---|
93 |
|
---|
94 | /* Output bigrams in the remainder only. */
|
---|
95 | count = prefix_length (oldpath, path);
|
---|
96 | for (j = count; path[j] != '\0' && path[j + 1] != '\0'; j += 2)
|
---|
97 | {
|
---|
98 | putchar (path[j]);
|
---|
99 | putchar (path[j + 1]);
|
---|
100 | putchar ('\n');
|
---|
101 | }
|
---|
102 |
|
---|
103 | {
|
---|
104 | /* Swap path and oldpath and their sizes. */
|
---|
105 | char *tmppath = oldpath;
|
---|
106 | size_t tmppathsize = oldpathsize;
|
---|
107 | oldpath = path;
|
---|
108 | oldpathsize = pathsize;
|
---|
109 | path = tmppath;
|
---|
110 | pathsize = tmppathsize;
|
---|
111 | }
|
---|
112 | }
|
---|
113 |
|
---|
114 | free (path);
|
---|
115 | free (oldpath);
|
---|
116 |
|
---|
117 | return 0;
|
---|
118 | }
|
---|