1 | /* ccl - routines for character classes */
|
---|
2 |
|
---|
3 | /* Copyright (c) 1990 The Regents of the University of California. */
|
---|
4 | /* All rights reserved. */
|
---|
5 |
|
---|
6 | /* This code is derived from software contributed to Berkeley by */
|
---|
7 | /* Vern Paxson. */
|
---|
8 |
|
---|
9 | /* The United States Government has rights in this work pursuant */
|
---|
10 | /* to contract no. DE-AC03-76SF00098 between the United States */
|
---|
11 | /* Department of Energy and the University of California. */
|
---|
12 |
|
---|
13 | /* This file is part of flex. */
|
---|
14 |
|
---|
15 | /* Redistribution and use in source and binary forms, with or without */
|
---|
16 | /* modification, are permitted provided that the following conditions */
|
---|
17 | /* are met: */
|
---|
18 |
|
---|
19 | /* 1. Redistributions of source code must retain the above copyright */
|
---|
20 | /* notice, this list of conditions and the following disclaimer. */
|
---|
21 | /* 2. Redistributions in binary form must reproduce the above copyright */
|
---|
22 | /* notice, this list of conditions and the following disclaimer in the */
|
---|
23 | /* documentation and/or other materials provided with the distribution. */
|
---|
24 |
|
---|
25 | /* Neither the name of the University nor the names of its contributors */
|
---|
26 | /* may be used to endorse or promote products derived from this software */
|
---|
27 | /* without specific prior written permission. */
|
---|
28 |
|
---|
29 | /* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
|
---|
30 | /* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
|
---|
31 | /* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
---|
32 | /* PURPOSE. */
|
---|
33 |
|
---|
34 | #include "flexdef.h"
|
---|
35 |
|
---|
36 | /* ccladd - add a single character to a ccl */
|
---|
37 |
|
---|
38 | void ccladd (cclp, ch)
|
---|
39 | int cclp;
|
---|
40 | int ch;
|
---|
41 | {
|
---|
42 | int ind, len, newpos, i;
|
---|
43 |
|
---|
44 | check_char (ch);
|
---|
45 |
|
---|
46 | len = ccllen[cclp];
|
---|
47 | ind = cclmap[cclp];
|
---|
48 |
|
---|
49 | /* check to see if the character is already in the ccl */
|
---|
50 |
|
---|
51 | for (i = 0; i < len; ++i)
|
---|
52 | if (ccltbl[ind + i] == ch)
|
---|
53 | return;
|
---|
54 |
|
---|
55 | /* mark newlines */
|
---|
56 | if (ch == nlch)
|
---|
57 | ccl_has_nl[cclp] = true;
|
---|
58 |
|
---|
59 | newpos = ind + len;
|
---|
60 |
|
---|
61 | if (newpos >= current_max_ccl_tbl_size) {
|
---|
62 | current_max_ccl_tbl_size += MAX_CCL_TBL_SIZE_INCREMENT;
|
---|
63 |
|
---|
64 | ++num_reallocs;
|
---|
65 |
|
---|
66 | ccltbl = reallocate_Character_array (ccltbl,
|
---|
67 | current_max_ccl_tbl_size);
|
---|
68 | }
|
---|
69 |
|
---|
70 | ccllen[cclp] = len + 1;
|
---|
71 | ccltbl[newpos] = ch;
|
---|
72 | }
|
---|
73 |
|
---|
74 |
|
---|
75 | /* cclinit - return an empty ccl */
|
---|
76 |
|
---|
77 | int cclinit ()
|
---|
78 | {
|
---|
79 | if (++lastccl >= current_maxccls) {
|
---|
80 | current_maxccls += MAX_CCLS_INCREMENT;
|
---|
81 |
|
---|
82 | ++num_reallocs;
|
---|
83 |
|
---|
84 | cclmap =
|
---|
85 | reallocate_integer_array (cclmap, current_maxccls);
|
---|
86 | ccllen =
|
---|
87 | reallocate_integer_array (ccllen, current_maxccls);
|
---|
88 | cclng = reallocate_integer_array (cclng, current_maxccls);
|
---|
89 | ccl_has_nl =
|
---|
90 | reallocate_bool_array (ccl_has_nl,
|
---|
91 | current_maxccls);
|
---|
92 | }
|
---|
93 |
|
---|
94 | if (lastccl == 1)
|
---|
95 | /* we're making the first ccl */
|
---|
96 | cclmap[lastccl] = 0;
|
---|
97 |
|
---|
98 | else
|
---|
99 | /* The new pointer is just past the end of the last ccl.
|
---|
100 | * Since the cclmap points to the \first/ character of a
|
---|
101 | * ccl, adding the length of the ccl to the cclmap pointer
|
---|
102 | * will produce a cursor to the first free space.
|
---|
103 | */
|
---|
104 | cclmap[lastccl] =
|
---|
105 | cclmap[lastccl - 1] + ccllen[lastccl - 1];
|
---|
106 |
|
---|
107 | ccllen[lastccl] = 0;
|
---|
108 | cclng[lastccl] = 0; /* ccl's start out life un-negated */
|
---|
109 | ccl_has_nl[lastccl] = false;
|
---|
110 |
|
---|
111 | return lastccl;
|
---|
112 | }
|
---|
113 |
|
---|
114 |
|
---|
115 | /* cclnegate - negate the given ccl */
|
---|
116 |
|
---|
117 | void cclnegate (cclp)
|
---|
118 | int cclp;
|
---|
119 | {
|
---|
120 | cclng[cclp] = 1;
|
---|
121 | ccl_has_nl[cclp] = !ccl_has_nl[cclp];
|
---|
122 | }
|
---|
123 |
|
---|
124 |
|
---|
125 | /* list_character_set - list the members of a set of characters in CCL form
|
---|
126 | *
|
---|
127 | * Writes to the given file a character-class representation of those
|
---|
128 | * characters present in the given CCL. A character is present if it
|
---|
129 | * has a non-zero value in the cset array.
|
---|
130 | */
|
---|
131 |
|
---|
132 | void list_character_set (file, cset)
|
---|
133 | FILE *file;
|
---|
134 | int cset[];
|
---|
135 | {
|
---|
136 | register int i;
|
---|
137 |
|
---|
138 | putc ('[', file);
|
---|
139 |
|
---|
140 | for (i = 0; i < csize; ++i) {
|
---|
141 | if (cset[i]) {
|
---|
142 | register int start_char = i;
|
---|
143 |
|
---|
144 | putc (' ', file);
|
---|
145 |
|
---|
146 | fputs (readable_form (i), file);
|
---|
147 |
|
---|
148 | while (++i < csize && cset[i]) ;
|
---|
149 |
|
---|
150 | if (i - 1 > start_char)
|
---|
151 | /* this was a run */
|
---|
152 | fprintf (file, "-%s",
|
---|
153 | readable_form (i - 1));
|
---|
154 |
|
---|
155 | putc (' ', file);
|
---|
156 | }
|
---|
157 | }
|
---|
158 |
|
---|
159 | putc (']', file);
|
---|
160 | }
|
---|
161 |
|
---|
/** Checks whether the range [c1-c2] is unambiguous in a case-insensitive
 * scanner.  The range is unambiguous when every cased character x inside
 * it has its opposite-case counterpart inside it as well.  For example,
 * [@-_] covers [A-Z] but not [a-z], so it is ambiguous.  Note that a
 * plain [a-z] is also reported as ambiguous, because [A-Z] lies outside
 * of it.
 *
 * @param c1 the lower end of the range
 * @param c2 the upper end of the range
 * @return true if [c1-c2] is not ambiguous for a caseless scanner,
 *         false as soon as one cased member lacks its counterpart.
 */
bool range_covers_case (int c1, int c2)
{
	int     ch;

	for (ch = c1; ch <= c2; ++ch) {
		int     partner;

		/* characters without case cannot make the range ambiguous */
		if (!has_case (ch))
			continue;

		partner = reverse_case (ch);
		if (partner < c1 || partner > c2)
			return false;
	}

	return true;
}
|
---|
186 |
|
---|
/** Reverse the case of a character, if possible.
 *
 * Uppercase characters are mapped with tolower(), lowercase characters
 * with toupper().
 *
 * @param c the character value to convert
 * @return the opposite-case character, or c itself if case-reversal does
 *         not apply (including any c that is not representable as an
 *         unsigned char).
 */
int reverse_case (int c)
{
	/* The <ctype.h> functions are only defined for values representable
	 * as unsigned char (and EOF); anything else has no case, so hand it
	 * back untouched instead of invoking undefined behaviour.
	 */
	if (c != (int) (unsigned char) c)
		return c;

	if (isupper (c))
		return tolower (c);

	if (islower (c))
		return toupper (c);

	return c;
}
|
---|
194 |
|
---|
/** Return true if c is uppercase or lowercase.
 *
 * @param c the character value to classify
 * @return true if isupper(c) or islower(c) holds; false otherwise,
 *         including for any c that is not representable as an
 *         unsigned char.
 */
bool has_case (int c)
{
	/* The <ctype.h> functions are only defined for values representable
	 * as unsigned char (and EOF); treat everything else as caseless
	 * instead of invoking undefined behaviour.
	 */
	if (c != (int) (unsigned char) c)
		return false;

	return (isupper (c) || islower (c)) ? true : false;
}
|
---|