[3031] | 1 | /* ccl - routines for character classes */
|
---|
| 2 |
|
---|
| 3 | /* Copyright (c) 1990 The Regents of the University of California. */
|
---|
| 4 | /* All rights reserved. */
|
---|
| 5 |
|
---|
| 6 | /* This code is derived from software contributed to Berkeley by */
|
---|
| 7 | /* Vern Paxson. */
|
---|
| 8 |
|
---|
| 9 | /* The United States Government has rights in this work pursuant */
|
---|
| 10 | /* to contract no. DE-AC03-76SF00098 between the United States */
|
---|
| 11 | /* Department of Energy and the University of California. */
|
---|
| 12 |
|
---|
| 13 | /* This file is part of flex. */
|
---|
| 14 |
|
---|
| 15 | /* Redistribution and use in source and binary forms, with or without */
|
---|
| 16 | /* modification, are permitted provided that the following conditions */
|
---|
| 17 | /* are met: */
|
---|
| 18 |
|
---|
| 19 | /* 1. Redistributions of source code must retain the above copyright */
|
---|
| 20 | /* notice, this list of conditions and the following disclaimer. */
|
---|
| 21 | /* 2. Redistributions in binary form must reproduce the above copyright */
|
---|
| 22 | /* notice, this list of conditions and the following disclaimer in the */
|
---|
| 23 | /* documentation and/or other materials provided with the distribution. */
|
---|
| 24 |
|
---|
| 25 | /* Neither the name of the University nor the names of its contributors */
|
---|
| 26 | /* may be used to endorse or promote products derived from this software */
|
---|
| 27 | /* without specific prior written permission. */
|
---|
| 28 |
|
---|
| 29 | /* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
|
---|
| 30 | /* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
|
---|
| 31 | /* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
---|
| 32 | /* PURPOSE. */
|
---|
| 33 |
|
---|
| 34 | #include "flexdef.h"
|
---|
| 35 |
|
---|
| 36 | /* ccladd - add a single character to a ccl */
|
---|
| 37 |
|
---|
| 38 | void ccladd (cclp, ch)
|
---|
| 39 | int cclp;
|
---|
| 40 | int ch;
|
---|
| 41 | {
|
---|
| 42 | int ind, len, newpos, i;
|
---|
| 43 |
|
---|
| 44 | check_char (ch);
|
---|
| 45 |
|
---|
| 46 | len = ccllen[cclp];
|
---|
| 47 | ind = cclmap[cclp];
|
---|
| 48 |
|
---|
| 49 | /* check to see if the character is already in the ccl */
|
---|
| 50 |
|
---|
| 51 | for (i = 0; i < len; ++i)
|
---|
| 52 | if (ccltbl[ind + i] == ch)
|
---|
| 53 | return;
|
---|
| 54 |
|
---|
| 55 | /* mark newlines */
|
---|
| 56 | if (ch == nlch)
|
---|
| 57 | ccl_has_nl[cclp] = true;
|
---|
| 58 |
|
---|
| 59 | newpos = ind + len;
|
---|
| 60 |
|
---|
| 61 | if (newpos >= current_max_ccl_tbl_size) {
|
---|
| 62 | current_max_ccl_tbl_size += MAX_CCL_TBL_SIZE_INCREMENT;
|
---|
| 63 |
|
---|
| 64 | ++num_reallocs;
|
---|
| 65 |
|
---|
| 66 | ccltbl = reallocate_Character_array (ccltbl,
|
---|
| 67 | current_max_ccl_tbl_size);
|
---|
| 68 | }
|
---|
| 69 |
|
---|
| 70 | ccllen[cclp] = len + 1;
|
---|
| 71 | ccltbl[newpos] = ch;
|
---|
| 72 | }
|
---|
| 73 |
|
---|
| 74 |
|
---|
| 75 | /* cclinit - return an empty ccl */
|
---|
| 76 |
|
---|
| 77 | int cclinit ()
|
---|
| 78 | {
|
---|
| 79 | if (++lastccl >= current_maxccls) {
|
---|
| 80 | current_maxccls += MAX_CCLS_INCREMENT;
|
---|
| 81 |
|
---|
| 82 | ++num_reallocs;
|
---|
| 83 |
|
---|
| 84 | cclmap =
|
---|
| 85 | reallocate_integer_array (cclmap, current_maxccls);
|
---|
| 86 | ccllen =
|
---|
| 87 | reallocate_integer_array (ccllen, current_maxccls);
|
---|
| 88 | cclng = reallocate_integer_array (cclng, current_maxccls);
|
---|
| 89 | ccl_has_nl =
|
---|
| 90 | reallocate_bool_array (ccl_has_nl,
|
---|
| 91 | current_maxccls);
|
---|
| 92 | }
|
---|
| 93 |
|
---|
| 94 | if (lastccl == 1)
|
---|
| 95 | /* we're making the first ccl */
|
---|
| 96 | cclmap[lastccl] = 0;
|
---|
| 97 |
|
---|
| 98 | else
|
---|
| 99 | /* The new pointer is just past the end of the last ccl.
|
---|
| 100 | * Since the cclmap points to the \first/ character of a
|
---|
| 101 | * ccl, adding the length of the ccl to the cclmap pointer
|
---|
| 102 | * will produce a cursor to the first free space.
|
---|
| 103 | */
|
---|
| 104 | cclmap[lastccl] =
|
---|
| 105 | cclmap[lastccl - 1] + ccllen[lastccl - 1];
|
---|
| 106 |
|
---|
| 107 | ccllen[lastccl] = 0;
|
---|
| 108 | cclng[lastccl] = 0; /* ccl's start out life un-negated */
|
---|
| 109 | ccl_has_nl[lastccl] = false;
|
---|
| 110 |
|
---|
| 111 | return lastccl;
|
---|
| 112 | }
|
---|
| 113 |
|
---|
| 114 |
|
---|
| 115 | /* cclnegate - negate the given ccl */
|
---|
| 116 |
|
---|
| 117 | void cclnegate (cclp)
|
---|
| 118 | int cclp;
|
---|
| 119 | {
|
---|
| 120 | cclng[cclp] = 1;
|
---|
| 121 | ccl_has_nl[cclp] = !ccl_has_nl[cclp];
|
---|
| 122 | }
|
---|
| 123 |
|
---|
| 124 |
|
---|
| 125 | /* list_character_set - list the members of a set of characters in CCL form
|
---|
| 126 | *
|
---|
| 127 | * Writes to the given file a character-class representation of those
|
---|
| 128 | * characters present in the given CCL. A character is present if it
|
---|
| 129 | * has a non-zero value in the cset array.
|
---|
| 130 | */
|
---|
| 131 |
|
---|
| 132 | void list_character_set (file, cset)
|
---|
| 133 | FILE *file;
|
---|
| 134 | int cset[];
|
---|
| 135 | {
|
---|
| 136 | register int i;
|
---|
| 137 |
|
---|
| 138 | putc ('[', file);
|
---|
| 139 |
|
---|
| 140 | for (i = 0; i < csize; ++i) {
|
---|
| 141 | if (cset[i]) {
|
---|
| 142 | register int start_char = i;
|
---|
| 143 |
|
---|
| 144 | putc (' ', file);
|
---|
| 145 |
|
---|
| 146 | fputs (readable_form (i), file);
|
---|
| 147 |
|
---|
| 148 | while (++i < csize && cset[i]) ;
|
---|
| 149 |
|
---|
| 150 | if (i - 1 > start_char)
|
---|
| 151 | /* this was a run */
|
---|
| 152 | fprintf (file, "-%s",
|
---|
| 153 | readable_form (i - 1));
|
---|
| 154 |
|
---|
| 155 | putc (' ', file);
|
---|
| 156 | }
|
---|
| 157 | }
|
---|
| 158 |
|
---|
| 159 | putc (']', file);
|
---|
| 160 | }
|
---|
| 161 |
|
---|
/** Checks whether the range [c1-c2] is unambiguous in a case-insensitive
 * scanner.  For every character x in the range for which has_case() is
 * true, reverse_case(x) must lie in the range as well; the first character
 * that violates this makes the range ambiguous and the function returns
 * false.  For example, [@-_] covers [A-Z] but not [a-z], so it is
 * ambiguous.  Note that a plain [a-z] is also reported as ambiguous,
 * because it does not include [A-Z].
 *
 * @param c1 the lower end of the range
 * @param c2 the upper end of the range
 * @return true if [c1-c2] is not ambiguous for a caseless scanner.
 */
bool range_covers_case (int c1, int c2)
{
	int ch;

	for (ch = c1; ch <= c2; ++ch) {
		int other;

		/* characters without case cannot introduce ambiguity */
		if (!has_case (ch))
			continue;

		other = reverse_case (ch);

		if (!(c1 <= other && other <= c2))
			return false;
	}

	return true;
}
|
---|
| 186 |
|
---|
/** Reverse the case of a character, if possible.
 *
 * Uppercase characters are mapped with tolower() and lowercase characters
 * with toupper().  The <ctype.h> functions are only defined for arguments
 * representable as unsigned char (or EOF), so any other value is returned
 * unchanged without being passed to them.
 *
 * @param c the character value to convert
 * @return the case-reversed character, or c if case-reversal does not
 *         apply (including when c is outside the unsigned char range).
 */
int reverse_case (int c)
{
	/* out-of-range values (negative, EOF, or too large) have no case */
	if (c != (int) (unsigned char) c)
		return c;

	if (isupper (c))
		return tolower (c);

	if (islower (c))
		return toupper (c);

	return c;
}
|
---|
| 194 |
|
---|
/** Return true if c is uppercase or lowercase.
 *
 * The <ctype.h> classifiers are only defined for arguments representable
 * as unsigned char (or EOF); any other value is reported as having no
 * case without being passed to them.
 *
 * @param c the character value to classify
 * @return true if isupper(c) or islower(c) holds, false otherwise.
 */
bool has_case (int c)
{
	/* out-of-range values (negative, EOF, or too large) have no case */
	if (c != (int) (unsigned char) c)
		return false;

	return isupper (c) || islower (c);
}
|
---|