| 1 | /* ccl - routines for character classes */ | 
|---|
| 2 |  | 
|---|
| 3 | /*  Copyright (c) 1990 The Regents of the University of California. */ | 
|---|
| 4 | /*  All rights reserved. */ | 
|---|
| 5 |  | 
|---|
| 6 | /*  This code is derived from software contributed to Berkeley by */ | 
|---|
| 7 | /*  Vern Paxson. */ | 
|---|
| 8 |  | 
|---|
| 9 | /*  The United States Government has rights in this work pursuant */ | 
|---|
| 10 | /*  to contract no. DE-AC03-76SF00098 between the United States */ | 
|---|
| 11 | /*  Department of Energy and the University of California. */ | 
|---|
| 12 |  | 
|---|
| 13 | /*  This file is part of flex. */ | 
|---|
| 14 |  | 
|---|
| 15 | /*  Redistribution and use in source and binary forms, with or without */ | 
|---|
| 16 | /*  modification, are permitted provided that the following conditions */ | 
|---|
| 17 | /*  are met: */ | 
|---|
| 18 |  | 
|---|
| 19 | /*  1. Redistributions of source code must retain the above copyright */ | 
|---|
| 20 | /*     notice, this list of conditions and the following disclaimer. */ | 
|---|
| 21 | /*  2. Redistributions in binary form must reproduce the above copyright */ | 
|---|
| 22 | /*     notice, this list of conditions and the following disclaimer in the */ | 
|---|
| 23 | /*     documentation and/or other materials provided with the distribution. */ | 
|---|
| 24 |  | 
|---|
| 25 | /*  Neither the name of the University nor the names of its contributors */ | 
|---|
| 26 | /*  may be used to endorse or promote products derived from this software */ | 
|---|
| 27 | /*  without specific prior written permission. */ | 
|---|
| 28 |  | 
|---|
| 29 | /*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */ | 
|---|
| 30 | /*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */ | 
|---|
| 31 | /*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ | 
|---|
| 32 | /*  PURPOSE. */ | 
|---|
| 33 |  | 
|---|
| 34 | #include "flexdef.h" | 
|---|
| 35 |  | 
|---|
| 36 | /* ccladd - add a single character to a ccl */ | 
|---|
| 37 |  | 
|---|
| 38 | void    ccladd (cclp, ch) | 
|---|
| 39 | int     cclp; | 
|---|
| 40 | int     ch; | 
|---|
| 41 | { | 
|---|
| 42 | int     ind, len, newpos, i; | 
|---|
| 43 |  | 
|---|
| 44 | check_char (ch); | 
|---|
| 45 |  | 
|---|
| 46 | len = ccllen[cclp]; | 
|---|
| 47 | ind = cclmap[cclp]; | 
|---|
| 48 |  | 
|---|
| 49 | /* check to see if the character is already in the ccl */ | 
|---|
| 50 |  | 
|---|
| 51 | for (i = 0; i < len; ++i) | 
|---|
| 52 | if (ccltbl[ind + i] == ch) | 
|---|
| 53 | return; | 
|---|
| 54 |  | 
|---|
| 55 | /* mark newlines */ | 
|---|
| 56 | if (ch == nlch) | 
|---|
| 57 | ccl_has_nl[cclp] = true; | 
|---|
| 58 |  | 
|---|
| 59 | newpos = ind + len; | 
|---|
| 60 |  | 
|---|
| 61 | if (newpos >= current_max_ccl_tbl_size) { | 
|---|
| 62 | current_max_ccl_tbl_size += MAX_CCL_TBL_SIZE_INCREMENT; | 
|---|
| 63 |  | 
|---|
| 64 | ++num_reallocs; | 
|---|
| 65 |  | 
|---|
| 66 | ccltbl = reallocate_Character_array (ccltbl, | 
|---|
| 67 | current_max_ccl_tbl_size); | 
|---|
| 68 | } | 
|---|
| 69 |  | 
|---|
| 70 | ccllen[cclp] = len + 1; | 
|---|
| 71 | ccltbl[newpos] = ch; | 
|---|
| 72 | } | 
|---|
| 73 |  | 
|---|
| 74 |  | 
|---|
| 75 | /* cclinit - return an empty ccl */ | 
|---|
| 76 |  | 
|---|
| 77 | int     cclinit () | 
|---|
| 78 | { | 
|---|
| 79 | if (++lastccl >= current_maxccls) { | 
|---|
| 80 | current_maxccls += MAX_CCLS_INCREMENT; | 
|---|
| 81 |  | 
|---|
| 82 | ++num_reallocs; | 
|---|
| 83 |  | 
|---|
| 84 | cclmap = | 
|---|
| 85 | reallocate_integer_array (cclmap, current_maxccls); | 
|---|
| 86 | ccllen = | 
|---|
| 87 | reallocate_integer_array (ccllen, current_maxccls); | 
|---|
| 88 | cclng = reallocate_integer_array (cclng, current_maxccls); | 
|---|
| 89 | ccl_has_nl = | 
|---|
| 90 | reallocate_bool_array (ccl_has_nl, | 
|---|
| 91 | current_maxccls); | 
|---|
| 92 | } | 
|---|
| 93 |  | 
|---|
| 94 | if (lastccl == 1) | 
|---|
| 95 | /* we're making the first ccl */ | 
|---|
| 96 | cclmap[lastccl] = 0; | 
|---|
| 97 |  | 
|---|
| 98 | else | 
|---|
| 99 | /* The new pointer is just past the end of the last ccl. | 
|---|
| 100 | * Since the cclmap points to the \first/ character of a | 
|---|
| 101 | * ccl, adding the length of the ccl to the cclmap pointer | 
|---|
| 102 | * will produce a cursor to the first free space. | 
|---|
| 103 | */ | 
|---|
| 104 | cclmap[lastccl] = | 
|---|
| 105 | cclmap[lastccl - 1] + ccllen[lastccl - 1]; | 
|---|
| 106 |  | 
|---|
| 107 | ccllen[lastccl] = 0; | 
|---|
| 108 | cclng[lastccl] = 0;     /* ccl's start out life un-negated */ | 
|---|
| 109 | ccl_has_nl[lastccl] = false; | 
|---|
| 110 |  | 
|---|
| 111 | return lastccl; | 
|---|
| 112 | } | 
|---|
| 113 |  | 
|---|
| 114 |  | 
|---|
| 115 | /* cclnegate - negate the given ccl */ | 
|---|
| 116 |  | 
|---|
| 117 | void    cclnegate (cclp) | 
|---|
| 118 | int     cclp; | 
|---|
| 119 | { | 
|---|
| 120 | cclng[cclp] = 1; | 
|---|
| 121 | ccl_has_nl[cclp] = !ccl_has_nl[cclp]; | 
|---|
| 122 | } | 
|---|
| 123 |  | 
|---|
| 124 |  | 
|---|
| 125 | /* list_character_set - list the members of a set of characters in CCL form | 
|---|
| 126 | * | 
|---|
| 127 | * Writes to the given file a character-class representation of those | 
|---|
| 128 | * characters present in the given CCL.  A character is present if it | 
|---|
| 129 | * has a non-zero value in the cset array. | 
|---|
| 130 | */ | 
|---|
| 131 |  | 
|---|
| 132 | void    list_character_set (file, cset) | 
|---|
| 133 | FILE   *file; | 
|---|
| 134 | int     cset[]; | 
|---|
| 135 | { | 
|---|
| 136 | register int i; | 
|---|
| 137 |  | 
|---|
| 138 | putc ('[', file); | 
|---|
| 139 |  | 
|---|
| 140 | for (i = 0; i < csize; ++i) { | 
|---|
| 141 | if (cset[i]) { | 
|---|
| 142 | register int start_char = i; | 
|---|
| 143 |  | 
|---|
| 144 | putc (' ', file); | 
|---|
| 145 |  | 
|---|
| 146 | fputs (readable_form (i), file); | 
|---|
| 147 |  | 
|---|
| 148 | while (++i < csize && cset[i]) ; | 
|---|
| 149 |  | 
|---|
| 150 | if (i - 1 > start_char) | 
|---|
| 151 | /* this was a run */ | 
|---|
| 152 | fprintf (file, "-%s", | 
|---|
| 153 | readable_form (i - 1)); | 
|---|
| 154 |  | 
|---|
| 155 | putc (' ', file); | 
|---|
| 156 | } | 
|---|
| 157 | } | 
|---|
| 158 |  | 
|---|
| 159 | putc (']', file); | 
|---|
| 160 | } | 
|---|
| 161 |  | 
|---|
/** Checks whether the range [c1-c2] is closed under case reversal, i.e.
 * whether it is unambiguous in a case-insensitive scanner.  Every
 * character x in the range for which has_case(x) holds must have its
 * case counterpart, reverse_case(x), inside the range as well.  If any
 * counterpart falls outside, the range is ambiguous and the function
 * returns false.  For example, [@-_] spans [a-z] but not [A-Z].  Beware
 * that [a-z] is itself reported as ambiguous because it does not
 * include [A-Z].
 *
 * @param c1 the lower end of the range
 * @param c2 the upper end of the range
 * @return true if [c1-c2] is not ambiguous for a caseless scanner.
 */
bool range_covers_case (int c1, int c2)
{
    int ch = c1;

    while (ch <= c2) {
        if (has_case (ch)) {
            int counterpart = reverse_case (ch);

            /* counterpart lies outside [c1, c2]: the range is ambiguous */
            if (counterpart < c1 || counterpart > c2)
                return false;
        }

        ch++;
    }

    return true;
}
|---|
| 186 |  | 
|---|
/** Swap the case of a character where that is meaningful.
 * An uppercase character maps to tolower(c), a lowercase character to
 * toupper(c).  c is handed straight to the <ctype.h> functions, so it
 * must be a value those functions accept.
 * @param c the character to convert
 * @return the case-reversed character, or c if case-reversal does not apply.
 */
int reverse_case (int c)
{
    if (isupper (c))
        return tolower (c);

    if (islower (c))
        return toupper (c);

    return c;
}
|---|
| 194 |  | 
|---|
/** Tell whether a character is a cased letter.
 * @param c the character to classify
 * @return true if isupper(c) or islower(c) holds, false otherwise.
 */
bool has_case (int c)
{
    if (isupper (c) || islower (c))
        return true;

    return false;
}
|---|