| 1 | /*
|
|---|
| 2 | * Wine Message Compiler lexical scanner
|
|---|
| 3 | *
|
|---|
| 4 | * Copyright 2000 Bertho A. Stultiens (BS)
|
|---|
| 5 | *
|
|---|
| 6 | * This library is free software; you can redistribute it and/or
|
|---|
| 7 | * modify it under the terms of the GNU Lesser General Public
|
|---|
| 8 | * License as published by the Free Software Foundation; either
|
|---|
| 9 | * version 2.1 of the License, or (at your option) any later version.
|
|---|
| 10 | *
|
|---|
| 11 | * This library is distributed in the hope that it will be useful,
|
|---|
| 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|---|
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|---|
| 14 | * Lesser General Public License for more details.
|
|---|
| 15 | *
|
|---|
| 16 | * You should have received a copy of the GNU Lesser General Public
|
|---|
| 17 | * License along with this library; if not, write to the Free Software
|
|---|
| 18 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|---|
| 19 | */
|
|---|
| 20 |
|
|---|
| 21 | #include "config.h"
|
|---|
| 22 |
|
|---|
| 23 | #include <stdio.h>
|
|---|
| 24 | #include <stdlib.h>
|
|---|
| 25 | #include <ctype.h>
|
|---|
| 26 | #include <assert.h>
|
|---|
| 27 | #include <string.h>
|
|---|
| 28 |
|
|---|
| 29 | #include "utils.h"
|
|---|
| 30 | #include "wmc.h"
|
|---|
| 31 | #include "lang.h"
|
|---|
| 32 |
|
|---|
| 33 | #include "y.tab.h"
|
|---|
| 34 |
|
|---|
| 35 | /*
|
|---|
| 36 | * Keywords are case insensitive. All normal input is treated as
|
|---|
| 37 | * being in codepage iso-8859-1 for ascii input files (unicode
|
|---|
| 38 | * page 0) and as equivalent unicode if unicode input is selected.
|
|---|
| 39 | * All normal input, which is not part of a message text, is
|
|---|
| 40 | * enforced to be unicode page 0. Otherwise an error will be
|
|---|
| 41 | * generated. The normal file data should only be ASCII because
|
|---|
| 42 | * that is the basic definition of the grammar.
|
|---|
| 43 | *
|
|---|
| 44 | * Byteorder or unicode input is determined automatically by
|
|---|
| 45 | * reading the first 8 bytes and checking them against unicode
|
|---|
| 46 | * page 0 byteorder (hibyte must be 0).
|
|---|
| 47 | * -- FIXME --
|
|---|
| 48 | * Alternatively, the input is checked against a special byte
|
|---|
| 49 | * sequence to identify the file.
|
|---|
| 50 | * -- FIXME --
|
|---|
| 51 | *
|
|---|
| 52 | *
|
|---|
| 53 | * Keywords:
|
|---|
| 54 | * Codepages
|
|---|
| 55 | * Facility
|
|---|
| 56 | * FacilityNames
|
|---|
| 57 | * LanguageNames
|
|---|
| 58 | * MessageId
|
|---|
| 59 | * MessageIdTypedef
|
|---|
| 60 | * Severity
|
|---|
| 61 | * SeverityNames
|
|---|
| 62 | * SymbolicName
|
|---|
| 63 | *
|
|---|
| 64 | * Default added identifiers for classes:
|
|---|
| 65 | * SeverityNames:
|
|---|
| 66 | * Success = 0x0
|
|---|
| 67 | * Informational = 0x1
|
|---|
| 68 | * Warning = 0x2
|
|---|
| 69 | * Error = 0x3
|
|---|
| 70 | * FacilityNames:
|
|---|
| 71 | * System = 0x0FF
|
|---|
| 72 | * Application = 0xFFF
|
|---|
| 73 | *
|
|---|
| 74 | * The 'Codepages' keyword is a wmc extension.
|
|---|
| 75 | */
|
|---|
| 76 |
|
|---|
| 77 | static WCHAR ustr_application[] = { 'A', 'p', 'p', 'l', 'i', 'c', 'a', 't', 'i', 'o', 'n', 0 };
|
|---|
| 78 | static WCHAR ustr_codepages[] = { 'C', 'o', 'd', 'e', 'p', 'a', 'g', 'e', 's', 0 };
|
|---|
| 79 | static WCHAR ustr_english[] = { 'E', 'n', 'g', 'l', 'i', 's', 'h', 0 };
|
|---|
| 80 | static WCHAR ustr_error[] = { 'E', 'r', 'r', 'o', 'r', 0 };
|
|---|
| 81 | static WCHAR ustr_facility[] = { 'F', 'a', 'c', 'i', 'l', 'i', 't', 'y', 0 };
|
|---|
| 82 | static WCHAR ustr_facilitynames[] = { 'F', 'a', 'c', 'i', 'l', 'i', 't', 'y', 'N', 'a', 'm', 'e', 's', 0 };
|
|---|
| 83 | static WCHAR ustr_informational[] = { 'I', 'n', 'f', 'o', 'r', 'm', 'a', 't', 'i', 'o', 'n', 'a', 'l', 0 };
|
|---|
| 84 | static WCHAR ustr_language[] = { 'L', 'a', 'n', 'g', 'u', 'a', 'g', 'e', 0};
|
|---|
| 85 | static WCHAR ustr_languagenames[] = { 'L', 'a', 'n', 'g', 'u', 'a', 'g', 'e', 'N', 'a', 'm', 'e', 's', 0};
|
|---|
| 86 | static WCHAR ustr_messageid[] = { 'M', 'e', 's', 's', 'a', 'g', 'e', 'I', 'd', 0 };
|
|---|
| 87 | static WCHAR ustr_messageidtypedef[] = { 'M', 'e', 's', 's', 'a', 'g', 'e', 'I', 'd', 'T', 'y', 'p', 'e', 'd', 'e', 'f', 0 };
|
|---|
| 88 | static WCHAR ustr_outputbase[] = { 'O', 'u', 't', 'p', 'u', 't', 'B', 'a', 's', 'e', 0 };
|
|---|
| 89 | static WCHAR ustr_severity[] = { 'S', 'e', 'v', 'e', 'r', 'i', 't', 'y', 0 };
|
|---|
| 90 | static WCHAR ustr_severitynames[] = { 'S', 'e', 'v', 'e', 'r', 'i', 't', 'y', 'N', 'a', 'm', 'e', 's', 0 };
|
|---|
| 91 | static WCHAR ustr_success[] = { 'S', 'u', 'c', 'c', 'e', 's', 's', 0 };
|
|---|
| 92 | static WCHAR ustr_symbolicname[] = { 'S', 'y', 'm', 'b', 'o', 'l', 'i', 'c', 'N', 'a', 'm', 'e', 0 };
|
|---|
| 93 | static WCHAR ustr_system[] = { 'S', 'y', 's', 't', 'e', 'm', 0 };
|
|---|
| 94 | static WCHAR ustr_warning[] = { 'W', 'a', 'r', 'n', 'i', 'n', 'g', 0 };
|
|---|
| 95 | static WCHAR ustr_msg00001[] = { 'm', 's', 'g', '0', '0', '0', '0', '1', 0 };
|
|---|
/*
 * This table is to beat any form of "expression building" to check for
 * correct filename characters. It is also used for ident checks.
 * FIXME: use it more consistently.
 *
 * Each entry is a set of CH_* flags for the iso-8859-1 character with
 * that code. The entries are unsigned because CH_INVALID (0x80) does not
 * fit in a plain char on platforms where char is signed.
 */

#define CH_SHORTNAME 0x01
#define CH_LONGNAME 0x02
#define CH_IDENT 0x04
#define CH_NUMBER 0x08
/*#define CH_WILDCARD 0x10*/
/*#define CH_DOT 0x20*/
#define CH_PUNCT 0x40
#define CH_INVALID 0x80

static const unsigned char char_table[256] = {
    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x00 - 0x07 */
    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x08 - 0x0F */
    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x10 - 0x17 */
    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, /* 0x18 - 0x1F */
    0x80, 0x03, 0x80, 0x03, 0x03, 0x03, 0x03, 0x03, /* 0x20 - 0x27 " !"#$%&'" */
    0x43, 0x43, 0x10, 0x80, 0x03, 0x03, 0x22, 0x80, /* 0x28 - 0x2F "()*+,-./" */
    0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, /* 0x30 - 0x37 "01234567" */
    0x0b, 0x0b, 0xc0, 0x80, 0x80, 0x80, 0x80, 0x10, /* 0x38 - 0x3F "89:;<=>?" */
    0x03, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x40 - 0x47 "@ABCDEFG" */
    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x48 - 0x4F "HIJKLMNO" */
    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x50 - 0x57 "PQRSTUVW" */
    0x07, 0x07, 0x07, 0x80, 0x80, 0x80, 0x80, 0x07, /* 0x58 - 0x5F "XYZ[\]^_" */
    0x03, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x60 - 0x67 "`abcdefg" */
    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x68 - 0x6F "hijklmno" */
    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, /* 0x70 - 0x77 "pqrstuvw" */
    0x07, 0x07, 0x07, 0x03, 0x80, 0x03, 0x03, 0x80, /* 0x78 - 0x7F "xyz{|}~ " */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x80 - 0x87 */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x88 - 0x8F */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x90 - 0x97 */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x98 - 0x9F */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xA0 - 0xA7 */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xA8 - 0xAF */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xB0 - 0xB7 */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xB8 - 0xBF */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xC0 - 0xC7 */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xC8 - 0xCF */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xD0 - 0xD7 */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xD8 - 0xDF */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xE0 - 0xE7 */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xE8 - 0xEF */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xF0 - 0xF7 */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x80, /* 0xF8 - 0xFF */
};
|
|---|
| 145 |
|
|---|
/*
 * Tell whether ch lies in unicode page 0 (iso-8859-1), i.e. whether no
 * bit above the low eight is set. Returns 1 if so, 0 otherwise; negative
 * values such as EOF are therefore rejected.
 */
static int isisochar(int ch)
{
    return (ch & ~0xff) == 0;
}
|
|---|
| 150 |
|
|---|
/* Currently selected codepage number and its conversion table */
static int codepage;
static const union cptable *codepage_def;

/*
 * Select codepage cp for translating non-unicode input.
 * The conversion table is looked up with find_codepage(); an unknown
 * codepage is reported through xyyerror().
 */
void set_codepage(int cp)
{
    codepage = cp;
    codepage_def = find_codepage(cp);
    if(codepage_def == NULL)
        xyyerror("Codepage %d not found; cannot process", cp);
}
|
|---|
| 161 |
|
|---|
| 162 | /*
|
|---|
| 163 | * Input functions
|
|---|
| 164 | */
|
|---|
| 165 | static int nungetstack = 0;
|
|---|
| 166 | static int allocungetstack = 0;
|
|---|
| 167 | static char *ungetstack = NULL;
|
|---|
| 168 | static int ninputbuffer = 0;
|
|---|
| 169 | static WCHAR *inputbuffer = NULL;
|
|---|
| 170 | static char *xlatebuffer = NULL;
|
|---|
| 171 |
|
|---|
| 172 | #define INPUTBUFFER_SIZE 2048 /* Must be larger than 4 and approx. large enough to hold a line */
|
|---|
| 173 |
|
|---|
| 174 | /*
|
|---|
| 175 | * Fill the input buffer with *one* line of input.
|
|---|
| 176 | * The line is '\n' terminated so that scanning
|
|---|
| 177 | * messages with translation works as expected
|
|---|
| 178 | * (otherwise we cannot pre-translate because the
|
|---|
| 179 | * language is first known one line before the
|
|---|
| 180 | * actual message).
|
|---|
| 181 | */
|
|---|
| 182 | static int fill_inputbuffer(void)
|
|---|
| 183 | {
|
|---|
| 184 | int n;
|
|---|
| 185 | static char err_fatalread[] = "Fatal: reading input failed";
|
|---|
| 186 | static int endian = -1;
|
|---|
| 187 |
|
|---|
| 188 | if(!inputbuffer)
|
|---|
| 189 | {
|
|---|
| 190 | inputbuffer = xmalloc(INPUTBUFFER_SIZE);
|
|---|
| 191 | xlatebuffer = xmalloc(INPUTBUFFER_SIZE);
|
|---|
| 192 | }
|
|---|
| 193 |
|
|---|
| 194 | try_again:
|
|---|
| 195 | if(!unicodein)
|
|---|
| 196 | {
|
|---|
| 197 | char *cptr;
|
|---|
| 198 | cptr = fgets(xlatebuffer, INPUTBUFFER_SIZE, yyin);
|
|---|
| 199 | if(!cptr && ferror(yyin))
|
|---|
| 200 | xyyerror(err_fatalread);
|
|---|
| 201 | else if(!cptr)
|
|---|
| 202 | return 0;
|
|---|
| 203 | assert(codepage_def != NULL);
|
|---|
| 204 | n = cp_mbstowcs(codepage_def, 0, xlatebuffer, strlen(xlatebuffer)+1, inputbuffer, INPUTBUFFER_SIZE);
|
|---|
| 205 | if(n < 0)
|
|---|
| 206 | internal_error(__FILE__, __LINE__, "Could not translate to unicode (%d)", n);
|
|---|
| 207 | if(n <= 1)
|
|---|
| 208 | goto try_again; /* Should not hapen */
|
|---|
| 209 | n--; /* Strip added conversion '\0' from input length */
|
|---|
| 210 | /*
|
|---|
| 211 | * FIXME:
|
|---|
| 212 | * Detect UTF-8 in the first time we read some bytes by
|
|---|
| 213 | * checking the special sequence "FE..." or something like
|
|---|
| 214 | * that. I need to check www.unicode.org for details.
|
|---|
| 215 | */
|
|---|
| 216 | }
|
|---|
| 217 | else
|
|---|
| 218 | {
|
|---|
| 219 | if(endian == -1)
|
|---|
| 220 | {
|
|---|
| 221 | n = fread(inputbuffer, 1, 8, yyin);
|
|---|
| 222 | if(n != 8)
|
|---|
| 223 | {
|
|---|
| 224 | if(!n && ferror(yyin))
|
|---|
| 225 | xyyerror(err_fatalread);
|
|---|
| 226 | else
|
|---|
| 227 | xyyerror("Fatal: file to short to determine byteorder (should never happen)");
|
|---|
| 228 | }
|
|---|
| 229 | if(isisochar(inputbuffer[0]) &&
|
|---|
| 230 | isisochar(inputbuffer[1]) &&
|
|---|
| 231 | isisochar(inputbuffer[2]) &&
|
|---|
| 232 | isisochar(inputbuffer[3]))
|
|---|
| 233 | {
|
|---|
| 234 | #ifdef WORDS_BIGENDIAN
|
|---|
| 235 | endian = WMC_BO_BIG;
|
|---|
| 236 | #else
|
|---|
| 237 | endian = WMC_BO_LITTLE;
|
|---|
| 238 | #endif
|
|---|
| 239 | }
|
|---|
| 240 | else if(isisochar(BYTESWAP_WORD(inputbuffer[0])) &&
|
|---|
| 241 | isisochar(BYTESWAP_WORD(inputbuffer[1])) &&
|
|---|
| 242 | isisochar(BYTESWAP_WORD(inputbuffer[2])) &&
|
|---|
| 243 | isisochar(BYTESWAP_WORD(inputbuffer[3])))
|
|---|
| 244 | {
|
|---|
| 245 | #ifdef WORDS_BIGENDIAN
|
|---|
| 246 | endian = WMC_BO_LITTLE;
|
|---|
| 247 | #else
|
|---|
| 248 | endian = WMC_BO_BIG;
|
|---|
| 249 | #endif
|
|---|
| 250 | }
|
|---|
| 251 | else
|
|---|
| 252 | xyyerror("Fatal: cannot determine file's byteorder");
|
|---|
| 253 | /* FIXME:
|
|---|
| 254 | * Determine the file-endian with the leader-bytes
|
|---|
| 255 | * "FF FE..."; can't remember the exact sequence.
|
|---|
| 256 | */
|
|---|
| 257 | n /= 2;
|
|---|
| 258 | #ifdef WORDS_BIGENDIAN
|
|---|
| 259 | if(endian == WMC_BO_LITTLE)
|
|---|
| 260 | #else
|
|---|
| 261 | if(endian == WMC_BO_BIG)
|
|---|
| 262 | #endif
|
|---|
| 263 | {
|
|---|
| 264 | inputbuffer[0] = BYTESWAP_WORD(inputbuffer[0]);
|
|---|
| 265 | inputbuffer[1] = BYTESWAP_WORD(inputbuffer[1]);
|
|---|
| 266 | inputbuffer[2] = BYTESWAP_WORD(inputbuffer[2]);
|
|---|
| 267 | inputbuffer[3] = BYTESWAP_WORD(inputbuffer[3]);
|
|---|
| 268 | }
|
|---|
| 269 |
|
|---|
| 270 | }
|
|---|
| 271 | else
|
|---|
| 272 | {
|
|---|
| 273 | int i;
|
|---|
| 274 | n = 0;
|
|---|
| 275 | for(i = 0; i < INPUTBUFFER_SIZE; i++)
|
|---|
| 276 | {
|
|---|
| 277 | int t;
|
|---|
| 278 | t = fread(&inputbuffer[i], 2, 1, yyin);
|
|---|
| 279 | if(!t && ferror(yyin))
|
|---|
| 280 | xyyerror(err_fatalread);
|
|---|
| 281 | else if(!t && n)
|
|---|
| 282 | break;
|
|---|
| 283 | n++;
|
|---|
| 284 | #ifdef WORDS_BIGENDIAN
|
|---|
| 285 | if(endian == WMC_BO_LITTLE)
|
|---|
| 286 | #else
|
|---|
| 287 | if(endian == WMC_BO_BIG)
|
|---|
| 288 | #endif
|
|---|
| 289 | {
|
|---|
| 290 | if((inputbuffer[i] = BYTESWAP_WORD(inputbuffer[i])) == '\n')
|
|---|
| 291 | break;
|
|---|
| 292 | }
|
|---|
| 293 | else
|
|---|
| 294 | {
|
|---|
| 295 | if(inputbuffer[i] == '\n')
|
|---|
| 296 | break;
|
|---|
| 297 | }
|
|---|
| 298 | }
|
|---|
| 299 | }
|
|---|
| 300 |
|
|---|
| 301 | }
|
|---|
| 302 |
|
|---|
| 303 | if(!n)
|
|---|
| 304 | {
|
|---|
| 305 | yywarning("Re-read line (input was or converted to zilch)");
|
|---|
| 306 | goto try_again; /* Should not happen, but could be due to stdin reading and a signal */
|
|---|
| 307 | }
|
|---|
| 308 |
|
|---|
| 309 | ninputbuffer += n;
|
|---|
| 310 | return 1;
|
|---|
| 311 | }
|
|---|
| 312 |
|
|---|
| 313 | static int get_unichar(void)
|
|---|
| 314 | {
|
|---|
| 315 | static WCHAR *b = NULL;
|
|---|
| 316 | char_number++;
|
|---|
| 317 |
|
|---|
| 318 | if(nungetstack)
|
|---|
| 319 | return ungetstack[--nungetstack];
|
|---|
| 320 |
|
|---|
| 321 | if(!ninputbuffer)
|
|---|
| 322 | {
|
|---|
| 323 | if(!fill_inputbuffer())
|
|---|
| 324 | return EOF;
|
|---|
| 325 | b = inputbuffer;
|
|---|
| 326 | }
|
|---|
| 327 |
|
|---|
| 328 | ninputbuffer--;
|
|---|
| 329 | return (int)(*b++ & 0xffff);
|
|---|
| 330 | }
|
|---|
| 331 |
|
|---|
| 332 | static void unget_unichar(int ch)
|
|---|
| 333 | {
|
|---|
| 334 | if(ch == EOF)
|
|---|
| 335 | return;
|
|---|
| 336 |
|
|---|
| 337 | char_number--;
|
|---|
| 338 |
|
|---|
| 339 | if(nungetstack == allocungetstack)
|
|---|
| 340 | {
|
|---|
| 341 | allocungetstack += 32;
|
|---|
| 342 | ungetstack = xrealloc(ungetstack, allocungetstack * sizeof(*ungetstack));
|
|---|
| 343 | }
|
|---|
| 344 |
|
|---|
| 345 | ungetstack[nungetstack++] = (WCHAR)ch;
|
|---|
| 346 | }
|
|---|
| 347 |
|
|---|
| 348 |
|
|---|
/*
 * Normal character stack.
 * Used for number scanning.
 */
static char *charstack = NULL;    /* storage, grown by push_char() */
static int alloccharstack = 0;    /* number of entries allocated */
static int ncharstack = 0;        /* number of entries in use */

/* Discard the stack contents; the storage itself is kept for reuse. */
static void empty_char_stack(void)
{
    ncharstack = 0;
}
|
|---|
| 361 |
|
|---|
| 362 | static void push_char(int ch)
|
|---|
| 363 | {
|
|---|
| 364 | if(ncharstack == alloccharstack)
|
|---|
| 365 | {
|
|---|
| 366 | alloccharstack += 32;
|
|---|
| 367 | charstack = xrealloc(charstack, alloccharstack * sizeof(*charstack));
|
|---|
| 368 | }
|
|---|
| 369 | charstack[ncharstack++] = (char)ch;
|
|---|
| 370 | }
|
|---|
| 371 |
|
|---|
| 372 | static int tos_char_stack(void)
|
|---|
| 373 | {
|
|---|
| 374 | if(!ncharstack)
|
|---|
| 375 | return 0;
|
|---|
| 376 | else
|
|---|
| 377 | return (int)(charstack[ncharstack-1] & 0xff);
|
|---|
| 378 | }
|
|---|
| 379 |
|
|---|
| 380 | static char *get_char_stack(void)
|
|---|
| 381 | {
|
|---|
| 382 | return charstack;
|
|---|
| 383 | }
|
|---|
| 384 |
|
|---|
| 385 | /*
|
|---|
| 386 | * Unicode character stack.
|
|---|
| 387 | * Used for general scanner.
|
|---|
| 388 | */
|
|---|
| 389 | static int nunicharstack = 0;
|
|---|
| 390 | static int allocunicharstack = 0;
|
|---|
| 391 | static WCHAR *unicharstack = NULL;
|
|---|
| 392 |
|
|---|
| 393 | static void empty_unichar_stack(void)
|
|---|
| 394 | {
|
|---|
| 395 | nunicharstack = 0;
|
|---|
| 396 | }
|
|---|
| 397 |
|
|---|
| 398 | static void push_unichar(int ch)
|
|---|
| 399 | {
|
|---|
| 400 | if(nunicharstack == allocunicharstack)
|
|---|
| 401 | {
|
|---|
| 402 | allocunicharstack += 128;
|
|---|
| 403 | unicharstack = xrealloc(unicharstack, allocunicharstack * sizeof(*unicharstack));
|
|---|
| 404 | }
|
|---|
| 405 | unicharstack[nunicharstack++] = (WCHAR)ch;
|
|---|
| 406 | }
|
|---|
| 407 |
|
|---|
#if 0
/*
 * Return the character on top of the unicode character stack without
 * removing it, or 0 when the stack is empty.
 * Currently disabled by the surrounding #if 0.
 */
static int tos_unichar_stack(void)
{
    if(!nunicharstack)
        return 0;
    else
        return (int)(unicharstack[nunicharstack-1] & 0xffff);
}
#endif
|
|---|
| 417 |
|
|---|
| 418 | static WCHAR *get_unichar_stack(void)
|
|---|
| 419 | {
|
|---|
| 420 | return unicharstack;
|
|---|
| 421 | }
|
|---|
| 422 |
|
|---|
| 423 | /*
|
|---|
| 424 | * Number scanner
|
|---|
| 425 | *
|
|---|
| 426 | * state | ch | next state
|
|---|
| 427 | * ------+-----------------+--------------------------
|
|---|
| 428 | * 0 | [0] | 1
|
|---|
| 429 | * 0 | [1-9] | 4
|
|---|
| 430 | * 0 | . | error (should never occur)
|
|---|
| 431 | * 1 | [xX] | 2
|
|---|
| 432 | * 1 | [0-7] | 3
|
|---|
| 433 | * 1 | [89a-wyzA-WYZ_] | error invalid digit
|
|---|
| 434 | * 1 | . | return 0
|
|---|
| 435 | * 2 | [0-9a-fA-F] | 2
|
|---|
| 436 | * 2 | [g-zG-Z_] | error invalid hex digit
|
|---|
| 437 | * 2 | . | return (hex-number) if TOS != [xX] else error
|
|---|
| 438 | * 3 | [0-7] | 3
|
|---|
| 439 | * 3 | [89a-zA-Z_] | error invalid octal digit
|
|---|
| 440 | * 3 | . | return (octal-number)
|
|---|
| 441 | * 4 | [0-9] | 4
|
|---|
| 442 | * 4 | [a-zA-Z_] | error invalid decimal digit
|
|---|
| 443 | * 4 | . | return (decimal-number)
|
|---|
| 444 | *
|
|---|
| 445 | * All non-identifier characters [^a-zA-Z_0-9] terminate the scan
|
|---|
| 446 | * and return the value. This is not entirely correct, but close
|
|---|
| 447 | * enough (should check punctuators as trailing context, but the
|
|---|
| 448 | * char_table is not adapted to that and it is questionable whether
|
|---|
| 449 | * it is worth the trouble).
|
|---|
| 450 | * All non-iso-8859-1 characters are an error.
|
|---|
| 451 | */
|
|---|
| 452 | static int scan_number(int ch)
|
|---|
| 453 | {
|
|---|
| 454 | int state = 0;
|
|---|
| 455 | int base = 10;
|
|---|
| 456 | empty_char_stack();
|
|---|
| 457 |
|
|---|
| 458 | while(1)
|
|---|
| 459 | {
|
|---|
| 460 | if(!isisochar(ch))
|
|---|
| 461 | xyyerror("Invalid digit");
|
|---|
| 462 |
|
|---|
| 463 | switch(state)
|
|---|
| 464 | {
|
|---|
| 465 | case 0:
|
|---|
| 466 | if(isdigit(ch))
|
|---|
| 467 | {
|
|---|
| 468 | push_char(ch);
|
|---|
| 469 | if(ch == '0')
|
|---|
| 470 | state = 1;
|
|---|
| 471 | else
|
|---|
| 472 | state = 4;
|
|---|
| 473 | }
|
|---|
| 474 | else
|
|---|
| 475 | internal_error(__FILE__, __LINE__, "Non-digit in first number-scanner state");
|
|---|
| 476 | break;
|
|---|
| 477 | case 1:
|
|---|
| 478 | if(ch == 'x' || ch == 'X')
|
|---|
| 479 | {
|
|---|
| 480 | push_char(ch);
|
|---|
| 481 | state = 2;
|
|---|
| 482 | }
|
|---|
| 483 | else if(ch >= '0' && ch <= '7')
|
|---|
| 484 | {
|
|---|
| 485 | push_char(ch);
|
|---|
| 486 | state = 3;
|
|---|
| 487 | }
|
|---|
| 488 | else if(isalpha(ch) || ch == '_')
|
|---|
| 489 | xyyerror("Invalid number digit");
|
|---|
| 490 | else
|
|---|
| 491 | {
|
|---|
| 492 | unget_unichar(ch);
|
|---|
| 493 | yylval.num = 0;
|
|---|
| 494 | return tNUMBER;
|
|---|
| 495 | }
|
|---|
| 496 | break;
|
|---|
| 497 | case 2:
|
|---|
| 498 | if(isxdigit(ch))
|
|---|
| 499 | push_char(ch);
|
|---|
| 500 | else if(isalpha(ch) || ch == '_' || !isxdigit(tos_char_stack()))
|
|---|
| 501 | xyyerror("Invalid hex digit");
|
|---|
| 502 | else
|
|---|
| 503 | {
|
|---|
| 504 | base = 16;
|
|---|
| 505 | goto finish;
|
|---|
| 506 | }
|
|---|
| 507 | break;
|
|---|
| 508 | case 3:
|
|---|
| 509 | if(ch >= '0' && ch <= '7')
|
|---|
| 510 | push_char(ch);
|
|---|
| 511 | else if(isalnum(ch) || ch == '_')
|
|---|
| 512 | xyyerror("Invalid octal digit");
|
|---|
| 513 | else
|
|---|
| 514 | {
|
|---|
| 515 | base = 8;
|
|---|
| 516 | goto finish;
|
|---|
| 517 | }
|
|---|
| 518 | break;
|
|---|
| 519 | case 4:
|
|---|
| 520 | if(isdigit(ch))
|
|---|
| 521 | push_char(ch);
|
|---|
| 522 | else if(isalnum(ch) || ch == '_')
|
|---|
| 523 | xyyerror("Invalid decimal digit");
|
|---|
| 524 | else
|
|---|
| 525 | {
|
|---|
| 526 | base = 10;
|
|---|
| 527 | goto finish;
|
|---|
| 528 | }
|
|---|
| 529 | break;
|
|---|
| 530 | default:
|
|---|
| 531 | internal_error(__FILE__, __LINE__, "Invalid state in number-scanner");
|
|---|
| 532 | }
|
|---|
| 533 | ch = get_unichar();
|
|---|
| 534 | }
|
|---|
| 535 | finish:
|
|---|
| 536 | unget_unichar(ch);
|
|---|
| 537 | push_char(0);
|
|---|
| 538 | yylval.num = strtoul(get_char_stack(), NULL, base);
|
|---|
| 539 | return tNUMBER;
|
|---|
| 540 | }
|
|---|
| 541 |
|
|---|
| 542 | static void newline(void)
|
|---|
| 543 | {
|
|---|
| 544 | line_number++;
|
|---|
| 545 | char_number = 1;
|
|---|
| 546 | }
|
|---|
| 547 |
|
|---|
| 548 | static int unisort(const void *p1, const void *p2)
|
|---|
| 549 | {
|
|---|
| 550 | return unistricmp(((token_t *)p1)->name, ((token_t *)p2)->name);
|
|---|
| 551 | }
|
|---|
| 552 |
|
|---|
| 553 | static token_t *tokentable = NULL;
|
|---|
| 554 | static int ntokentable = 0;
|
|---|
| 555 |
|
|---|
| 556 | token_t *lookup_token(const WCHAR *s)
|
|---|
| 557 | {
|
|---|
| 558 | token_t tok;
|
|---|
| 559 |
|
|---|
| 560 | tok.name = s;
|
|---|
| 561 | return (token_t *)bsearch(&tok, tokentable, ntokentable, sizeof(*tokentable), unisort);
|
|---|
| 562 | }
|
|---|
| 563 |
|
|---|
| 564 | void add_token(tok_e type, const WCHAR *name, int tok, int cp, const WCHAR *alias, int fix)
|
|---|
| 565 | {
|
|---|
| 566 | ntokentable++;
|
|---|
| 567 | tokentable = xrealloc(tokentable, ntokentable * sizeof(*tokentable));
|
|---|
| 568 | tokentable[ntokentable-1].type = type;
|
|---|
| 569 | tokentable[ntokentable-1].name = name;
|
|---|
| 570 | tokentable[ntokentable-1].token = tok;
|
|---|
| 571 | tokentable[ntokentable-1].codepage = cp;
|
|---|
| 572 | tokentable[ntokentable-1].alias = alias;
|
|---|
| 573 | tokentable[ntokentable-1].fixed = fix;
|
|---|
| 574 | qsort(tokentable, ntokentable, sizeof(*tokentable), unisort);
|
|---|
| 575 | }
|
|---|
| 576 |
|
|---|
| 577 | void get_tokentable(token_t **tab, int *len)
|
|---|
| 578 | {
|
|---|
| 579 | assert(tab != NULL);
|
|---|
| 580 | assert(len != NULL);
|
|---|
| 581 | *tab = tokentable;
|
|---|
| 582 | *len = ntokentable;
|
|---|
| 583 | }
|
|---|
| 584 |
|
|---|
| 585 | /*
|
|---|
| 586 | * The scanner
|
|---|
| 587 | *
|
|---|
| 588 | */
|
|---|
| 589 | int yylex(void)
|
|---|
| 590 | {
|
|---|
| 591 | static WCHAR ustr_dot1[] = { '.', '\n', 0 };
|
|---|
| 592 | static WCHAR ustr_dot2[] = { '.', '\r', '\n', 0 };
|
|---|
| 593 | static int isinit = 0;
|
|---|
| 594 | int ch;
|
|---|
| 595 |
|
|---|
| 596 | if(!isinit)
|
|---|
| 597 | {
|
|---|
| 598 | isinit++;
|
|---|
| 599 | set_codepage(WMC_DEFAULT_CODEPAGE);
|
|---|
| 600 | add_token(tok_keyword, ustr_codepages, tCODEPAGE, 0, NULL, 0);
|
|---|
| 601 | add_token(tok_keyword, ustr_facility, tFACILITY, 0, NULL, 1);
|
|---|
| 602 | add_token(tok_keyword, ustr_facilitynames, tFACNAMES, 0, NULL, 1);
|
|---|
| 603 | add_token(tok_keyword, ustr_language, tLANGUAGE, 0, NULL, 1);
|
|---|
| 604 | add_token(tok_keyword, ustr_languagenames, tLANNAMES, 0, NULL, 1);
|
|---|
| 605 | add_token(tok_keyword, ustr_messageid, tMSGID, 0, NULL, 1);
|
|---|
| 606 | add_token(tok_keyword, ustr_messageidtypedef, tTYPEDEF, 0, NULL, 1);
|
|---|
| 607 | add_token(tok_keyword, ustr_outputbase, tBASE, 0, NULL, 1);
|
|---|
| 608 | add_token(tok_keyword, ustr_severity, tSEVERITY, 0, NULL, 1);
|
|---|
| 609 | add_token(tok_keyword, ustr_severitynames, tSEVNAMES, 0, NULL, 1);
|
|---|
| 610 | add_token(tok_keyword, ustr_symbolicname, tSYMNAME, 0, NULL, 1);
|
|---|
| 611 | add_token(tok_severity, ustr_error, 0x03, 0, NULL, 0);
|
|---|
| 612 | add_token(tok_severity, ustr_warning, 0x02, 0, NULL, 0);
|
|---|
| 613 | add_token(tok_severity, ustr_informational, 0x01, 0, NULL, 0);
|
|---|
| 614 | add_token(tok_severity, ustr_success, 0x00, 0, NULL, 0);
|
|---|
| 615 | add_token(tok_facility, ustr_application, 0xFFF, 0, NULL, 0);
|
|---|
| 616 | add_token(tok_facility, ustr_system, 0x0FF, 0, NULL, 0);
|
|---|
| 617 | add_token(tok_language, ustr_english, 0x409, 437, ustr_msg00001, 0);
|
|---|
| 618 | }
|
|---|
| 619 |
|
|---|
| 620 | empty_unichar_stack();
|
|---|
| 621 |
|
|---|
| 622 | while(1)
|
|---|
| 623 | {
|
|---|
| 624 | if(want_line)
|
|---|
| 625 | {
|
|---|
| 626 | while((ch = get_unichar()) != '\n')
|
|---|
| 627 | {
|
|---|
| 628 | if(ch == EOF)
|
|---|
| 629 | xyyerror("Unexpected EOF");
|
|---|
| 630 | push_unichar(ch);
|
|---|
| 631 | }
|
|---|
| 632 | newline();
|
|---|
| 633 | push_unichar(ch);
|
|---|
| 634 | push_unichar(0);
|
|---|
| 635 | if(!unistrcmp(ustr_dot1, get_unichar_stack()) || !unistrcmp(ustr_dot2, get_unichar_stack()))
|
|---|
| 636 | {
|
|---|
| 637 | want_line = 0;
|
|---|
| 638 | /* Reset the codepage to our default after each message */
|
|---|
| 639 | set_codepage(WMC_DEFAULT_CODEPAGE);
|
|---|
| 640 | return tMSGEND;
|
|---|
| 641 | }
|
|---|
| 642 | yylval.str = xunistrdup(get_unichar_stack());
|
|---|
| 643 | return tLINE;
|
|---|
| 644 | }
|
|---|
| 645 |
|
|---|
| 646 | ch = get_unichar();
|
|---|
| 647 |
|
|---|
| 648 | if(ch == EOF)
|
|---|
| 649 | return EOF;
|
|---|
| 650 |
|
|---|
| 651 | if(ch == '\n')
|
|---|
| 652 | {
|
|---|
| 653 | newline();
|
|---|
| 654 | if(want_nl)
|
|---|
| 655 | {
|
|---|
| 656 | want_nl = 0;
|
|---|
| 657 | return tNL;
|
|---|
| 658 | }
|
|---|
| 659 | continue;
|
|---|
| 660 | }
|
|---|
| 661 |
|
|---|
| 662 | if(isisochar(ch))
|
|---|
| 663 | {
|
|---|
| 664 | if(want_file)
|
|---|
| 665 | {
|
|---|
| 666 | int n = 0;
|
|---|
| 667 | while(n < 8 && isisochar(ch))
|
|---|
| 668 | {
|
|---|
| 669 | int t = char_table[ch];
|
|---|
| 670 | if((t & CH_PUNCT) || !(t & CH_SHORTNAME))
|
|---|
| 671 | break;
|
|---|
| 672 |
|
|---|
| 673 | push_unichar(ch);
|
|---|
| 674 | n++;
|
|---|
| 675 | ch = get_unichar();
|
|---|
| 676 | }
|
|---|
| 677 | unget_unichar(ch);
|
|---|
| 678 | push_unichar(0);
|
|---|
| 679 | want_file = 0;
|
|---|
| 680 | yylval.str = xunistrdup(get_unichar_stack());
|
|---|
| 681 | return tFILE;
|
|---|
| 682 | }
|
|---|
| 683 |
|
|---|
| 684 | if(char_table[ch] & CH_IDENT)
|
|---|
| 685 | {
|
|---|
| 686 | token_t *tok;
|
|---|
| 687 | while(isisochar(ch) && (char_table[ch] & (CH_IDENT|CH_NUMBER)))
|
|---|
| 688 | {
|
|---|
| 689 | push_unichar(ch);
|
|---|
| 690 | ch = get_unichar();
|
|---|
| 691 | }
|
|---|
| 692 | unget_unichar(ch);
|
|---|
| 693 | push_unichar(0);
|
|---|
| 694 | if(!(tok = lookup_token(get_unichar_stack())))
|
|---|
| 695 | {
|
|---|
| 696 | yylval.str = xunistrdup(get_unichar_stack());
|
|---|
| 697 | return tIDENT;
|
|---|
| 698 | }
|
|---|
| 699 | switch(tok->type)
|
|---|
| 700 | {
|
|---|
| 701 | case tok_keyword:
|
|---|
| 702 | return tok->token;
|
|---|
| 703 |
|
|---|
| 704 | case tok_language:
|
|---|
| 705 | codepage = tok->codepage;
|
|---|
| 706 | /* Fall through */
|
|---|
| 707 | case tok_severity:
|
|---|
| 708 | case tok_facility:
|
|---|
| 709 | yylval.tok = tok;
|
|---|
| 710 | return tTOKEN;
|
|---|
| 711 |
|
|---|
| 712 | default:
|
|---|
| 713 | internal_error(__FILE__, __LINE__, "Invalid token type encountered");
|
|---|
| 714 | }
|
|---|
| 715 | }
|
|---|
| 716 |
|
|---|
| 717 | if(isspace(ch)) /* Ignore space */
|
|---|
| 718 | continue;
|
|---|
| 719 |
|
|---|
| 720 | if(isdigit(ch))
|
|---|
| 721 | return scan_number(ch);
|
|---|
| 722 | }
|
|---|
| 723 |
|
|---|
| 724 | switch(ch)
|
|---|
| 725 | {
|
|---|
| 726 | case ':':
|
|---|
| 727 | case '=':
|
|---|
| 728 | case '+':
|
|---|
| 729 | case '(':
|
|---|
| 730 | case ')':
|
|---|
| 731 | return ch;
|
|---|
| 732 | case ';':
|
|---|
| 733 | while(ch != '\n' && ch != EOF)
|
|---|
| 734 | {
|
|---|
| 735 | push_unichar(ch);
|
|---|
| 736 | ch = get_unichar();
|
|---|
| 737 | }
|
|---|
| 738 | newline();
|
|---|
| 739 | push_unichar(ch); /* Include the newline */
|
|---|
| 740 | push_unichar(0);
|
|---|
| 741 | yylval.str = xunistrdup(get_unichar_stack());
|
|---|
| 742 | return tCOMMENT;
|
|---|
| 743 | default:
|
|---|
| 744 | xyyerror("Invalid character '%c' (0x%04x)", isisochar(ch) && isprint(ch) ? ch : '.', ch);
|
|---|
| 745 | }
|
|---|
| 746 | }
|
|---|
| 747 | }
|
|---|