[205] | 1 | /*
|
---|
| 2 | ** 2001 September 15
|
---|
| 3 | **
|
---|
| 4 | ** The author disclaims copyright to this source code. In place of
|
---|
| 5 | ** a legal notice, here is a blessing:
|
---|
| 6 | **
|
---|
| 7 | ** May you do good and not evil.
|
---|
| 8 | ** May you find forgiveness for yourself and forgive others.
|
---|
| 9 | ** May you share freely, never taking more than you give.
|
---|
| 10 | **
|
---|
| 11 | *************************************************************************
|
---|
| 12 | ** A tokenizer for SQL
|
---|
| 13 | **
|
---|
| 14 | ** This file contains C code that splits an SQL input string up into
|
---|
| 15 | ** individual tokens and sends those tokens one-by-one over to the
|
---|
| 16 | ** parser for analysis.
|
---|
| 17 | **
|
---|
| 18 | ** $Id: tokenize.c,v 1.68 2004/02/14 23:59:58 drh Exp $
|
---|
| 19 | */
|
---|
| 20 | #include "sqliteInt.h"
|
---|
| 21 | #include "os.h"
|
---|
| 22 | #include <ctype.h>
|
---|
| 23 | #include <stdlib.h>
|
---|
| 24 |
|
---|
| 25 | /*
|
---|
| 26 | ** All the keywords of the SQL language are stored as in a hash
|
---|
| 27 | ** table composed of instances of the following structure.
|
---|
| 28 | */
|
---|
| 29 | typedef struct Keyword Keyword;
|
---|
| 30 | struct Keyword {
|
---|
| 31 | char *zName; /* The keyword name */
|
---|
| 32 | u8 tokenType; /* Token value for this keyword */
|
---|
| 33 | u8 len; /* Length of this keyword */
|
---|
| 34 | u8 iNext; /* Index in aKeywordTable[] of next with same hash */
|
---|
| 35 | };
|
---|
| 36 |
|
---|
| 37 | /*
|
---|
| 38 | ** These are the keywords
|
---|
| 39 | */
|
---|
| 40 | static Keyword aKeywordTable[] = {
|
---|
| 41 | { "ABORT", TK_ABORT, },
|
---|
| 42 | { "AFTER", TK_AFTER, },
|
---|
| 43 | { "ALL", TK_ALL, },
|
---|
| 44 | { "AND", TK_AND, },
|
---|
| 45 | { "AS", TK_AS, },
|
---|
| 46 | { "ASC", TK_ASC, },
|
---|
| 47 | { "ATTACH", TK_ATTACH, },
|
---|
| 48 | { "BEFORE", TK_BEFORE, },
|
---|
| 49 | { "BEGIN", TK_BEGIN, },
|
---|
| 50 | { "BETWEEN", TK_BETWEEN, },
|
---|
| 51 | { "BY", TK_BY, },
|
---|
| 52 | { "CASCADE", TK_CASCADE, },
|
---|
| 53 | { "CASE", TK_CASE, },
|
---|
| 54 | { "CHECK", TK_CHECK, },
|
---|
| 55 | { "CLUSTER", TK_CLUSTER, },
|
---|
| 56 | { "COLLATE", TK_COLLATE, },
|
---|
| 57 | { "COMMIT", TK_COMMIT, },
|
---|
| 58 | { "CONFLICT", TK_CONFLICT, },
|
---|
| 59 | { "CONSTRAINT", TK_CONSTRAINT, },
|
---|
| 60 | { "COPY", TK_COPY, },
|
---|
| 61 | { "CREATE", TK_CREATE, },
|
---|
| 62 | { "CROSS", TK_JOIN_KW, },
|
---|
| 63 | { "DATABASE", TK_DATABASE, },
|
---|
| 64 | { "DEFAULT", TK_DEFAULT, },
|
---|
| 65 | { "DEFERRED", TK_DEFERRED, },
|
---|
| 66 | { "DEFERRABLE", TK_DEFERRABLE, },
|
---|
| 67 | { "DELETE", TK_DELETE, },
|
---|
| 68 | { "DELIMITERS", TK_DELIMITERS, },
|
---|
| 69 | { "DESC", TK_DESC, },
|
---|
| 70 | { "DETACH", TK_DETACH, },
|
---|
| 71 | { "DISTINCT", TK_DISTINCT, },
|
---|
| 72 | { "DROP", TK_DROP, },
|
---|
| 73 | { "END", TK_END, },
|
---|
| 74 | { "EACH", TK_EACH, },
|
---|
| 75 | { "ELSE", TK_ELSE, },
|
---|
| 76 | { "EXCEPT", TK_EXCEPT, },
|
---|
| 77 | { "EXPLAIN", TK_EXPLAIN, },
|
---|
| 78 | { "FAIL", TK_FAIL, },
|
---|
| 79 | { "FOR", TK_FOR, },
|
---|
| 80 | { "FOREIGN", TK_FOREIGN, },
|
---|
| 81 | { "FROM", TK_FROM, },
|
---|
| 82 | { "FULL", TK_JOIN_KW, },
|
---|
| 83 | { "GLOB", TK_GLOB, },
|
---|
| 84 | { "GROUP", TK_GROUP, },
|
---|
| 85 | { "HAVING", TK_HAVING, },
|
---|
| 86 | { "IGNORE", TK_IGNORE, },
|
---|
| 87 | { "IMMEDIATE", TK_IMMEDIATE, },
|
---|
| 88 | { "IN", TK_IN, },
|
---|
| 89 | { "INDEX", TK_INDEX, },
|
---|
| 90 | { "INITIALLY", TK_INITIALLY, },
|
---|
| 91 | { "INNER", TK_JOIN_KW, },
|
---|
| 92 | { "INSERT", TK_INSERT, },
|
---|
| 93 | { "INSTEAD", TK_INSTEAD, },
|
---|
| 94 | { "INTERSECT", TK_INTERSECT, },
|
---|
| 95 | { "INTO", TK_INTO, },
|
---|
| 96 | { "IS", TK_IS, },
|
---|
| 97 | { "ISNULL", TK_ISNULL, },
|
---|
| 98 | { "JOIN", TK_JOIN, },
|
---|
| 99 | { "KEY", TK_KEY, },
|
---|
| 100 | { "LEFT", TK_JOIN_KW, },
|
---|
| 101 | { "LIKE", TK_LIKE, },
|
---|
| 102 | { "LIMIT", TK_LIMIT, },
|
---|
| 103 | { "MATCH", TK_MATCH, },
|
---|
| 104 | { "NATURAL", TK_JOIN_KW, },
|
---|
| 105 | { "NOT", TK_NOT, },
|
---|
| 106 | { "NOTNULL", TK_NOTNULL, },
|
---|
| 107 | { "NULL", TK_NULL, },
|
---|
| 108 | { "OF", TK_OF, },
|
---|
| 109 | { "OFFSET", TK_OFFSET, },
|
---|
| 110 | { "ON", TK_ON, },
|
---|
| 111 | { "OR", TK_OR, },
|
---|
| 112 | { "ORDER", TK_ORDER, },
|
---|
| 113 | { "OUTER", TK_JOIN_KW, },
|
---|
| 114 | { "PRAGMA", TK_PRAGMA, },
|
---|
| 115 | { "PRIMARY", TK_PRIMARY, },
|
---|
| 116 | { "RAISE", TK_RAISE, },
|
---|
| 117 | { "REFERENCES", TK_REFERENCES, },
|
---|
| 118 | { "REPLACE", TK_REPLACE, },
|
---|
| 119 | { "RESTRICT", TK_RESTRICT, },
|
---|
| 120 | { "RIGHT", TK_JOIN_KW, },
|
---|
| 121 | { "ROLLBACK", TK_ROLLBACK, },
|
---|
| 122 | { "ROW", TK_ROW, },
|
---|
| 123 | { "SELECT", TK_SELECT, },
|
---|
| 124 | { "SET", TK_SET, },
|
---|
| 125 | { "STATEMENT", TK_STATEMENT, },
|
---|
| 126 | { "TABLE", TK_TABLE, },
|
---|
| 127 | { "TEMP", TK_TEMP, },
|
---|
| 128 | { "TEMPORARY", TK_TEMP, },
|
---|
| 129 | { "THEN", TK_THEN, },
|
---|
| 130 | { "TRANSACTION", TK_TRANSACTION, },
|
---|
| 131 | { "TRIGGER", TK_TRIGGER, },
|
---|
| 132 | { "UNION", TK_UNION, },
|
---|
| 133 | { "UNIQUE", TK_UNIQUE, },
|
---|
| 134 | { "UPDATE", TK_UPDATE, },
|
---|
| 135 | { "USING", TK_USING, },
|
---|
| 136 | { "VACUUM", TK_VACUUM, },
|
---|
| 137 | { "VALUES", TK_VALUES, },
|
---|
| 138 | { "VIEW", TK_VIEW, },
|
---|
| 139 | { "WHEN", TK_WHEN, },
|
---|
| 140 | { "WHERE", TK_WHERE, },
|
---|
| 141 | };
|
---|
| 142 |
|
---|
| 143 | /*
|
---|
| 144 | ** This is the hash table
|
---|
| 145 | */
|
---|
| 146 | #define KEY_HASH_SIZE 101
|
---|
| 147 | static u8 aiHashTable[KEY_HASH_SIZE];
|
---|
| 148 |
|
---|
| 149 |
|
---|
| 150 | /*
|
---|
| 151 | ** This function looks up an identifier to determine if it is a
|
---|
| 152 | ** keyword. If it is a keyword, the token code of that keyword is
|
---|
| 153 | ** returned. If the input is not a keyword, TK_ID is returned.
|
---|
| 154 | */
|
---|
| 155 | int sqliteKeywordCode(const char *z, int n){
|
---|
| 156 | int h, i;
|
---|
| 157 | Keyword *p;
|
---|
| 158 | static char needInit = 1;
|
---|
| 159 | if( needInit ){
|
---|
| 160 | /* Initialize the keyword hash table */
|
---|
| 161 | sqliteOsEnterMutex();
|
---|
| 162 | if( needInit ){
|
---|
| 163 | int nk;
|
---|
| 164 | nk = sizeof(aKeywordTable)/sizeof(aKeywordTable[0]);
|
---|
| 165 | for(i=0; i<nk; i++){
|
---|
| 166 | aKeywordTable[i].len = strlen(aKeywordTable[i].zName);
|
---|
| 167 | h = sqliteHashNoCase(aKeywordTable[i].zName, aKeywordTable[i].len);
|
---|
| 168 | h %= KEY_HASH_SIZE;
|
---|
| 169 | aKeywordTable[i].iNext = aiHashTable[h];
|
---|
| 170 | aiHashTable[h] = i+1;
|
---|
| 171 | }
|
---|
| 172 | needInit = 0;
|
---|
| 173 | }
|
---|
| 174 | sqliteOsLeaveMutex();
|
---|
| 175 | }
|
---|
| 176 | h = sqliteHashNoCase(z, n) % KEY_HASH_SIZE;
|
---|
| 177 | for(i=aiHashTable[h]; i; i=p->iNext){
|
---|
| 178 | p = &aKeywordTable[i-1];
|
---|
| 179 | if( p->len==n && sqliteStrNICmp(p->zName, z, n)==0 ){
|
---|
| 180 | return p->tokenType;
|
---|
| 181 | }
|
---|
| 182 | }
|
---|
| 183 | return TK_ID;
|
---|
| 184 | }
|
---|
| 185 |
|
---|
| 186 |
|
---|
/*
** Lookup table of identifier characters for the 7-bit ASCII range.
** isIdChar[X] is 1 when X (with X&0x80==0) may appear in an identifier
** and 0 when it may not.  The permitted characters are the decimal
** digits, the upper and lower case letters, and "_".
**
** The table has only 128 entries and must never be indexed with a byte
** whose high-order bit is set.  Such bytes are always accepted as
** identifier characters by the code that uses this table, which means
** that any sequence of UTF-8 characters, or characters from an extended
** ISO8859 character set, can form an identifier.
*/
static const char isIdChar[] = {
/*         x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
/* 0x */    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 1x */    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 2x */    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 3x */    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
/* 4x */    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* 5x */    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
/* 6x */    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* 7x */    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
};
|
---|
| 212 |
|
---|
| 213 |
|
---|
| 214 | /*
|
---|
| 215 | ** Return the length of the token that begins at z[0].
|
---|
| 216 | ** Store the token type in *tokenType before returning.
|
---|
| 217 | */
|
---|
| 218 | static int sqliteGetToken(const unsigned char *z, int *tokenType){
|
---|
| 219 | int i;
|
---|
| 220 | switch( *z ){
|
---|
| 221 | case ' ': case '\t': case '\n': case '\f': case '\r': {
|
---|
| 222 | for(i=1; isspace(z[i]); i++){}
|
---|
| 223 | *tokenType = TK_SPACE;
|
---|
| 224 | return i;
|
---|
| 225 | }
|
---|
| 226 | case '-': {
|
---|
| 227 | if( z[1]=='-' ){
|
---|
| 228 | for(i=2; z[i] && z[i]!='\n'; i++){}
|
---|
| 229 | *tokenType = TK_COMMENT;
|
---|
| 230 | return i;
|
---|
| 231 | }
|
---|
| 232 | *tokenType = TK_MINUS;
|
---|
| 233 | return 1;
|
---|
| 234 | }
|
---|
| 235 | case '(': {
|
---|
| 236 | *tokenType = TK_LP;
|
---|
| 237 | return 1;
|
---|
| 238 | }
|
---|
| 239 | case ')': {
|
---|
| 240 | *tokenType = TK_RP;
|
---|
| 241 | return 1;
|
---|
| 242 | }
|
---|
| 243 | case ';': {
|
---|
| 244 | *tokenType = TK_SEMI;
|
---|
| 245 | return 1;
|
---|
| 246 | }
|
---|
| 247 | case '+': {
|
---|
| 248 | *tokenType = TK_PLUS;
|
---|
| 249 | return 1;
|
---|
| 250 | }
|
---|
| 251 | case '*': {
|
---|
| 252 | *tokenType = TK_STAR;
|
---|
| 253 | return 1;
|
---|
| 254 | }
|
---|
| 255 | case '/': {
|
---|
| 256 | if( z[1]!='*' || z[2]==0 ){
|
---|
| 257 | *tokenType = TK_SLASH;
|
---|
| 258 | return 1;
|
---|
| 259 | }
|
---|
| 260 | for(i=3; z[i] && (z[i]!='/' || z[i-1]!='*'); i++){}
|
---|
| 261 | if( z[i] ) i++;
|
---|
| 262 | *tokenType = TK_COMMENT;
|
---|
| 263 | return i;
|
---|
| 264 | }
|
---|
| 265 | case '%': {
|
---|
| 266 | *tokenType = TK_REM;
|
---|
| 267 | return 1;
|
---|
| 268 | }
|
---|
| 269 | case '=': {
|
---|
| 270 | *tokenType = TK_EQ;
|
---|
| 271 | return 1 + (z[1]=='=');
|
---|
| 272 | }
|
---|
| 273 | case '<': {
|
---|
| 274 | if( z[1]=='=' ){
|
---|
| 275 | *tokenType = TK_LE;
|
---|
| 276 | return 2;
|
---|
| 277 | }else if( z[1]=='>' ){
|
---|
| 278 | *tokenType = TK_NE;
|
---|
| 279 | return 2;
|
---|
| 280 | }else if( z[1]=='<' ){
|
---|
| 281 | *tokenType = TK_LSHIFT;
|
---|
| 282 | return 2;
|
---|
| 283 | }else{
|
---|
| 284 | *tokenType = TK_LT;
|
---|
| 285 | return 1;
|
---|
| 286 | }
|
---|
| 287 | }
|
---|
| 288 | case '>': {
|
---|
| 289 | if( z[1]=='=' ){
|
---|
| 290 | *tokenType = TK_GE;
|
---|
| 291 | return 2;
|
---|
| 292 | }else if( z[1]=='>' ){
|
---|
| 293 | *tokenType = TK_RSHIFT;
|
---|
| 294 | return 2;
|
---|
| 295 | }else{
|
---|
| 296 | *tokenType = TK_GT;
|
---|
| 297 | return 1;
|
---|
| 298 | }
|
---|
| 299 | }
|
---|
| 300 | case '!': {
|
---|
| 301 | if( z[1]!='=' ){
|
---|
| 302 | *tokenType = TK_ILLEGAL;
|
---|
| 303 | return 2;
|
---|
| 304 | }else{
|
---|
| 305 | *tokenType = TK_NE;
|
---|
| 306 | return 2;
|
---|
| 307 | }
|
---|
| 308 | }
|
---|
| 309 | case '|': {
|
---|
| 310 | if( z[1]!='|' ){
|
---|
| 311 | *tokenType = TK_BITOR;
|
---|
| 312 | return 1;
|
---|
| 313 | }else{
|
---|
| 314 | *tokenType = TK_CONCAT;
|
---|
| 315 | return 2;
|
---|
| 316 | }
|
---|
| 317 | }
|
---|
| 318 | case ',': {
|
---|
| 319 | *tokenType = TK_COMMA;
|
---|
| 320 | return 1;
|
---|
| 321 | }
|
---|
| 322 | case '&': {
|
---|
| 323 | *tokenType = TK_BITAND;
|
---|
| 324 | return 1;
|
---|
| 325 | }
|
---|
| 326 | case '~': {
|
---|
| 327 | *tokenType = TK_BITNOT;
|
---|
| 328 | return 1;
|
---|
| 329 | }
|
---|
| 330 | case '\'': case '"': {
|
---|
| 331 | int delim = z[0];
|
---|
| 332 | for(i=1; z[i]; i++){
|
---|
| 333 | if( z[i]==delim ){
|
---|
| 334 | if( z[i+1]==delim ){
|
---|
| 335 | i++;
|
---|
| 336 | }else{
|
---|
| 337 | break;
|
---|
| 338 | }
|
---|
| 339 | }
|
---|
| 340 | }
|
---|
| 341 | if( z[i] ) i++;
|
---|
| 342 | *tokenType = TK_STRING;
|
---|
| 343 | return i;
|
---|
| 344 | }
|
---|
| 345 | case '.': {
|
---|
| 346 | *tokenType = TK_DOT;
|
---|
| 347 | return 1;
|
---|
| 348 | }
|
---|
| 349 | case '0': case '1': case '2': case '3': case '4':
|
---|
| 350 | case '5': case '6': case '7': case '8': case '9': {
|
---|
| 351 | *tokenType = TK_INTEGER;
|
---|
| 352 | for(i=1; isdigit(z[i]); i++){}
|
---|
| 353 | if( z[i]=='.' && isdigit(z[i+1]) ){
|
---|
| 354 | i += 2;
|
---|
| 355 | while( isdigit(z[i]) ){ i++; }
|
---|
| 356 | *tokenType = TK_FLOAT;
|
---|
| 357 | }
|
---|
| 358 | if( (z[i]=='e' || z[i]=='E') &&
|
---|
| 359 | ( isdigit(z[i+1])
|
---|
| 360 | || ((z[i+1]=='+' || z[i+1]=='-') && isdigit(z[i+2]))
|
---|
| 361 | )
|
---|
| 362 | ){
|
---|
| 363 | i += 2;
|
---|
| 364 | while( isdigit(z[i]) ){ i++; }
|
---|
| 365 | *tokenType = TK_FLOAT;
|
---|
| 366 | }
|
---|
| 367 | return i;
|
---|
| 368 | }
|
---|
| 369 | case '[': {
|
---|
| 370 | for(i=1; z[i] && z[i-1]!=']'; i++){}
|
---|
| 371 | *tokenType = TK_ID;
|
---|
| 372 | return i;
|
---|
| 373 | }
|
---|
| 374 | case '?': {
|
---|
| 375 | *tokenType = TK_VARIABLE;
|
---|
| 376 | return 1;
|
---|
| 377 | }
|
---|
| 378 | default: {
|
---|
| 379 | if( (*z&0x80)==0 && !isIdChar[*z] ){
|
---|
| 380 | break;
|
---|
| 381 | }
|
---|
| 382 | for(i=1; (z[i]&0x80)!=0 || isIdChar[z[i]]; i++){}
|
---|
| 383 | *tokenType = sqliteKeywordCode((char*)z, i);
|
---|
| 384 | return i;
|
---|
| 385 | }
|
---|
| 386 | }
|
---|
| 387 | *tokenType = TK_ILLEGAL;
|
---|
| 388 | return 1;
|
---|
| 389 | }
|
---|
| 390 |
|
---|
| 391 | /*
|
---|
| 392 | ** Run the parser on the given SQL string. The parser structure is
|
---|
| 393 | ** passed in. An SQLITE_ status code is returned. If an error occurs
|
---|
| 394 | ** and pzErrMsg!=NULL then an error message might be written into
|
---|
| 395 | ** memory obtained from malloc() and *pzErrMsg made to point to that
|
---|
| 396 | ** error message. Or maybe not.
|
---|
| 397 | */
|
---|
| 398 | int sqliteRunParser(Parse *pParse, const char *zSql, char **pzErrMsg){
|
---|
| 399 | int nErr = 0;
|
---|
| 400 | int i;
|
---|
| 401 | void *pEngine;
|
---|
| 402 | int tokenType;
|
---|
| 403 | int lastTokenParsed = -1;
|
---|
| 404 | sqlite *db = pParse->db;
|
---|
| 405 | extern void *sqliteParserAlloc(void*(*)(int));
|
---|
| 406 | extern void sqliteParserFree(void*, void(*)(void*));
|
---|
| 407 | extern int sqliteParser(void*, int, Token, Parse*);
|
---|
| 408 |
|
---|
| 409 | db->flags &= ~SQLITE_Interrupt;
|
---|
| 410 | pParse->rc = SQLITE_OK;
|
---|
| 411 | i = 0;
|
---|
| 412 | pEngine = sqliteParserAlloc((void*(*)(int))malloc);
|
---|
| 413 | if( pEngine==0 ){
|
---|
| 414 | sqliteSetString(pzErrMsg, "out of memory", (char*)0);
|
---|
| 415 | return 1;
|
---|
| 416 | }
|
---|
| 417 | pParse->sLastToken.dyn = 0;
|
---|
| 418 | pParse->zTail = zSql;
|
---|
| 419 | while( sqlite_malloc_failed==0 && zSql[i]!=0 ){
|
---|
| 420 | assert( i>=0 );
|
---|
| 421 | pParse->sLastToken.z = &zSql[i];
|
---|
| 422 | assert( pParse->sLastToken.dyn==0 );
|
---|
| 423 | pParse->sLastToken.n = sqliteGetToken((unsigned char*)&zSql[i], &tokenType);
|
---|
| 424 | i += pParse->sLastToken.n;
|
---|
| 425 | switch( tokenType ){
|
---|
| 426 | case TK_SPACE:
|
---|
| 427 | case TK_COMMENT: {
|
---|
| 428 | if( (db->flags & SQLITE_Interrupt)!=0 ){
|
---|
| 429 | pParse->rc = SQLITE_INTERRUPT;
|
---|
| 430 | sqliteSetString(pzErrMsg, "interrupt", (char*)0);
|
---|
| 431 | goto abort_parse;
|
---|
| 432 | }
|
---|
| 433 | break;
|
---|
| 434 | }
|
---|
| 435 | case TK_ILLEGAL: {
|
---|
| 436 | sqliteSetNString(pzErrMsg, "unrecognized token: \"", -1,
|
---|
| 437 | pParse->sLastToken.z, pParse->sLastToken.n, "\"", 1, 0);
|
---|
| 438 | nErr++;
|
---|
| 439 | goto abort_parse;
|
---|
| 440 | }
|
---|
| 441 | case TK_SEMI: {
|
---|
| 442 | pParse->zTail = &zSql[i];
|
---|
| 443 | /* Fall thru into the default case */
|
---|
| 444 | }
|
---|
| 445 | default: {
|
---|
| 446 | sqliteParser(pEngine, tokenType, pParse->sLastToken, pParse);
|
---|
| 447 | lastTokenParsed = tokenType;
|
---|
| 448 | if( pParse->rc!=SQLITE_OK ){
|
---|
| 449 | goto abort_parse;
|
---|
| 450 | }
|
---|
| 451 | break;
|
---|
| 452 | }
|
---|
| 453 | }
|
---|
| 454 | }
|
---|
| 455 | abort_parse:
|
---|
| 456 | if( zSql[i]==0 && nErr==0 && pParse->rc==SQLITE_OK ){
|
---|
| 457 | if( lastTokenParsed!=TK_SEMI ){
|
---|
| 458 | sqliteParser(pEngine, TK_SEMI, pParse->sLastToken, pParse);
|
---|
| 459 | pParse->zTail = &zSql[i];
|
---|
| 460 | }
|
---|
| 461 | sqliteParser(pEngine, 0, pParse->sLastToken, pParse);
|
---|
| 462 | }
|
---|
| 463 | sqliteParserFree(pEngine, free);
|
---|
| 464 | if( pParse->rc!=SQLITE_OK && pParse->rc!=SQLITE_DONE && pParse->zErrMsg==0 ){
|
---|
| 465 | sqliteSetString(&pParse->zErrMsg, sqlite_error_string(pParse->rc),
|
---|
| 466 | (char*)0);
|
---|
| 467 | }
|
---|
| 468 | if( pParse->zErrMsg ){
|
---|
| 469 | if( pzErrMsg && *pzErrMsg==0 ){
|
---|
| 470 | *pzErrMsg = pParse->zErrMsg;
|
---|
| 471 | }else{
|
---|
| 472 | sqliteFree(pParse->zErrMsg);
|
---|
| 473 | }
|
---|
| 474 | pParse->zErrMsg = 0;
|
---|
| 475 | if( !nErr ) nErr++;
|
---|
| 476 | }
|
---|
| 477 | if( pParse->pVdbe && pParse->nErr>0 ){
|
---|
| 478 | sqliteVdbeDelete(pParse->pVdbe);
|
---|
| 479 | pParse->pVdbe = 0;
|
---|
| 480 | }
|
---|
| 481 | if( pParse->pNewTable ){
|
---|
| 482 | sqliteDeleteTable(pParse->db, pParse->pNewTable);
|
---|
| 483 | pParse->pNewTable = 0;
|
---|
| 484 | }
|
---|
| 485 | if( pParse->pNewTrigger ){
|
---|
| 486 | sqliteDeleteTrigger(pParse->pNewTrigger);
|
---|
| 487 | pParse->pNewTrigger = 0;
|
---|
| 488 | }
|
---|
| 489 | if( nErr>0 && (pParse->rc==SQLITE_OK || pParse->rc==SQLITE_DONE) ){
|
---|
| 490 | pParse->rc = SQLITE_ERROR;
|
---|
| 491 | }
|
---|
| 492 | return nErr;
|
---|
| 493 | }
|
---|
| 494 |
|
---|
/*
** Token classes used by the sqlite_complete() routine.  The values are
** used as the column index of the state transition matrix inside that
** routine, so they must remain 0 through 7 in this order.
*/
enum {
  tkEXPLAIN = 0,    /* The "explain" keyword */
  tkCREATE  = 1,    /* The "create" keyword */
  tkTEMP    = 2,    /* The "temp" or "temporary" keyword */
  tkTRIGGER = 3,    /* The "trigger" keyword */
  tkEND     = 4,    /* The "end" keyword */
  tkSEMI    = 5,    /* A semicolon */
  tkWS      = 6,    /* White space or a comment */
  tkOTHER   = 7     /* Any other SQL token */
};
|
---|
| 507 |
|
---|
| 508 | /*
|
---|
| 509 | ** Return TRUE if the given SQL string ends in a semicolon.
|
---|
| 510 | **
|
---|
| 511 | ** Special handling is require for CREATE TRIGGER statements.
|
---|
| 512 | ** Whenever the CREATE TRIGGER keywords are seen, the statement
|
---|
| 513 | ** must end with ";END;".
|
---|
| 514 | **
|
---|
| 515 | ** This implementation uses a state machine with 7 states:
|
---|
| 516 | **
|
---|
| 517 | ** (0) START At the beginning or end of an SQL statement. This routine
|
---|
| 518 | ** returns 1 if it ends in the START state and 0 if it ends
|
---|
| 519 | ** in any other state.
|
---|
| 520 | **
|
---|
| 521 | ** (1) EXPLAIN The keyword EXPLAIN has been seen at the beginning of
|
---|
| 522 | ** a statement.
|
---|
| 523 | **
|
---|
| 524 | ** (2) CREATE The keyword CREATE has been seen at the beginning of a
|
---|
| 525 | ** statement, possibly preceeded by EXPLAIN and/or followed by
|
---|
| 526 | ** TEMP or TEMPORARY
|
---|
| 527 | **
|
---|
| 528 | ** (3) NORMAL We are in the middle of statement which ends with a single
|
---|
| 529 | ** semicolon.
|
---|
| 530 | **
|
---|
| 531 | ** (4) TRIGGER We are in the middle of a trigger definition that must be
|
---|
| 532 | ** ended by a semicolon, the keyword END, and another semicolon.
|
---|
| 533 | **
|
---|
| 534 | ** (5) SEMI We've seen the first semicolon in the ";END;" that occurs at
|
---|
| 535 | ** the end of a trigger definition.
|
---|
| 536 | **
|
---|
| 537 | ** (6) END We've seen the ";END" of the ";END;" that occurs at the end
|
---|
| 538 | ** of a trigger difinition.
|
---|
| 539 | **
|
---|
| 540 | ** Transitions between states above are determined by tokens extracted
|
---|
| 541 | ** from the input. The following tokens are significant:
|
---|
| 542 | **
|
---|
| 543 | ** (0) tkEXPLAIN The "explain" keyword.
|
---|
| 544 | ** (1) tkCREATE The "create" keyword.
|
---|
| 545 | ** (2) tkTEMP The "temp" or "temporary" keyword.
|
---|
| 546 | ** (3) tkTRIGGER The "trigger" keyword.
|
---|
| 547 | ** (4) tkEND The "end" keyword.
|
---|
| 548 | ** (5) tkSEMI A semicolon.
|
---|
| 549 | ** (6) tkWS Whitespace
|
---|
| 550 | ** (7) tkOTHER Any other SQL token.
|
---|
| 551 | **
|
---|
| 552 | ** Whitespace never causes a state transition and is always ignored.
|
---|
| 553 | */
|
---|
| 554 | int sqlite_complete(const char *zSql){
|
---|
| 555 | u8 state = 0; /* Current state, using numbers defined in header comment */
|
---|
| 556 | u8 token; /* Value of the next token */
|
---|
| 557 |
|
---|
| 558 | /* The following matrix defines the transition from one state to another
|
---|
| 559 | ** according to what token is seen. trans[state][token] returns the
|
---|
| 560 | ** next state.
|
---|
| 561 | */
|
---|
| 562 | static const u8 trans[7][8] = {
|
---|
| 563 | /* Token: */
|
---|
| 564 | /* State: ** EXPLAIN CREATE TEMP TRIGGER END SEMI WS OTHER */
|
---|
| 565 | /* 0 START: */ { 1, 2, 3, 3, 3, 0, 0, 3, },
|
---|
| 566 | /* 1 EXPLAIN: */ { 3, 2, 3, 3, 3, 0, 1, 3, },
|
---|
| 567 | /* 2 CREATE: */ { 3, 3, 2, 4, 3, 0, 2, 3, },
|
---|
| 568 | /* 3 NORMAL: */ { 3, 3, 3, 3, 3, 0, 3, 3, },
|
---|
| 569 | /* 4 TRIGGER: */ { 4, 4, 4, 4, 4, 5, 4, 4, },
|
---|
| 570 | /* 5 SEMI: */ { 4, 4, 4, 4, 6, 5, 5, 4, },
|
---|
| 571 | /* 6 END: */ { 4, 4, 4, 4, 4, 0, 6, 4, },
|
---|
| 572 | };
|
---|
| 573 |
|
---|
| 574 | while( *zSql ){
|
---|
| 575 | switch( *zSql ){
|
---|
| 576 | case ';': { /* A semicolon */
|
---|
| 577 | token = tkSEMI;
|
---|
| 578 | break;
|
---|
| 579 | }
|
---|
| 580 | case ' ':
|
---|
| 581 | case '\r':
|
---|
| 582 | case '\t':
|
---|
| 583 | case '\n':
|
---|
| 584 | case '\f': { /* White space is ignored */
|
---|
| 585 | token = tkWS;
|
---|
| 586 | break;
|
---|
| 587 | }
|
---|
| 588 | case '/': { /* C-style comments */
|
---|
| 589 | if( zSql[1]!='*' ){
|
---|
| 590 | token = tkOTHER;
|
---|
| 591 | break;
|
---|
| 592 | }
|
---|
| 593 | zSql += 2;
|
---|
| 594 | while( zSql[0] && (zSql[0]!='*' || zSql[1]!='/') ){ zSql++; }
|
---|
| 595 | if( zSql[0]==0 ) return 0;
|
---|
| 596 | zSql++;
|
---|
| 597 | token = tkWS;
|
---|
| 598 | break;
|
---|
| 599 | }
|
---|
| 600 | case '-': { /* SQL-style comments from "--" to end of line */
|
---|
| 601 | if( zSql[1]!='-' ){
|
---|
| 602 | token = tkOTHER;
|
---|
| 603 | break;
|
---|
| 604 | }
|
---|
| 605 | while( *zSql && *zSql!='\n' ){ zSql++; }
|
---|
| 606 | if( *zSql==0 ) return state==0;
|
---|
| 607 | token = tkWS;
|
---|
| 608 | break;
|
---|
| 609 | }
|
---|
| 610 | case '[': { /* Microsoft-style identifiers in [...] */
|
---|
| 611 | zSql++;
|
---|
| 612 | while( *zSql && *zSql!=']' ){ zSql++; }
|
---|
| 613 | if( *zSql==0 ) return 0;
|
---|
| 614 | token = tkOTHER;
|
---|
| 615 | break;
|
---|
| 616 | }
|
---|
| 617 | case '"': /* single- and double-quoted strings */
|
---|
| 618 | case '\'': {
|
---|
| 619 | int c = *zSql;
|
---|
| 620 | zSql++;
|
---|
| 621 | while( *zSql && *zSql!=c ){ zSql++; }
|
---|
| 622 | if( *zSql==0 ) return 0;
|
---|
| 623 | token = tkOTHER;
|
---|
| 624 | break;
|
---|
| 625 | }
|
---|
| 626 | default: {
|
---|
| 627 | if( isIdChar[(u8)*zSql] ){
|
---|
| 628 | /* Keywords and unquoted identifiers */
|
---|
| 629 | int nId;
|
---|
| 630 | for(nId=1; isIdChar[(u8)zSql[nId]]; nId++){}
|
---|
| 631 | switch( *zSql ){
|
---|
| 632 | case 'c': case 'C': {
|
---|
| 633 | if( nId==6 && sqliteStrNICmp(zSql, "create", 6)==0 ){
|
---|
| 634 | token = tkCREATE;
|
---|
| 635 | }else{
|
---|
| 636 | token = tkOTHER;
|
---|
| 637 | }
|
---|
| 638 | break;
|
---|
| 639 | }
|
---|
| 640 | case 't': case 'T': {
|
---|
| 641 | if( nId==7 && sqliteStrNICmp(zSql, "trigger", 7)==0 ){
|
---|
| 642 | token = tkTRIGGER;
|
---|
| 643 | }else if( nId==4 && sqliteStrNICmp(zSql, "temp", 4)==0 ){
|
---|
| 644 | token = tkTEMP;
|
---|
| 645 | }else if( nId==9 && sqliteStrNICmp(zSql, "temporary", 9)==0 ){
|
---|
| 646 | token = tkTEMP;
|
---|
| 647 | }else{
|
---|
| 648 | token = tkOTHER;
|
---|
| 649 | }
|
---|
| 650 | break;
|
---|
| 651 | }
|
---|
| 652 | case 'e': case 'E': {
|
---|
| 653 | if( nId==3 && sqliteStrNICmp(zSql, "end", 3)==0 ){
|
---|
| 654 | token = tkEND;
|
---|
| 655 | }else if( nId==7 && sqliteStrNICmp(zSql, "explain", 7)==0 ){
|
---|
| 656 | token = tkEXPLAIN;
|
---|
| 657 | }else{
|
---|
| 658 | token = tkOTHER;
|
---|
| 659 | }
|
---|
| 660 | break;
|
---|
| 661 | }
|
---|
| 662 | default: {
|
---|
| 663 | token = tkOTHER;
|
---|
| 664 | break;
|
---|
| 665 | }
|
---|
| 666 | }
|
---|
| 667 | zSql += nId-1;
|
---|
| 668 | }else{
|
---|
| 669 | /* Operators and special symbols */
|
---|
| 670 | token = tkOTHER;
|
---|
| 671 | }
|
---|
| 672 | break;
|
---|
| 673 | }
|
---|
| 674 | }
|
---|
| 675 | state = trans[state][token];
|
---|
| 676 | zSql++;
|
---|
| 677 | }
|
---|
| 678 | return state==0;
|
---|
| 679 | }
|
---|