Changeset 97 for trunk/src/helpers/xmltok_impl.c
- Timestamp:
- Aug 12, 2001, 5:34:51 PM (24 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/helpers/xmltok_impl.c
r38 r97 1 2 1 /* 3 *sourcefile xmltok_impl.c 4 * part of the expat implementation. See xmlparse.c. 5 * 6 * NOTE: This file must not be compiled directly. It is 7 * #include'd from xmltok.c several times. 8 */ 9 10 /* 11 * Copyright (C) 2001 Ulrich Möller. 12 * Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd. 13 * and Clark Cooper. 14 * 15 * Permission is hereby granted, free of charge, to any person obtaining 16 * a copy of this software and associated documentation files (the 17 * "Software"), to deal in the Software without restriction, including 18 * without limitation the rights to use, copy, modify, merge, publish, 19 * distribute, sublicense, and/or sell copies of the Software, and to 20 * permit persons to whom the Software is furnished to do so, subject to 21 * the following conditions: 22 * 23 * The above copyright notice and this permission notice shall be included 24 * in all copies or substantial portions of the Software. 25 * 26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 29 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 30 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 31 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 32 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 33 */ 2 Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd 3 See the file COPYING for copying permission. 
4 */ 34 5 35 6 #ifndef IS_INVALID_CHAR … … 117 88 /* ptr points to character following "<!-" */ 118 89 119 static int EXPATENTRY PREFIX(scanComment)(const ENCODING * enc, 120 const char *ptr, 121 const char *end, 122 const char **nextTokPtr) 123 { 124 if (ptr != end) 125 { 126 if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) 127 { 128 *nextTokPtr = ptr; 129 return XML_TOK_INVALID; 130 } 131 ptr += MINBPC(enc); 132 while (ptr != end) 133 { 134 switch (BYTE_TYPE(enc, ptr)) 135 { 136 INVALID_CASES(ptr, nextTokPtr) 137 case BT_MINUS: 138 if ((ptr += MINBPC(enc)) == end) 139 return XML_TOK_PARTIAL; 140 if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) 141 { 142 if ((ptr += MINBPC(enc)) == end) 143 return XML_TOK_PARTIAL; 144 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) 145 { 146 *nextTokPtr = ptr; 147 return XML_TOK_INVALID; 148 } 149 *nextTokPtr = ptr + MINBPC(enc); 150 return XML_TOK_COMMENT; 151 } 152 break; 153 default: 154 ptr += MINBPC(enc); 155 break; 156 } 157 } 158 } 159 return XML_TOK_PARTIAL; 90 static 91 int PREFIX(scanComment)(const ENCODING *enc, const char *ptr, const char *end, 92 const char **nextTokPtr) 93 { 94 if (ptr != end) { 95 if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { 96 *nextTokPtr = ptr; 97 return XML_TOK_INVALID; 98 } 99 ptr += MINBPC(enc); 100 while (ptr != end) { 101 switch (BYTE_TYPE(enc, ptr)) { 102 INVALID_CASES(ptr, nextTokPtr) 103 case BT_MINUS: 104 if ((ptr += MINBPC(enc)) == end) 105 return XML_TOK_PARTIAL; 106 if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { 107 if ((ptr += MINBPC(enc)) == end) 108 return XML_TOK_PARTIAL; 109 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { 110 *nextTokPtr = ptr; 111 return XML_TOK_INVALID; 112 } 113 *nextTokPtr = ptr + MINBPC(enc); 114 return XML_TOK_COMMENT; 115 } 116 break; 117 default: 118 ptr += MINBPC(enc); 119 break; 120 } 121 } 122 } 123 return XML_TOK_PARTIAL; 160 124 } 161 125 162 126 /* ptr points to character following "<!" 
*/ 163 127 164 static int EXPATENTRY PREFIX(scanDecl)(const ENCODING * enc, 165 const char *ptr, 166 const char *end, 167 const char **nextTokPtr) 168 { 128 static 129 int PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, const char *end, 130 const char **nextTokPtr) 131 { 132 if (ptr == end) 133 return XML_TOK_PARTIAL; 134 switch (BYTE_TYPE(enc, ptr)) { 135 case BT_MINUS: 136 return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); 137 case BT_LSQB: 138 *nextTokPtr = ptr + MINBPC(enc); 139 return XML_TOK_COND_SECT_OPEN; 140 case BT_NMSTRT: 141 case BT_HEX: 142 ptr += MINBPC(enc); 143 break; 144 default: 145 *nextTokPtr = ptr; 146 return XML_TOK_INVALID; 147 } 148 while (ptr != end) { 149 switch (BYTE_TYPE(enc, ptr)) { 150 case BT_PERCNT: 151 if (ptr + MINBPC(enc) == end) 152 return XML_TOK_PARTIAL; 153 /* don't allow <!ENTITY% foo "whatever"> */ 154 switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) { 155 case BT_S: case BT_CR: case BT_LF: case BT_PERCNT: 156 *nextTokPtr = ptr; 157 return XML_TOK_INVALID; 158 } 159 /* fall through */ 160 case BT_S: case BT_CR: case BT_LF: 161 *nextTokPtr = ptr; 162 return XML_TOK_DECL_OPEN; 163 case BT_NMSTRT: 164 case BT_HEX: 165 ptr += MINBPC(enc); 166 break; 167 default: 168 *nextTokPtr = ptr; 169 return XML_TOK_INVALID; 170 } 171 } 172 return XML_TOK_PARTIAL; 173 } 174 175 static 176 int PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end, int *tokPtr) 177 { 178 int upper = 0; 179 *tokPtr = XML_TOK_PI; 180 if (end - ptr != MINBPC(enc)*3) 181 return 1; 182 switch (BYTE_TO_ASCII(enc, ptr)) { 183 case ASCII_x: 184 break; 185 case ASCII_X: 186 upper = 1; 187 break; 188 default: 189 return 1; 190 } 191 ptr += MINBPC(enc); 192 switch (BYTE_TO_ASCII(enc, ptr)) { 193 case ASCII_m: 194 break; 195 case ASCII_M: 196 upper = 1; 197 break; 198 default: 199 return 1; 200 } 201 ptr += MINBPC(enc); 202 switch (BYTE_TO_ASCII(enc, ptr)) { 203 case ASCII_l: 204 break; 205 case ASCII_L: 206 upper = 1; 207 break; 
208 default: 209 return 1; 210 } 211 if (upper) 212 return 0; 213 *tokPtr = XML_TOK_XML_DECL; 214 return 1; 215 } 216 217 /* ptr points to character following "<?" */ 218 219 static 220 int PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end, 221 const char **nextTokPtr) 222 { 223 int tok; 224 const char *target = ptr; 225 if (ptr == end) 226 return XML_TOK_PARTIAL; 227 switch (BYTE_TYPE(enc, ptr)) { 228 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 229 default: 230 *nextTokPtr = ptr; 231 return XML_TOK_INVALID; 232 } 233 while (ptr != end) { 234 switch (BYTE_TYPE(enc, ptr)) { 235 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 236 case BT_S: case BT_CR: case BT_LF: 237 if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { 238 *nextTokPtr = ptr; 239 return XML_TOK_INVALID; 240 } 241 ptr += MINBPC(enc); 242 while (ptr != end) { 243 switch (BYTE_TYPE(enc, ptr)) { 244 INVALID_CASES(ptr, nextTokPtr) 245 case BT_QUEST: 246 ptr += MINBPC(enc); 247 if (ptr == end) 248 return XML_TOK_PARTIAL; 249 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { 250 *nextTokPtr = ptr + MINBPC(enc); 251 return tok; 252 } 253 break; 254 default: 255 ptr += MINBPC(enc); 256 break; 257 } 258 } 259 return XML_TOK_PARTIAL; 260 case BT_QUEST: 261 if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { 262 *nextTokPtr = ptr; 263 return XML_TOK_INVALID; 264 } 265 ptr += MINBPC(enc); 266 if (ptr == end) 267 return XML_TOK_PARTIAL; 268 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { 269 *nextTokPtr = ptr + MINBPC(enc); 270 return tok; 271 } 272 /* fall through */ 273 default: 274 *nextTokPtr = ptr; 275 return XML_TOK_INVALID; 276 } 277 } 278 return XML_TOK_PARTIAL; 279 } 280 281 282 static 283 int PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, const char *end, 284 const char **nextTokPtr) 285 { 286 static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, ASCII_LSQB }; 287 int i; 288 /* CDATA[ */ 289 if (end - ptr < 6 * MINBPC(enc)) 290 return XML_TOK_PARTIAL; 291 for 
(i = 0; i < 6; i++, ptr += MINBPC(enc)) { 292 if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) { 293 *nextTokPtr = ptr; 294 return XML_TOK_INVALID; 295 } 296 } 297 *nextTokPtr = ptr; 298 return XML_TOK_CDATA_SECT_OPEN; 299 } 300 301 static 302 int PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *end, 303 const char **nextTokPtr) 304 { 305 if (ptr == end) 306 return XML_TOK_NONE; 307 if (MINBPC(enc) > 1) { 308 size_t n = end - ptr; 309 if (n & (MINBPC(enc) - 1)) { 310 n &= ~(MINBPC(enc) - 1); 311 if (n == 0) 312 return XML_TOK_PARTIAL; 313 end = ptr + n; 314 } 315 } 316 switch (BYTE_TYPE(enc, ptr)) { 317 case BT_RSQB: 318 ptr += MINBPC(enc); 169 319 if (ptr == end) 170 return XML_TOK_PARTIAL; 171 switch (BYTE_TYPE(enc, ptr)) 172 { 173 case BT_MINUS: 174 return PREFIX(scanComment) (enc, ptr + MINBPC(enc), end, nextTokPtr); 175 case BT_LSQB: 176 *nextTokPtr = ptr + MINBPC(enc); 177 return XML_TOK_COND_SECT_OPEN; 178 case BT_NMSTRT: 179 case BT_HEX: 180 ptr += MINBPC(enc); 181 break; 182 default: 183 *nextTokPtr = ptr; 184 return XML_TOK_INVALID; 185 } 186 while (ptr != end) 187 { 188 switch (BYTE_TYPE(enc, ptr)) 189 { 190 case BT_PERCNT: 191 if (ptr + MINBPC(enc) == end) 192 return XML_TOK_PARTIAL; 193 /* don't allow <!ENTITY% foo "whatever"> */ 194 switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) 195 { 196 case BT_S: 197 case BT_CR: 198 case BT_LF: 199 case BT_PERCNT: 200 *nextTokPtr = ptr; 201 return XML_TOK_INVALID; 202 } 203 /* fall through */ 204 case BT_S: 205 case BT_CR: 206 case BT_LF: 207 *nextTokPtr = ptr; 208 return XML_TOK_DECL_OPEN; 209 case BT_NMSTRT: 210 case BT_HEX: 211 ptr += MINBPC(enc); 212 break; 213 default: 214 *nextTokPtr = ptr; 215 return XML_TOK_INVALID; 216 } 217 } 218 return XML_TOK_PARTIAL; 219 } 220 221 static int EXPATENTRY PREFIX(checkPiTarget) (const ENCODING * enc, const char *ptr, const char *end, int *tokPtr) 222 { 223 int upper = 0; 224 225 *tokPtr = XML_TOK_PI; 226 if (end - ptr != MINBPC(enc) * 3) 227 return 1; 228 
switch (BYTE_TO_ASCII(enc, ptr)) 229 { 230 case ASCII_x: 231 break; 232 case ASCII_X: 233 upper = 1; 234 break; 235 default: 236 return 1; 237 } 238 ptr += MINBPC(enc); 239 switch (BYTE_TO_ASCII(enc, ptr)) 240 { 241 case ASCII_m: 242 break; 243 case ASCII_M: 244 upper = 1; 245 break; 246 default: 247 return 1; 248 } 249 ptr += MINBPC(enc); 250 switch (BYTE_TO_ASCII(enc, ptr)) 251 { 252 case ASCII_l: 253 break; 254 case ASCII_L: 255 upper = 1; 256 break; 257 default: 258 return 1; 259 } 260 if (upper) 261 return 0; 262 *tokPtr = XML_TOK_XML_DECL; 263 return 1; 264 } 265 266 /* ptr points to character following "<?" */ 267 268 static 269 int EXPATENTRY PREFIX(scanPi) (const ENCODING * enc, const char *ptr, const char *end, 270 const char **nextTokPtr) 271 { 272 int tok; 273 const char *target = ptr; 274 320 return XML_TOK_PARTIAL; 321 if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) 322 break; 323 ptr += MINBPC(enc); 275 324 if (ptr == end) 276 return XML_TOK_PARTIAL; 277 switch (BYTE_TYPE(enc, ptr)) 278 { 279 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 280 default: 281 *nextTokPtr = ptr; 282 return XML_TOK_INVALID; 283 } 284 while (ptr != end) 285 { 286 switch (BYTE_TYPE(enc, ptr)) 287 { 288 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 289 case BT_S: 290 case BT_CR: 291 case BT_LF: 292 if (!PREFIX(checkPiTarget) (enc, target, ptr, &tok)) 293 { 294 *nextTokPtr = ptr; 295 return XML_TOK_INVALID; 296 } 297 ptr += MINBPC(enc); 298 while (ptr != end) 299 { 300 switch (BYTE_TYPE(enc, ptr)) 301 { 302 INVALID_CASES(ptr, nextTokPtr) 303 case BT_QUEST: 304 ptr += MINBPC(enc); 305 if (ptr == end) 306 return XML_TOK_PARTIAL; 307 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) 308 { 309 *nextTokPtr = ptr + MINBPC(enc); 310 return tok; 311 } 312 break; 313 default: 314 ptr += MINBPC(enc); 315 break; 316 } 317 } 318 return XML_TOK_PARTIAL; 319 case BT_QUEST: 320 if (!PREFIX(checkPiTarget) (enc, target, ptr, &tok)) 321 { 322 *nextTokPtr = ptr; 323 return XML_TOK_INVALID; 324 } 325 ptr += 
MINBPC(enc); 326 if (ptr == end) 327 return XML_TOK_PARTIAL; 328 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) 329 { 330 *nextTokPtr = ptr + MINBPC(enc); 331 return tok; 332 } 333 /* fall through */ 334 default: 335 *nextTokPtr = ptr; 336 return XML_TOK_INVALID; 337 } 338 } 339 return XML_TOK_PARTIAL; 340 } 341 342 343 static 344 int EXPATENTRY PREFIX(scanCdataSection) (const ENCODING * enc, const char *ptr, const char *end, 345 const char **nextTokPtr) 346 { 347 static const char CDATA_LSQB[] = 348 {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, ASCII_LSQB}; 349 int i; 350 351 /* CDATA[ */ 352 if (end - ptr < 6 * MINBPC(enc)) 353 return XML_TOK_PARTIAL; 354 for (i = 0; i < 6; i++, ptr += MINBPC(enc)) 355 { 356 if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) 357 { 358 *nextTokPtr = ptr; 359 return XML_TOK_INVALID; 360 } 361 } 362 *nextTokPtr = ptr; 363 return XML_TOK_CDATA_SECT_OPEN; 364 } 365 366 static 367 int EXPATENTRY PREFIX(cdataSectionTok) (const ENCODING * enc, const char *ptr, const char *end, 368 const char **nextTokPtr) 369 { 325 return XML_TOK_PARTIAL; 326 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { 327 ptr -= MINBPC(enc); 328 break; 329 } 330 *nextTokPtr = ptr + MINBPC(enc); 331 return XML_TOK_CDATA_SECT_CLOSE; 332 case BT_CR: 333 ptr += MINBPC(enc); 370 334 if (ptr == end) 371 return XML_TOK_NONE; 372 if (MINBPC(enc) > 1) 373 { 374 size_t n = end - ptr; 375 376 if (n & (MINBPC(enc) - 1)) 377 { 378 n &= ~(MINBPC(enc) - 1); 379 if (n == 0) 380 return XML_TOK_PARTIAL; 381 end = ptr + n; 382 } 383 } 384 switch (BYTE_TYPE(enc, ptr)) 385 { 386 case BT_RSQB: 387 ptr += MINBPC(enc); 388 if (ptr == end) 389 return XML_TOK_PARTIAL; 390 if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) 391 break; 392 ptr += MINBPC(enc); 393 if (ptr == end) 394 return XML_TOK_PARTIAL; 395 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) 396 { 397 ptr -= MINBPC(enc); 398 break; 399 } 400 *nextTokPtr = ptr + MINBPC(enc); 401 return XML_TOK_CDATA_SECT_CLOSE; 402 case BT_CR: 403 ptr += MINBPC(enc); 404 if (ptr == end) 
405 return XML_TOK_PARTIAL; 406 if (BYTE_TYPE(enc, ptr) == BT_LF) 407 ptr += MINBPC(enc); 408 *nextTokPtr = ptr; 409 return XML_TOK_DATA_NEWLINE; 410 case BT_LF: 411 *nextTokPtr = ptr + MINBPC(enc); 412 return XML_TOK_DATA_NEWLINE; 413 INVALID_CASES(ptr, nextTokPtr) 414 default: 415 ptr += MINBPC(enc); 416 break; 417 } 418 while (ptr != end) 419 { 420 switch (BYTE_TYPE(enc, ptr)) 421 { 335 return XML_TOK_PARTIAL; 336 if (BYTE_TYPE(enc, ptr) == BT_LF) 337 ptr += MINBPC(enc); 338 *nextTokPtr = ptr; 339 return XML_TOK_DATA_NEWLINE; 340 case BT_LF: 341 *nextTokPtr = ptr + MINBPC(enc); 342 return XML_TOK_DATA_NEWLINE; 343 INVALID_CASES(ptr, nextTokPtr) 344 default: 345 ptr += MINBPC(enc); 346 break; 347 } 348 while (ptr != end) { 349 switch (BYTE_TYPE(enc, ptr)) { 422 350 #define LEAD_CASE(n) \ 423 351 case BT_LEAD ## n: \ … … 428 356 ptr += n; \ 429 357 break; 430 358 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 431 359 #undef LEAD_CASE 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 360 case BT_NONXML: 361 case BT_MALFORM: 362 case BT_TRAIL: 363 case BT_CR: 364 case BT_LF: 365 case BT_RSQB: 366 *nextTokPtr = ptr; 367 return XML_TOK_DATA_CHARS; 368 default: 369 ptr += MINBPC(enc); 370 break; 371 } 372 } 373 *nextTokPtr = ptr; 374 return XML_TOK_DATA_CHARS; 447 375 } 448 376 … … 450 378 451 379 static 452 int EXPATENTRY PREFIX(scanEndTag) (const ENCODING * enc, const char *ptr, const char *end, 453 const char **nextTokPtr) 454 { 380 int PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, const char *end, 381 const char **nextTokPtr) 382 { 383 if (ptr == end) 384 return XML_TOK_PARTIAL; 385 switch (BYTE_TYPE(enc, ptr)) { 386 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 387 default: 388 *nextTokPtr = ptr; 389 return XML_TOK_INVALID; 390 } 391 while (ptr != end) { 392 switch (BYTE_TYPE(enc, ptr)) { 393 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 394 case BT_S: case BT_CR: case BT_LF: 395 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { 396 switch 
(BYTE_TYPE(enc, ptr)) { 397 case BT_S: case BT_CR: case BT_LF: 398 break; 399 case BT_GT: 400 *nextTokPtr = ptr + MINBPC(enc); 401 return XML_TOK_END_TAG; 402 default: 403 *nextTokPtr = ptr; 404 return XML_TOK_INVALID; 405 } 406 } 407 return XML_TOK_PARTIAL; 408 #ifdef XML_NS 409 case BT_COLON: 410 /* no need to check qname syntax here, since end-tag must match exactly */ 411 ptr += MINBPC(enc); 412 break; 413 #endif 414 case BT_GT: 415 *nextTokPtr = ptr + MINBPC(enc); 416 return XML_TOK_END_TAG; 417 default: 418 *nextTokPtr = ptr; 419 return XML_TOK_INVALID; 420 } 421 } 422 return XML_TOK_PARTIAL; 423 } 424 425 /* ptr points to character following "&#X" */ 426 427 static 428 int PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, const char *end, 429 const char **nextTokPtr) 430 { 431 if (ptr != end) { 432 switch (BYTE_TYPE(enc, ptr)) { 433 case BT_DIGIT: 434 case BT_HEX: 435 break; 436 default: 437 *nextTokPtr = ptr; 438 return XML_TOK_INVALID; 439 } 440 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { 441 switch (BYTE_TYPE(enc, ptr)) { 442 case BT_DIGIT: 443 case BT_HEX: 444 break; 445 case BT_SEMI: 446 *nextTokPtr = ptr + MINBPC(enc); 447 return XML_TOK_CHAR_REF; 448 default: 449 *nextTokPtr = ptr; 450 return XML_TOK_INVALID; 451 } 452 } 453 } 454 return XML_TOK_PARTIAL; 455 } 456 457 /* ptr points to character following "&#" */ 458 459 static 460 int PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, const char *end, 461 const char **nextTokPtr) 462 { 463 if (ptr != end) { 464 if (CHAR_MATCHES(enc, ptr, ASCII_x)) 465 return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); 466 switch (BYTE_TYPE(enc, ptr)) { 467 case BT_DIGIT: 468 break; 469 default: 470 *nextTokPtr = ptr; 471 return XML_TOK_INVALID; 472 } 473 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { 474 switch (BYTE_TYPE(enc, ptr)) { 475 case BT_DIGIT: 476 break; 477 case BT_SEMI: 478 *nextTokPtr = ptr + MINBPC(enc); 479 return XML_TOK_CHAR_REF; 480 
default: 481 *nextTokPtr = ptr; 482 return XML_TOK_INVALID; 483 } 484 } 485 } 486 return XML_TOK_PARTIAL; 487 } 488 489 /* ptr points to character following "&" */ 490 491 static 492 int PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end, 493 const char **nextTokPtr) 494 { 495 if (ptr == end) 496 return XML_TOK_PARTIAL; 497 switch (BYTE_TYPE(enc, ptr)) { 498 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 499 case BT_NUM: 500 return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); 501 default: 502 *nextTokPtr = ptr; 503 return XML_TOK_INVALID; 504 } 505 while (ptr != end) { 506 switch (BYTE_TYPE(enc, ptr)) { 507 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 508 case BT_SEMI: 509 *nextTokPtr = ptr + MINBPC(enc); 510 return XML_TOK_ENTITY_REF; 511 default: 512 *nextTokPtr = ptr; 513 return XML_TOK_INVALID; 514 } 515 } 516 return XML_TOK_PARTIAL; 517 } 518 519 /* ptr points to character following first character of attribute name */ 520 521 static 522 int PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, 523 const char **nextTokPtr) 524 { 525 #ifdef XML_NS 526 int hadColon = 0; 527 #endif 528 while (ptr != end) { 529 switch (BYTE_TYPE(enc, ptr)) { 530 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 531 #ifdef XML_NS 532 case BT_COLON: 533 if (hadColon) { 534 *nextTokPtr = ptr; 535 return XML_TOK_INVALID; 536 } 537 hadColon = 1; 538 ptr += MINBPC(enc); 539 if (ptr == end) 540 return XML_TOK_PARTIAL; 541 switch (BYTE_TYPE(enc, ptr)) { 542 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 543 default: 544 *nextTokPtr = ptr; 545 return XML_TOK_INVALID; 546 } 547 break; 548 #endif 549 case BT_S: case BT_CR: case BT_LF: 550 for (;;) { 551 int t; 552 553 ptr += MINBPC(enc); 455 554 if (ptr == end) 456 return XML_TOK_PARTIAL; 457 switch (BYTE_TYPE(enc, ptr)) 458 { 459 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 460 default: 461 *nextTokPtr = ptr; 462 return XML_TOK_INVALID; 463 } 464 while (ptr != end) 465 { 466 switch 
(BYTE_TYPE(enc, ptr)) 467 { 468 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 469 case BT_S: 470 case BT_CR: 471 case BT_LF: 472 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) 473 { 474 switch (BYTE_TYPE(enc, ptr)) 475 { 476 case BT_S: 477 case BT_CR: 478 case BT_LF: 479 break; 480 case BT_GT: 481 *nextTokPtr = ptr + MINBPC(enc); 482 return XML_TOK_END_TAG; 483 default: 484 *nextTokPtr = ptr; 485 return XML_TOK_INVALID; 486 } 487 } 488 return XML_TOK_PARTIAL; 489 #ifdef XML_NS 490 case BT_COLON: 491 /* no need to check qname syntax here, since end-tag must match exactly */ 492 ptr += MINBPC(enc); 493 break; 494 #endif 495 case BT_GT: 496 *nextTokPtr = ptr + MINBPC(enc); 497 return XML_TOK_END_TAG; 498 default: 499 *nextTokPtr = ptr; 500 return XML_TOK_INVALID; 501 } 502 } 503 return XML_TOK_PARTIAL; 504 } 505 506 /* ptr points to character following "&#X" */ 507 508 static 509 int EXPATENTRY PREFIX(scanHexCharRef) (const ENCODING * enc, const char *ptr, const char *end, 510 const char **nextTokPtr) 511 { 512 if (ptr != end) 513 { 514 switch (BYTE_TYPE(enc, ptr)) 515 { 516 case BT_DIGIT: 517 case BT_HEX: 518 break; 519 default: 520 *nextTokPtr = ptr; 521 return XML_TOK_INVALID; 522 } 523 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) 524 { 525 switch (BYTE_TYPE(enc, ptr)) 526 { 527 case BT_DIGIT: 528 case BT_HEX: 529 break; 530 case BT_SEMI: 531 *nextTokPtr = ptr + MINBPC(enc); 532 return XML_TOK_CHAR_REF; 533 default: 534 *nextTokPtr = ptr; 535 return XML_TOK_INVALID; 536 } 537 } 538 } 539 return XML_TOK_PARTIAL; 540 } 541 542 /* ptr points to character following "&#" */ 543 544 static 545 int EXPATENTRY PREFIX(scanCharRef) (const ENCODING * enc, const char *ptr, const char *end, 546 const char **nextTokPtr) 547 { 548 if (ptr != end) 549 { 550 if (CHAR_MATCHES(enc, ptr, ASCII_x)) 551 return PREFIX(scanHexCharRef) (enc, ptr + MINBPC(enc), end, nextTokPtr); 552 switch (BYTE_TYPE(enc, ptr)) 553 { 554 case BT_DIGIT: 555 break; 556 default: 557 
*nextTokPtr = ptr; 558 return XML_TOK_INVALID; 559 } 560 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) 561 { 562 switch (BYTE_TYPE(enc, ptr)) 563 { 564 case BT_DIGIT: 565 break; 566 case BT_SEMI: 567 *nextTokPtr = ptr + MINBPC(enc); 568 return XML_TOK_CHAR_REF; 569 default: 570 *nextTokPtr = ptr; 571 return XML_TOK_INVALID; 572 } 573 } 574 } 575 return XML_TOK_PARTIAL; 576 } 577 578 /* ptr points to character following "&" */ 579 580 static 581 int EXPATENTRY PREFIX(scanRef) (const ENCODING * enc, const char *ptr, const char *end, 582 const char **nextTokPtr) 583 { 584 if (ptr == end) 585 return XML_TOK_PARTIAL; 586 switch (BYTE_TYPE(enc, ptr)) 587 { 588 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 589 case BT_NUM: 590 return PREFIX(scanCharRef) (enc, ptr + MINBPC(enc), end, nextTokPtr); 591 default: 592 *nextTokPtr = ptr; 593 return XML_TOK_INVALID; 594 } 595 while (ptr != end) 596 { 597 switch (BYTE_TYPE(enc, ptr)) 598 { 599 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 600 case BT_SEMI: 601 *nextTokPtr = ptr + MINBPC(enc); 602 return XML_TOK_ENTITY_REF; 603 default: 604 *nextTokPtr = ptr; 605 return XML_TOK_INVALID; 606 } 607 } 608 return XML_TOK_PARTIAL; 609 } 610 611 /* ptr points to character following first character of attribute name */ 612 613 static 614 int EXPATENTRY PREFIX(scanAtts) (const ENCODING * enc, const char *ptr, const char *end, 615 const char **nextTokPtr) 616 { 617 #ifdef XML_NS 618 int hadColon = 0; 619 620 #endif 621 while (ptr != end) 622 { 623 switch (BYTE_TYPE(enc, ptr)) 624 { 625 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 626 #ifdef XML_NS 627 case BT_COLON: 628 if (hadColon) 629 { 630 *nextTokPtr = ptr; 631 return XML_TOK_INVALID; 632 } 633 hadColon = 1; 634 ptr += MINBPC(enc); 635 if (ptr == end) 636 return XML_TOK_PARTIAL; 637 switch (BYTE_TYPE(enc, ptr)) 638 { 639 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 640 default: 641 *nextTokPtr = ptr; 642 return XML_TOK_INVALID; 643 } 644 break; 645 #endif 646 case BT_S: 647 
case BT_CR: 648 case BT_LF: 649 for (;;) 650 { 651 int t; 652 653 ptr += MINBPC(enc); 654 if (ptr == end) 655 return XML_TOK_PARTIAL; 656 t = BYTE_TYPE(enc, ptr); 657 if (t == BT_EQUALS) 658 break; 659 switch (t) 660 { 661 case BT_S: 662 case BT_LF: 663 case BT_CR: 664 break; 665 default: 666 *nextTokPtr = ptr; 667 return XML_TOK_INVALID; 668 } 669 } 670 /* fall through */ 671 case BT_EQUALS: 672 { 673 int open; 674 675 #ifdef XML_NS 676 hadColon = 0; 677 #endif 678 for (;;) 679 { 680 681 ptr += MINBPC(enc); 682 if (ptr == end) 683 return XML_TOK_PARTIAL; 684 open = BYTE_TYPE(enc, ptr); 685 if (open == BT_QUOT || open == BT_APOS) 686 break; 687 switch (open) 688 { 689 case BT_S: 690 case BT_LF: 691 case BT_CR: 692 break; 693 default: 694 *nextTokPtr = ptr; 695 return XML_TOK_INVALID; 696 } 697 } 698 ptr += MINBPC(enc); 699 /* in attribute value */ 700 for (;;) 701 { 702 int t; 703 704 if (ptr == end) 705 return XML_TOK_PARTIAL; 706 t = BYTE_TYPE(enc, ptr); 707 if (t == open) 708 break; 709 switch (t) 710 { 711 INVALID_CASES(ptr, nextTokPtr) 712 case BT_AMP: 713 { 714 int tok = PREFIX(scanRef) (enc, ptr + MINBPC(enc), end, &ptr); 715 716 if (tok <= 0) 717 { 718 if (tok == XML_TOK_INVALID) 719 *nextTokPtr = ptr; 720 return tok; 721 } 722 break; 723 } 724 case BT_LT: 725 *nextTokPtr = ptr; 726 return XML_TOK_INVALID; 727 default: 728 ptr += MINBPC(enc); 729 break; 730 } 731 } 732 ptr += MINBPC(enc); 733 if (ptr == end) 734 return XML_TOK_PARTIAL; 735 switch (BYTE_TYPE(enc, ptr)) 736 { 737 case BT_S: 738 case BT_CR: 739 case BT_LF: 740 break; 741 case BT_SOL: 742 goto sol; 743 case BT_GT: 744 goto gt; 745 default: 746 *nextTokPtr = ptr; 747 return XML_TOK_INVALID; 748 } 749 /* ptr points to closing quote */ 750 for (;;) 751 { 752 ptr += MINBPC(enc); 753 if (ptr == end) 754 return XML_TOK_PARTIAL; 755 switch (BYTE_TYPE(enc, ptr)) 756 { 757 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 758 case BT_S: 759 case BT_CR: 760 case BT_LF: 761 continue; 762 case BT_GT: 763 gt: 
764 *nextTokPtr = ptr + MINBPC(enc); 765 return XML_TOK_START_TAG_WITH_ATTS; 766 case BT_SOL: 767 sol: 768 ptr += MINBPC(enc); 769 if (ptr == end) 770 return XML_TOK_PARTIAL; 771 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) 772 { 773 *nextTokPtr = ptr; 774 return XML_TOK_INVALID; 775 } 776 *nextTokPtr = ptr + MINBPC(enc); 777 return XML_TOK_EMPTY_ELEMENT_WITH_ATTS; 778 default: 779 *nextTokPtr = ptr; 780 return XML_TOK_INVALID; 781 } 782 break; 783 } 784 break; 785 } 786 default: 787 *nextTokPtr = ptr; 788 return XML_TOK_INVALID; 789 } 790 } 791 return XML_TOK_PARTIAL; 792 } 793 794 /* ptr points to character following "<" */ 795 796 static 797 int EXPATENTRY PREFIX(scanLt) (const ENCODING * enc, const char *ptr, const char *end, 798 const char **nextTokPtr) 799 { 800 #ifdef XML_NS 801 int hadColon; 802 803 #endif 804 if (ptr == end) 805 return XML_TOK_PARTIAL; 806 switch (BYTE_TYPE(enc, ptr)) 807 { 808 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 809 case BT_EXCL: 810 if ((ptr += MINBPC(enc)) == end) 811 return XML_TOK_PARTIAL; 812 switch (BYTE_TYPE(enc, ptr)) 813 { 814 case BT_MINUS: 815 return PREFIX(scanComment) (enc, ptr + MINBPC(enc), end, nextTokPtr); 816 case BT_LSQB: 817 return PREFIX(scanCdataSection) (enc, ptr + MINBPC(enc), end, nextTokPtr); 818 } 819 *nextTokPtr = ptr; 820 return XML_TOK_INVALID; 821 case BT_QUEST: 822 return PREFIX(scanPi) (enc, ptr + MINBPC(enc), end, nextTokPtr); 823 case BT_SOL: 824 return PREFIX(scanEndTag) (enc, ptr + MINBPC(enc), end, nextTokPtr); 825 default: 826 *nextTokPtr = ptr; 827 return XML_TOK_INVALID; 828 } 555 return XML_TOK_PARTIAL; 556 t = BYTE_TYPE(enc, ptr); 557 if (t == BT_EQUALS) 558 break; 559 switch (t) { 560 case BT_S: 561 case BT_LF: 562 case BT_CR: 563 break; 564 default: 565 *nextTokPtr = ptr; 566 return XML_TOK_INVALID; 567 } 568 } 569 /* fall through */ 570 case BT_EQUALS: 571 { 572 int open; 829 573 #ifdef XML_NS 830 574 hadColon = 0; 831 575 #endif 832 /* we have a start-tag */ 833 while (ptr != end) 834 { 
835 switch (BYTE_TYPE(enc, ptr)) 576 for (;;) { 577 578 ptr += MINBPC(enc); 579 if (ptr == end) 580 return XML_TOK_PARTIAL; 581 open = BYTE_TYPE(enc, ptr); 582 if (open == BT_QUOT || open == BT_APOS) 583 break; 584 switch (open) { 585 case BT_S: 586 case BT_LF: 587 case BT_CR: 588 break; 589 default: 590 *nextTokPtr = ptr; 591 return XML_TOK_INVALID; 592 } 593 } 594 ptr += MINBPC(enc); 595 /* in attribute value */ 596 for (;;) { 597 int t; 598 if (ptr == end) 599 return XML_TOK_PARTIAL; 600 t = BYTE_TYPE(enc, ptr); 601 if (t == open) 602 break; 603 switch (t) { 604 INVALID_CASES(ptr, nextTokPtr) 605 case BT_AMP: 836 606 { 837 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 607 int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr); 608 if (tok <= 0) { 609 if (tok == XML_TOK_INVALID) 610 *nextTokPtr = ptr; 611 return tok; 612 } 613 break; 614 } 615 case BT_LT: 616 *nextTokPtr = ptr; 617 return XML_TOK_INVALID; 618 default: 619 ptr += MINBPC(enc); 620 break; 621 } 622 } 623 ptr += MINBPC(enc); 624 if (ptr == end) 625 return XML_TOK_PARTIAL; 626 switch (BYTE_TYPE(enc, ptr)) { 627 case BT_S: 628 case BT_CR: 629 case BT_LF: 630 break; 631 case BT_SOL: 632 goto sol; 633 case BT_GT: 634 goto gt; 635 default: 636 *nextTokPtr = ptr; 637 return XML_TOK_INVALID; 638 } 639 /* ptr points to closing quote */ 640 for (;;) { 641 ptr += MINBPC(enc); 642 if (ptr == end) 643 return XML_TOK_PARTIAL; 644 switch (BYTE_TYPE(enc, ptr)) { 645 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 646 case BT_S: case BT_CR: case BT_LF: 647 continue; 648 case BT_GT: 649 gt: 650 *nextTokPtr = ptr + MINBPC(enc); 651 return XML_TOK_START_TAG_WITH_ATTS; 652 case BT_SOL: 653 sol: 654 ptr += MINBPC(enc); 655 if (ptr == end) 656 return XML_TOK_PARTIAL; 657 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { 658 *nextTokPtr = ptr; 659 return XML_TOK_INVALID; 660 } 661 *nextTokPtr = ptr + MINBPC(enc); 662 return XML_TOK_EMPTY_ELEMENT_WITH_ATTS; 663 default: 664 *nextTokPtr = ptr; 665 return XML_TOK_INVALID; 666 } 667 
break; 668 } 669 break; 670 } 671 default: 672 *nextTokPtr = ptr; 673 return XML_TOK_INVALID; 674 } 675 } 676 return XML_TOK_PARTIAL; 677 } 678 679 /* ptr points to character following "<" */ 680 681 static 682 int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, 683 const char **nextTokPtr) 684 { 838 685 #ifdef XML_NS 839 case BT_COLON: 840 if (hadColon) 841 { 842 *nextTokPtr = ptr; 843 return XML_TOK_INVALID; 844 } 845 hadColon = 1; 846 ptr += MINBPC(enc); 847 if (ptr == end) 848 return XML_TOK_PARTIAL; 849 switch (BYTE_TYPE(enc, ptr)) 850 { 851 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 852 default: 853 *nextTokPtr = ptr; 854 return XML_TOK_INVALID; 855 } 856 break; 686 int hadColon; 857 687 #endif 858 case BT_S: 859 case BT_CR: 860 case BT_LF: 861 { 862 ptr += MINBPC(enc); 863 while (ptr != end) 864 { 865 switch (BYTE_TYPE(enc, ptr)) 866 { 867 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 868 case BT_GT: 869 goto gt; 870 case BT_SOL: 871 goto sol; 872 case BT_S: 873 case BT_CR: 874 case BT_LF: 875 ptr += MINBPC(enc); 876 continue; 877 default: 878 *nextTokPtr = ptr; 879 return XML_TOK_INVALID; 880 } 881 return PREFIX(scanAtts) (enc, ptr, end, nextTokPtr); 882 } 883 return XML_TOK_PARTIAL; 884 } 885 case BT_GT: 886 gt: 887 *nextTokPtr = ptr + MINBPC(enc); 888 return XML_TOK_START_TAG_NO_ATTS; 889 case BT_SOL: 890 sol: 891 ptr += MINBPC(enc); 892 if (ptr == end) 893 return XML_TOK_PARTIAL; 894 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) 895 { 896 *nextTokPtr = ptr; 897 return XML_TOK_INVALID; 898 } 899 *nextTokPtr = ptr + MINBPC(enc); 900 return XML_TOK_EMPTY_ELEMENT_NO_ATTS; 901 default: 902 *nextTokPtr = ptr; 903 return XML_TOK_INVALID; 904 } 905 } 906 return XML_TOK_PARTIAL; 907 } 908 909 static 910 int EXPATENTRY PREFIX(contentTok) (const ENCODING * enc, const char *ptr, const char *end, 911 const char **nextTokPtr) 912 { 688 if (ptr == end) 689 return XML_TOK_PARTIAL; 690 switch (BYTE_TYPE(enc, ptr)) { 691 CHECK_NMSTRT_CASES(enc, ptr, end, 
nextTokPtr) 692 case BT_EXCL: 693 if ((ptr += MINBPC(enc)) == end) 694 return XML_TOK_PARTIAL; 695 switch (BYTE_TYPE(enc, ptr)) { 696 case BT_MINUS: 697 return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); 698 case BT_LSQB: 699 return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc), end, nextTokPtr); 700 } 701 *nextTokPtr = ptr; 702 return XML_TOK_INVALID; 703 case BT_QUEST: 704 return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); 705 case BT_SOL: 706 return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr); 707 default: 708 *nextTokPtr = ptr; 709 return XML_TOK_INVALID; 710 } 711 #ifdef XML_NS 712 hadColon = 0; 713 #endif 714 /* we have a start-tag */ 715 while (ptr != end) { 716 switch (BYTE_TYPE(enc, ptr)) { 717 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 718 #ifdef XML_NS 719 case BT_COLON: 720 if (hadColon) { 721 *nextTokPtr = ptr; 722 return XML_TOK_INVALID; 723 } 724 hadColon = 1; 725 ptr += MINBPC(enc); 726 if (ptr == end) 727 return XML_TOK_PARTIAL; 728 switch (BYTE_TYPE(enc, ptr)) { 729 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 730 default: 731 *nextTokPtr = ptr; 732 return XML_TOK_INVALID; 733 } 734 break; 735 #endif 736 case BT_S: case BT_CR: case BT_LF: 737 { 738 ptr += MINBPC(enc); 739 while (ptr != end) { 740 switch (BYTE_TYPE(enc, ptr)) { 741 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 742 case BT_GT: 743 goto gt; 744 case BT_SOL: 745 goto sol; 746 case BT_S: case BT_CR: case BT_LF: 747 ptr += MINBPC(enc); 748 continue; 749 default: 750 *nextTokPtr = ptr; 751 return XML_TOK_INVALID; 752 } 753 return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr); 754 } 755 return XML_TOK_PARTIAL; 756 } 757 case BT_GT: 758 gt: 759 *nextTokPtr = ptr + MINBPC(enc); 760 return XML_TOK_START_TAG_NO_ATTS; 761 case BT_SOL: 762 sol: 763 ptr += MINBPC(enc); 764 if (ptr == end) 765 return XML_TOK_PARTIAL; 766 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { 767 *nextTokPtr = ptr; 768 return XML_TOK_INVALID; 769 } 770 *nextTokPtr = ptr + 
MINBPC(enc); 771 return XML_TOK_EMPTY_ELEMENT_NO_ATTS; 772 default: 773 *nextTokPtr = ptr; 774 return XML_TOK_INVALID; 775 } 776 } 777 return XML_TOK_PARTIAL; 778 } 779 780 static 781 int PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, 782 const char **nextTokPtr) 783 { 784 if (ptr == end) 785 return XML_TOK_NONE; 786 if (MINBPC(enc) > 1) { 787 size_t n = end - ptr; 788 if (n & (MINBPC(enc) - 1)) { 789 n &= ~(MINBPC(enc) - 1); 790 if (n == 0) 791 return XML_TOK_PARTIAL; 792 end = ptr + n; 793 } 794 } 795 switch (BYTE_TYPE(enc, ptr)) { 796 case BT_LT: 797 return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr); 798 case BT_AMP: 799 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); 800 case BT_CR: 801 ptr += MINBPC(enc); 913 802 if (ptr == end) 914 return XML_TOK_NONE; 915 if (MINBPC(enc) > 1) 916 { 917 size_t n = end - ptr; 918 919 if (n & (MINBPC(enc) - 1)) 920 { 921 n &= ~(MINBPC(enc) - 1); 922 if (n == 0) 923 return XML_TOK_PARTIAL; 924 end = ptr + n; 925 } 926 } 927 switch (BYTE_TYPE(enc, ptr)) 928 { 929 case BT_LT: 930 return PREFIX(scanLt) (enc, ptr + MINBPC(enc), end, nextTokPtr); 931 case BT_AMP: 932 return PREFIX(scanRef) (enc, ptr + MINBPC(enc), end, nextTokPtr); 933 case BT_CR: 934 ptr += MINBPC(enc); 935 if (ptr == end) 936 return XML_TOK_TRAILING_CR; 937 if (BYTE_TYPE(enc, ptr) == BT_LF) 938 ptr += MINBPC(enc); 939 *nextTokPtr = ptr; 940 return XML_TOK_DATA_NEWLINE; 941 case BT_LF: 942 *nextTokPtr = ptr + MINBPC(enc); 943 return XML_TOK_DATA_NEWLINE; 944 case BT_RSQB: 945 ptr += MINBPC(enc); 946 if (ptr == end) 947 return XML_TOK_TRAILING_RSQB; 948 if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) 949 break; 950 ptr += MINBPC(enc); 951 if (ptr == end) 952 return XML_TOK_TRAILING_RSQB; 953 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) 954 { 955 ptr -= MINBPC(enc); 956 break; 957 } 958 *nextTokPtr = ptr; 959 return XML_TOK_INVALID; 960 INVALID_CASES(ptr, nextTokPtr) 961 default: 962 ptr += MINBPC(enc); 963 break; 964 } 965 
while (ptr != end) 966 { 967 switch (BYTE_TYPE(enc, ptr)) 968 { 803 return XML_TOK_TRAILING_CR; 804 if (BYTE_TYPE(enc, ptr) == BT_LF) 805 ptr += MINBPC(enc); 806 *nextTokPtr = ptr; 807 return XML_TOK_DATA_NEWLINE; 808 case BT_LF: 809 *nextTokPtr = ptr + MINBPC(enc); 810 return XML_TOK_DATA_NEWLINE; 811 case BT_RSQB: 812 ptr += MINBPC(enc); 813 if (ptr == end) 814 return XML_TOK_TRAILING_RSQB; 815 if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) 816 break; 817 ptr += MINBPC(enc); 818 if (ptr == end) 819 return XML_TOK_TRAILING_RSQB; 820 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { 821 ptr -= MINBPC(enc); 822 break; 823 } 824 *nextTokPtr = ptr; 825 return XML_TOK_INVALID; 826 INVALID_CASES(ptr, nextTokPtr) 827 default: 828 ptr += MINBPC(enc); 829 break; 830 } 831 while (ptr != end) { 832 switch (BYTE_TYPE(enc, ptr)) { 969 833 #define LEAD_CASE(n) \ 970 834 case BT_LEAD ## n: \ … … 975 839 ptr += n; \ 976 840 break; 977 841 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 978 842 #undef LEAD_CASE 979 case BT_RSQB: 980 if (ptr + MINBPC(enc) != end) 981 { 982 if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) 983 { 984 ptr += MINBPC(enc); 985 break; 986 } 987 if (ptr + 2 * MINBPC(enc) != end) 988 { 989 if (!CHAR_MATCHES(enc, ptr + 2 * MINBPC(enc), ASCII_GT)) 990 { 991 ptr += MINBPC(enc); 992 break; 993 } 994 *nextTokPtr = ptr + 2 * MINBPC(enc); 995 return XML_TOK_INVALID; 996 } 997 } 998 /* fall through */ 999 case BT_AMP: 1000 case BT_LT: 1001 case BT_NONXML: 1002 case BT_MALFORM: 1003 case BT_TRAIL: 1004 case BT_CR: 1005 case BT_LF: 1006 *nextTokPtr = ptr; 1007 return XML_TOK_DATA_CHARS; 1008 default: 1009 ptr += MINBPC(enc); 1010 break; 1011 } 1012 } 1013 *nextTokPtr = ptr; 1014 return XML_TOK_DATA_CHARS; 843 case BT_RSQB: 844 if (ptr + MINBPC(enc) != end) { 845 if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) { 846 ptr += MINBPC(enc); 847 break; 848 } 849 if (ptr + 2*MINBPC(enc) != end) { 850 if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) { 851 ptr += MINBPC(enc); 852 
break; 853 } 854 *nextTokPtr = ptr + 2*MINBPC(enc); 855 return XML_TOK_INVALID; 856 } 857 } 858 /* fall through */ 859 case BT_AMP: 860 case BT_LT: 861 case BT_NONXML: 862 case BT_MALFORM: 863 case BT_TRAIL: 864 case BT_CR: 865 case BT_LF: 866 *nextTokPtr = ptr; 867 return XML_TOK_DATA_CHARS; 868 default: 869 ptr += MINBPC(enc); 870 break; 871 } 872 } 873 *nextTokPtr = ptr; 874 return XML_TOK_DATA_CHARS; 1015 875 } 1016 876 … … 1018 878 1019 879 static 1020 int EXPATENTRY PREFIX(scanPercent) (const ENCODING * enc, const char *ptr, const char *end, 1021 const char **nextTokPtr) 1022 { 880 int PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end, 881 const char **nextTokPtr) 882 { 883 if (ptr == end) 884 return XML_TOK_PARTIAL; 885 switch (BYTE_TYPE(enc, ptr)) { 886 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 887 case BT_S: case BT_LF: case BT_CR: case BT_PERCNT: 888 *nextTokPtr = ptr; 889 return XML_TOK_PERCENT; 890 default: 891 *nextTokPtr = ptr; 892 return XML_TOK_INVALID; 893 } 894 while (ptr != end) { 895 switch (BYTE_TYPE(enc, ptr)) { 896 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 897 case BT_SEMI: 898 *nextTokPtr = ptr + MINBPC(enc); 899 return XML_TOK_PARAM_ENTITY_REF; 900 default: 901 *nextTokPtr = ptr; 902 return XML_TOK_INVALID; 903 } 904 } 905 return XML_TOK_PARTIAL; 906 } 907 908 static 909 int PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end, 910 const char **nextTokPtr) 911 { 912 if (ptr == end) 913 return XML_TOK_PARTIAL; 914 switch (BYTE_TYPE(enc, ptr)) { 915 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 916 default: 917 *nextTokPtr = ptr; 918 return XML_TOK_INVALID; 919 } 920 while (ptr != end) { 921 switch (BYTE_TYPE(enc, ptr)) { 922 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 923 case BT_CR: case BT_LF: case BT_S: 924 case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR: 925 *nextTokPtr = ptr; 926 return XML_TOK_POUND_NAME; 927 default: 928 *nextTokPtr = ptr; 929 return XML_TOK_INVALID; 930 } 
931 } 932 return -XML_TOK_POUND_NAME; 933 } 934 935 static 936 int PREFIX(scanLit)(int open, const ENCODING *enc, 937 const char *ptr, const char *end, 938 const char **nextTokPtr) 939 { 940 while (ptr != end) { 941 int t = BYTE_TYPE(enc, ptr); 942 switch (t) { 943 INVALID_CASES(ptr, nextTokPtr) 944 case BT_QUOT: 945 case BT_APOS: 946 ptr += MINBPC(enc); 947 if (t != open) 948 break; 949 if (ptr == end) 950 return -XML_TOK_LITERAL; 951 *nextTokPtr = ptr; 952 switch (BYTE_TYPE(enc, ptr)) { 953 case BT_S: case BT_CR: case BT_LF: 954 case BT_GT: case BT_PERCNT: case BT_LSQB: 955 return XML_TOK_LITERAL; 956 default: 957 return XML_TOK_INVALID; 958 } 959 default: 960 ptr += MINBPC(enc); 961 break; 962 } 963 } 964 return XML_TOK_PARTIAL; 965 } 966 967 static 968 int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, 969 const char **nextTokPtr) 970 { 971 int tok; 972 if (ptr == end) 973 return XML_TOK_NONE; 974 if (MINBPC(enc) > 1) { 975 size_t n = end - ptr; 976 if (n & (MINBPC(enc) - 1)) { 977 n &= ~(MINBPC(enc) - 1); 978 if (n == 0) 979 return XML_TOK_PARTIAL; 980 end = ptr + n; 981 } 982 } 983 switch (BYTE_TYPE(enc, ptr)) { 984 case BT_QUOT: 985 return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr); 986 case BT_APOS: 987 return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr); 988 case BT_LT: 989 { 990 ptr += MINBPC(enc); 991 if (ptr == end) 992 return XML_TOK_PARTIAL; 993 switch (BYTE_TYPE(enc, ptr)) { 994 case BT_EXCL: 995 return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr); 996 case BT_QUEST: 997 return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); 998 case BT_NMSTRT: 999 case BT_HEX: 1000 case BT_NONASCII: 1001 case BT_LEAD2: 1002 case BT_LEAD3: 1003 case BT_LEAD4: 1004 *nextTokPtr = ptr - MINBPC(enc); 1005 return XML_TOK_INSTANCE_START; 1006 } 1007 *nextTokPtr = ptr; 1008 return XML_TOK_INVALID; 1009 } 1010 case BT_CR: 1011 if (ptr + MINBPC(enc) == end) 1012 return 
-XML_TOK_PROLOG_S; 1013 /* fall through */ 1014 case BT_S: case BT_LF: 1015 for (;;) { 1016 ptr += MINBPC(enc); 1017 if (ptr == end) 1018 break; 1019 switch (BYTE_TYPE(enc, ptr)) { 1020 case BT_S: case BT_LF: 1021 break; 1022 case BT_CR: 1023 /* don't split CR/LF pair */ 1024 if (ptr + MINBPC(enc) != end) 1025 break; 1026 /* fall through */ 1027 default: 1028 *nextTokPtr = ptr; 1029 return XML_TOK_PROLOG_S; 1030 } 1031 } 1032 *nextTokPtr = ptr; 1033 return XML_TOK_PROLOG_S; 1034 case BT_PERCNT: 1035 return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr); 1036 case BT_COMMA: 1037 *nextTokPtr = ptr + MINBPC(enc); 1038 return XML_TOK_COMMA; 1039 case BT_LSQB: 1040 *nextTokPtr = ptr + MINBPC(enc); 1041 return XML_TOK_OPEN_BRACKET; 1042 case BT_RSQB: 1043 ptr += MINBPC(enc); 1023 1044 if (ptr == end) 1024 return XML_TOK_PARTIAL; 1025 switch (BYTE_TYPE(enc, ptr)) 1026 { 1027 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 1028 case BT_S: 1029 case BT_LF: 1030 case BT_CR: 1031 case BT_PERCNT: 1032 *nextTokPtr = ptr; 1033 return XML_TOK_PERCENT; 1034 default: 1035 *nextTokPtr = ptr; 1036 return XML_TOK_INVALID; 1037 } 1038 while (ptr != end) 1039 { 1040 switch (BYTE_TYPE(enc, ptr)) 1041 { 1042 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 1043 case BT_SEMI: 1044 *nextTokPtr = ptr + MINBPC(enc); 1045 return XML_TOK_PARAM_ENTITY_REF; 1046 default: 1047 *nextTokPtr = ptr; 1048 return XML_TOK_INVALID; 1049 } 1050 } 1051 return XML_TOK_PARTIAL; 1052 } 1053 1054 static 1055 int EXPATENTRY PREFIX(scanPoundName) (const ENCODING * enc, const char *ptr, const char *end, 1056 const char **nextTokPtr) 1057 { 1045 return -XML_TOK_CLOSE_BRACKET; 1046 if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { 1047 if (ptr + MINBPC(enc) == end) 1048 return XML_TOK_PARTIAL; 1049 if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) { 1050 *nextTokPtr = ptr + 2*MINBPC(enc); 1051 return XML_TOK_COND_SECT_CLOSE; 1052 } 1053 } 1054 *nextTokPtr = ptr; 1055 return XML_TOK_CLOSE_BRACKET; 1056 case BT_LPAR: 
1057 *nextTokPtr = ptr + MINBPC(enc); 1058 return XML_TOK_OPEN_PAREN; 1059 case BT_RPAR: 1060 ptr += MINBPC(enc); 1058 1061 if (ptr == end) 1059 return XML_TOK_PARTIAL; 1060 switch (BYTE_TYPE(enc, ptr)) 1061 { 1062 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) 1063 default: 1064 *nextTokPtr = ptr; 1065 return XML_TOK_INVALID; 1066 } 1067 while (ptr != end) 1068 { 1069 switch (BYTE_TYPE(enc, ptr)) 1070 { 1071 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 1072 case BT_CR: 1073 case BT_LF: 1074 case BT_S: 1075 case BT_RPAR: 1076 case BT_GT: 1077 case BT_PERCNT: 1078 case BT_VERBAR: 1079 *nextTokPtr = ptr; 1080 return XML_TOK_POUND_NAME; 1081 default: 1082 *nextTokPtr = ptr; 1083 return XML_TOK_INVALID; 1084 } 1085 } 1086 return -XML_TOK_POUND_NAME; 1087 } 1088 1089 static 1090 int EXPATENTRY PREFIX(scanLit) (int open, const ENCODING * enc, 1091 const char *ptr, const char *end, 1092 const char **nextTokPtr) 1093 { 1094 while (ptr != end) 1095 { 1096 int t = BYTE_TYPE(enc, ptr); 1097 1098 switch (t) 1099 { 1100 INVALID_CASES(ptr, nextTokPtr) 1101 case BT_QUOT: 1102 case BT_APOS: 1103 ptr += MINBPC(enc); 1104 if (t != open) 1105 break; 1106 if (ptr == end) 1107 return -XML_TOK_LITERAL; 1108 *nextTokPtr = ptr; 1109 switch (BYTE_TYPE(enc, ptr)) 1110 { 1111 case BT_S: 1112 case BT_CR: 1113 case BT_LF: 1114 case BT_GT: 1115 case BT_PERCNT: 1116 case BT_LSQB: 1117 return XML_TOK_LITERAL; 1118 default: 1119 return XML_TOK_INVALID; 1120 } 1121 default: 1122 ptr += MINBPC(enc); 1123 break; 1124 } 1125 } 1126 return XML_TOK_PARTIAL; 1127 } 1128 1129 static 1130 int EXPATENTRY PREFIX(prologTok) (const ENCODING * enc, const char *ptr, const char *end, 1131 const char **nextTokPtr) 1132 { 1133 int tok; 1134 1135 if (ptr == end) 1136 return XML_TOK_NONE; 1137 if (MINBPC(enc) > 1) 1138 { 1139 size_t n = end - ptr; 1140 1141 if (n & (MINBPC(enc) - 1)) 1142 { 1143 n &= ~(MINBPC(enc) - 1); 1144 if (n == 0) 1145 return XML_TOK_PARTIAL; 1146 end = ptr + n; 1147 } 1148 } 1149 switch 
(BYTE_TYPE(enc, ptr)) 1150 { 1151 case BT_QUOT: 1152 return PREFIX(scanLit) (BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr); 1153 case BT_APOS: 1154 return PREFIX(scanLit) (BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr); 1155 case BT_LT: 1156 { 1157 ptr += MINBPC(enc); 1158 if (ptr == end) 1159 return XML_TOK_PARTIAL; 1160 switch (BYTE_TYPE(enc, ptr)) 1161 { 1162 case BT_EXCL: 1163 return PREFIX(scanDecl) (enc, ptr + MINBPC(enc), end, nextTokPtr); 1164 case BT_QUEST: 1165 return PREFIX(scanPi) (enc, ptr + MINBPC(enc), end, nextTokPtr); 1166 case BT_NMSTRT: 1167 case BT_HEX: 1168 case BT_NONASCII: 1169 case BT_LEAD2: 1170 case BT_LEAD3: 1171 case BT_LEAD4: 1172 *nextTokPtr = ptr - MINBPC(enc); 1173 return XML_TOK_INSTANCE_START; 1174 } 1175 *nextTokPtr = ptr; 1176 return XML_TOK_INVALID; 1177 } 1178 case BT_CR: 1179 if (ptr + MINBPC(enc) == end) 1180 return -XML_TOK_PROLOG_S; 1181 /* fall through */ 1182 case BT_S: 1183 case BT_LF: 1184 for (;;) 1185 { 1186 ptr += MINBPC(enc); 1187 if (ptr == end) 1188 break; 1189 switch (BYTE_TYPE(enc, ptr)) 1190 { 1191 case BT_S: 1192 case BT_LF: 1193 break; 1194 case BT_CR: 1195 /* don't split CR/LF pair */ 1196 if (ptr + MINBPC(enc) != end) 1197 break; 1198 /* fall through */ 1199 default: 1200 *nextTokPtr = ptr; 1201 return XML_TOK_PROLOG_S; 1202 } 1203 } 1204 *nextTokPtr = ptr; 1205 return XML_TOK_PROLOG_S; 1206 case BT_PERCNT: 1207 return PREFIX(scanPercent) (enc, ptr + MINBPC(enc), end, nextTokPtr); 1208 case BT_COMMA: 1209 *nextTokPtr = ptr + MINBPC(enc); 1210 return XML_TOK_COMMA; 1211 case BT_LSQB: 1212 *nextTokPtr = ptr + MINBPC(enc); 1213 return XML_TOK_OPEN_BRACKET; 1214 case BT_RSQB: 1215 ptr += MINBPC(enc); 1216 if (ptr == end) 1217 return -XML_TOK_CLOSE_BRACKET; 1218 if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) 1219 { 1220 if (ptr + MINBPC(enc) == end) 1221 return XML_TOK_PARTIAL; 1222 if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) 1223 { 1224 *nextTokPtr = ptr + 2 * MINBPC(enc); 1225 return 
XML_TOK_COND_SECT_CLOSE; 1226 } 1227 } 1228 *nextTokPtr = ptr; 1229 return XML_TOK_CLOSE_BRACKET; 1230 case BT_LPAR: 1231 *nextTokPtr = ptr + MINBPC(enc); 1232 return XML_TOK_OPEN_PAREN; 1233 case BT_RPAR: 1234 ptr += MINBPC(enc); 1235 if (ptr == end) 1236 return -XML_TOK_CLOSE_PAREN; 1237 switch (BYTE_TYPE(enc, ptr)) 1238 { 1239 case BT_AST: 1240 *nextTokPtr = ptr + MINBPC(enc); 1241 return XML_TOK_CLOSE_PAREN_ASTERISK; 1242 case BT_QUEST: 1243 *nextTokPtr = ptr + MINBPC(enc); 1244 return XML_TOK_CLOSE_PAREN_QUESTION; 1245 case BT_PLUS: 1246 *nextTokPtr = ptr + MINBPC(enc); 1247 return XML_TOK_CLOSE_PAREN_PLUS; 1248 case BT_CR: 1249 case BT_LF: 1250 case BT_S: 1251 case BT_GT: 1252 case BT_COMMA: 1253 case BT_VERBAR: 1254 case BT_RPAR: 1255 *nextTokPtr = ptr; 1256 return XML_TOK_CLOSE_PAREN; 1257 } 1258 *nextTokPtr = ptr; 1259 return XML_TOK_INVALID; 1260 case BT_VERBAR: 1261 *nextTokPtr = ptr + MINBPC(enc); 1262 return XML_TOK_OR; 1263 case BT_GT: 1264 *nextTokPtr = ptr + MINBPC(enc); 1265 return XML_TOK_DECL_CLOSE; 1266 case BT_NUM: 1267 return PREFIX(scanPoundName) (enc, ptr + MINBPC(enc), end, nextTokPtr); 1062 return -XML_TOK_CLOSE_PAREN; 1063 switch (BYTE_TYPE(enc, ptr)) { 1064 case BT_AST: 1065 *nextTokPtr = ptr + MINBPC(enc); 1066 return XML_TOK_CLOSE_PAREN_ASTERISK; 1067 case BT_QUEST: 1068 *nextTokPtr = ptr + MINBPC(enc); 1069 return XML_TOK_CLOSE_PAREN_QUESTION; 1070 case BT_PLUS: 1071 *nextTokPtr = ptr + MINBPC(enc); 1072 return XML_TOK_CLOSE_PAREN_PLUS; 1073 case BT_CR: case BT_LF: case BT_S: 1074 case BT_GT: case BT_COMMA: case BT_VERBAR: 1075 case BT_RPAR: 1076 *nextTokPtr = ptr; 1077 return XML_TOK_CLOSE_PAREN; 1078 } 1079 *nextTokPtr = ptr; 1080 return XML_TOK_INVALID; 1081 case BT_VERBAR: 1082 *nextTokPtr = ptr + MINBPC(enc); 1083 return XML_TOK_OR; 1084 case BT_GT: 1085 *nextTokPtr = ptr + MINBPC(enc); 1086 return XML_TOK_DECL_CLOSE; 1087 case BT_NUM: 1088 return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr); 1268 1089 #define 
LEAD_CASE(n) \ 1269 1090 case BT_LEAD ## n: \ … … 1282 1103 *nextTokPtr = ptr; \ 1283 1104 return XML_TOK_INVALID; 1284 1105 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 1285 1106 #undef LEAD_CASE 1286 1287 1288 1289 1290 1291 1292 1293 1107 case BT_NMSTRT: 1108 case BT_HEX: 1109 tok = XML_TOK_NAME; 1110 ptr += MINBPC(enc); 1111 break; 1112 case BT_DIGIT: 1113 case BT_NAME: 1114 case BT_MINUS: 1294 1115 #ifdef XML_NS 1295 1116 case BT_COLON: 1296 1117 #endif 1297 tok = XML_TOK_NMTOKEN; 1298 ptr += MINBPC(enc); 1299 break; 1300 case BT_NONASCII: 1301 if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) 1302 { 1303 ptr += MINBPC(enc); 1304 tok = XML_TOK_NAME; 1305 break; 1306 } 1307 if (IS_NAME_CHAR_MINBPC(enc, ptr)) 1308 { 1309 ptr += MINBPC(enc); 1310 tok = XML_TOK_NMTOKEN; 1311 break; 1312 } 1313 /* fall through */ 1314 default: 1315 *nextTokPtr = ptr; 1316 return XML_TOK_INVALID; 1317 } 1318 while (ptr != end) 1319 { 1320 switch (BYTE_TYPE(enc, ptr)) 1321 { 1322 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 1323 case BT_GT: 1324 case BT_RPAR: 1325 case BT_COMMA: 1326 case BT_VERBAR: 1327 case BT_LSQB: 1328 case BT_PERCNT: 1329 case BT_S: 1330 case BT_CR: 1331 case BT_LF: 1332 *nextTokPtr = ptr; 1333 return tok; 1118 tok = XML_TOK_NMTOKEN; 1119 ptr += MINBPC(enc); 1120 break; 1121 case BT_NONASCII: 1122 if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { 1123 ptr += MINBPC(enc); 1124 tok = XML_TOK_NAME; 1125 break; 1126 } 1127 if (IS_NAME_CHAR_MINBPC(enc, ptr)) { 1128 ptr += MINBPC(enc); 1129 tok = XML_TOK_NMTOKEN; 1130 break; 1131 } 1132 /* fall through */ 1133 default: 1134 *nextTokPtr = ptr; 1135 return XML_TOK_INVALID; 1136 } 1137 while (ptr != end) { 1138 switch (BYTE_TYPE(enc, ptr)) { 1139 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 1140 case BT_GT: case BT_RPAR: case BT_COMMA: 1141 case BT_VERBAR: case BT_LSQB: case BT_PERCNT: 1142 case BT_S: case BT_CR: case BT_LF: 1143 *nextTokPtr = ptr; 1144 return tok; 1334 1145 #ifdef XML_NS 1335 case BT_COLON: 1336 ptr += MINBPC(enc); 1337 switch (tok) 
1338 { 1339 case XML_TOK_NAME: 1340 if (ptr == end) 1341 return XML_TOK_PARTIAL; 1342 tok = XML_TOK_PREFIXED_NAME; 1343 switch (BYTE_TYPE(enc, ptr)) 1344 { 1345 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 1346 default: 1347 tok = XML_TOK_NMTOKEN; 1348 break; 1349 } 1350 break; 1351 case XML_TOK_PREFIXED_NAME: 1352 tok = XML_TOK_NMTOKEN; 1353 break; 1354 } 1355 break; 1146 case BT_COLON: 1147 ptr += MINBPC(enc); 1148 switch (tok) { 1149 case XML_TOK_NAME: 1150 if (ptr == end) 1151 return XML_TOK_PARTIAL; 1152 tok = XML_TOK_PREFIXED_NAME; 1153 switch (BYTE_TYPE(enc, ptr)) { 1154 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) 1155 default: 1156 tok = XML_TOK_NMTOKEN; 1157 break; 1158 } 1159 break; 1160 case XML_TOK_PREFIXED_NAME: 1161 tok = XML_TOK_NMTOKEN; 1162 break; 1163 } 1164 break; 1356 1165 #endif 1357 case BT_PLUS: 1358 if (tok == XML_TOK_NMTOKEN) 1359 { 1360 *nextTokPtr = ptr; 1361 return XML_TOK_INVALID; 1362 } 1363 *nextTokPtr = ptr + MINBPC(enc); 1364 return XML_TOK_NAME_PLUS; 1365 case BT_AST: 1366 if (tok == XML_TOK_NMTOKEN) 1367 { 1368 *nextTokPtr = ptr; 1369 return XML_TOK_INVALID; 1370 } 1371 *nextTokPtr = ptr + MINBPC(enc); 1372 return XML_TOK_NAME_ASTERISK; 1373 case BT_QUEST: 1374 if (tok == XML_TOK_NMTOKEN) 1375 { 1376 *nextTokPtr = ptr; 1377 return XML_TOK_INVALID; 1378 } 1379 *nextTokPtr = ptr + MINBPC(enc); 1380 return XML_TOK_NAME_QUESTION; 1381 default: 1382 *nextTokPtr = ptr; 1383 return XML_TOK_INVALID; 1384 } 1385 } 1386 return -tok; 1387 } 1388 1389 static 1390 int EXPATENTRY PREFIX(attributeValueTok) (const ENCODING * enc, const char *ptr, const char *end, 1391 const char **nextTokPtr) 1392 { 1393 const char *start; 1394 1395 if (ptr == end) 1396 return XML_TOK_NONE; 1397 start = ptr; 1398 while (ptr != end) 1399 { 1400 switch (BYTE_TYPE(enc, ptr)) 1401 { 1166 case BT_PLUS: 1167 if (tok == XML_TOK_NMTOKEN) { 1168 *nextTokPtr = ptr; 1169 return XML_TOK_INVALID; 1170 } 1171 *nextTokPtr = ptr + MINBPC(enc); 1172 return XML_TOK_NAME_PLUS; 1173 
case BT_AST: 1174 if (tok == XML_TOK_NMTOKEN) { 1175 *nextTokPtr = ptr; 1176 return XML_TOK_INVALID; 1177 } 1178 *nextTokPtr = ptr + MINBPC(enc); 1179 return XML_TOK_NAME_ASTERISK; 1180 case BT_QUEST: 1181 if (tok == XML_TOK_NMTOKEN) { 1182 *nextTokPtr = ptr; 1183 return XML_TOK_INVALID; 1184 } 1185 *nextTokPtr = ptr + MINBPC(enc); 1186 return XML_TOK_NAME_QUESTION; 1187 default: 1188 *nextTokPtr = ptr; 1189 return XML_TOK_INVALID; 1190 } 1191 } 1192 return -tok; 1193 } 1194 1195 static 1196 int PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end, 1197 const char **nextTokPtr) 1198 { 1199 const char *start; 1200 if (ptr == end) 1201 return XML_TOK_NONE; 1202 start = ptr; 1203 while (ptr != end) { 1204 switch (BYTE_TYPE(enc, ptr)) { 1402 1205 #define LEAD_CASE(n) \ 1403 1206 case BT_LEAD ## n: ptr += n; break; 1404 1207 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 1405 1208 #undef LEAD_CASE 1406 case BT_AMP: 1407 if (ptr == start) 1408 return PREFIX(scanRef) (enc, ptr + MINBPC(enc), end, nextTokPtr); 1409 *nextTokPtr = ptr; 1410 return XML_TOK_DATA_CHARS; 1411 case BT_LT: 1412 /* this is for inside entity references */ 1413 *nextTokPtr = ptr; 1414 return XML_TOK_INVALID; 1415 case BT_LF: 1416 if (ptr == start) 1417 { 1418 *nextTokPtr = ptr + MINBPC(enc); 1419 return XML_TOK_DATA_NEWLINE; 1420 } 1421 *nextTokPtr = ptr; 1422 return XML_TOK_DATA_CHARS; 1423 case BT_CR: 1424 if (ptr == start) 1425 { 1426 ptr += MINBPC(enc); 1427 if (ptr == end) 1428 return XML_TOK_TRAILING_CR; 1429 if (BYTE_TYPE(enc, ptr) == BT_LF) 1430 ptr += MINBPC(enc); 1431 *nextTokPtr = ptr; 1432 return XML_TOK_DATA_NEWLINE; 1433 } 1434 *nextTokPtr = ptr; 1435 return XML_TOK_DATA_CHARS; 1436 case BT_S: 1437 if (ptr == start) 1438 { 1439 *nextTokPtr = ptr + MINBPC(enc); 1440 return XML_TOK_ATTRIBUTE_VALUE_S; 1441 } 1442 *nextTokPtr = ptr; 1443 return XML_TOK_DATA_CHARS; 1444 default: 1445 ptr += MINBPC(enc); 1446 break; 1447 } 1448 } 1449 *nextTokPtr = ptr; 1450 return 
XML_TOK_DATA_CHARS; 1451 } 1452 1453 static 1454 int EXPATENTRY PREFIX(entityValueTok) (const ENCODING * enc, const char *ptr, const char *end, 1455 const char **nextTokPtr) 1456 { 1457 const char *start; 1458 1209 case BT_AMP: 1210 if (ptr == start) 1211 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); 1212 *nextTokPtr = ptr; 1213 return XML_TOK_DATA_CHARS; 1214 case BT_LT: 1215 /* this is for inside entity references */ 1216 *nextTokPtr = ptr; 1217 return XML_TOK_INVALID; 1218 case BT_LF: 1219 if (ptr == start) { 1220 *nextTokPtr = ptr + MINBPC(enc); 1221 return XML_TOK_DATA_NEWLINE; 1222 } 1223 *nextTokPtr = ptr; 1224 return XML_TOK_DATA_CHARS; 1225 case BT_CR: 1226 if (ptr == start) { 1227 ptr += MINBPC(enc); 1459 1228 if (ptr == end) 1460 return XML_TOK_NONE; 1461 start = ptr; 1462 while (ptr != end) 1463 { 1464 switch (BYTE_TYPE(enc, ptr)) 1465 { 1229 return XML_TOK_TRAILING_CR; 1230 if (BYTE_TYPE(enc, ptr) == BT_LF) 1231 ptr += MINBPC(enc); 1232 *nextTokPtr = ptr; 1233 return XML_TOK_DATA_NEWLINE; 1234 } 1235 *nextTokPtr = ptr; 1236 return XML_TOK_DATA_CHARS; 1237 case BT_S: 1238 if (ptr == start) { 1239 *nextTokPtr = ptr + MINBPC(enc); 1240 return XML_TOK_ATTRIBUTE_VALUE_S; 1241 } 1242 *nextTokPtr = ptr; 1243 return XML_TOK_DATA_CHARS; 1244 default: 1245 ptr += MINBPC(enc); 1246 break; 1247 } 1248 } 1249 *nextTokPtr = ptr; 1250 return XML_TOK_DATA_CHARS; 1251 } 1252 1253 static 1254 int PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end, 1255 const char **nextTokPtr) 1256 { 1257 const char *start; 1258 if (ptr == end) 1259 return XML_TOK_NONE; 1260 start = ptr; 1261 while (ptr != end) { 1262 switch (BYTE_TYPE(enc, ptr)) { 1466 1263 #define LEAD_CASE(n) \ 1467 1264 case BT_LEAD ## n: ptr += n; break; 1468 1265 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 1469 1266 #undef LEAD_CASE 1470 case BT_AMP: 1471 if (ptr == start) 1472 return PREFIX(scanRef) (enc, ptr + MINBPC(enc), end, nextTokPtr); 1473 *nextTokPtr = ptr; 1474 
return XML_TOK_DATA_CHARS; 1475 case BT_PERCNT: 1476 if (ptr == start) 1477 { 1478 int tok = PREFIX(scanPercent) (enc, ptr + MINBPC(enc), 1479 end, nextTokPtr); 1480 1481 return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok; 1482 } 1483 *nextTokPtr = ptr; 1484 return XML_TOK_DATA_CHARS; 1485 case BT_LF: 1486 if (ptr == start) 1487 { 1488 *nextTokPtr = ptr + MINBPC(enc); 1489 return XML_TOK_DATA_NEWLINE; 1490 } 1491 *nextTokPtr = ptr; 1492 return XML_TOK_DATA_CHARS; 1493 case BT_CR: 1494 if (ptr == start) 1495 { 1496 ptr += MINBPC(enc); 1497 if (ptr == end) 1498 return XML_TOK_TRAILING_CR; 1499 if (BYTE_TYPE(enc, ptr) == BT_LF) 1500 ptr += MINBPC(enc); 1501 *nextTokPtr = ptr; 1502 return XML_TOK_DATA_NEWLINE; 1503 } 1504 *nextTokPtr = ptr; 1505 return XML_TOK_DATA_CHARS; 1506 default: 1507 ptr += MINBPC(enc); 1508 break; 1509 } 1510 } 1511 *nextTokPtr = ptr; 1512 return XML_TOK_DATA_CHARS; 1267 case BT_AMP: 1268 if (ptr == start) 1269 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); 1270 *nextTokPtr = ptr; 1271 return XML_TOK_DATA_CHARS; 1272 case BT_PERCNT: 1273 if (ptr == start) { 1274 int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc), 1275 end, nextTokPtr); 1276 return (tok == XML_TOK_PERCENT) ? 
XML_TOK_INVALID : tok; 1277 } 1278 *nextTokPtr = ptr; 1279 return XML_TOK_DATA_CHARS; 1280 case BT_LF: 1281 if (ptr == start) { 1282 *nextTokPtr = ptr + MINBPC(enc); 1283 return XML_TOK_DATA_NEWLINE; 1284 } 1285 *nextTokPtr = ptr; 1286 return XML_TOK_DATA_CHARS; 1287 case BT_CR: 1288 if (ptr == start) { 1289 ptr += MINBPC(enc); 1290 if (ptr == end) 1291 return XML_TOK_TRAILING_CR; 1292 if (BYTE_TYPE(enc, ptr) == BT_LF) 1293 ptr += MINBPC(enc); 1294 *nextTokPtr = ptr; 1295 return XML_TOK_DATA_NEWLINE; 1296 } 1297 *nextTokPtr = ptr; 1298 return XML_TOK_DATA_CHARS; 1299 default: 1300 ptr += MINBPC(enc); 1301 break; 1302 } 1303 } 1304 *nextTokPtr = ptr; 1305 return XML_TOK_DATA_CHARS; 1513 1306 } 1514 1307 … … 1516 1309 1517 1310 static 1518 int EXPATENTRY PREFIX(ignoreSectionTok) (const ENCODING * enc, const char *ptr, const char *end, 1519 const char **nextTokPtr) 1520 { 1521 int level = 0; 1522 1523 if (MINBPC(enc) > 1) 1524 { 1525 size_t n = end - ptr; 1526 1527 if (n & (MINBPC(enc) - 1)) 1528 { 1529 n &= ~(MINBPC(enc) - 1); 1530 end = ptr + n; 1531 } 1532 } 1533 while (ptr != end) 1534 { 1535 switch (BYTE_TYPE(enc, ptr)) 1536 { 1537 INVALID_CASES(ptr, nextTokPtr) 1538 case BT_LT: 1539 if ((ptr += MINBPC(enc)) == end) 1540 return XML_TOK_PARTIAL; 1541 if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) 1542 { 1543 if ((ptr += MINBPC(enc)) == end) 1544 return XML_TOK_PARTIAL; 1545 if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) 1546 { 1547 ++level; 1548 ptr += MINBPC(enc); 1549 } 1550 } 1551 break; 1552 case BT_RSQB: 1553 if ((ptr += MINBPC(enc)) == end) 1554 return XML_TOK_PARTIAL; 1555 if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) 1556 { 1557 if ((ptr += MINBPC(enc)) == end) 1558 return XML_TOK_PARTIAL; 1559 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) 1560 { 1561 ptr += MINBPC(enc); 1562 if (level == 0) 1563 { 1564 *nextTokPtr = ptr; 1565 return XML_TOK_IGNORE_SECT; 1566 } 1567 --level; 1568 } 1569 } 1570 break; 1571 default: 1572 ptr += MINBPC(enc); 1573 break; 1574 } 1575 } 1576 return 
XML_TOK_PARTIAL; 1311 int PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, const char *end, 1312 const char **nextTokPtr) 1313 { 1314 int level = 0; 1315 if (MINBPC(enc) > 1) { 1316 size_t n = end - ptr; 1317 if (n & (MINBPC(enc) - 1)) { 1318 n &= ~(MINBPC(enc) - 1); 1319 end = ptr + n; 1320 } 1321 } 1322 while (ptr != end) { 1323 switch (BYTE_TYPE(enc, ptr)) { 1324 INVALID_CASES(ptr, nextTokPtr) 1325 case BT_LT: 1326 if ((ptr += MINBPC(enc)) == end) 1327 return XML_TOK_PARTIAL; 1328 if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) { 1329 if ((ptr += MINBPC(enc)) == end) 1330 return XML_TOK_PARTIAL; 1331 if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) { 1332 ++level; 1333 ptr += MINBPC(enc); 1334 } 1335 } 1336 break; 1337 case BT_RSQB: 1338 if ((ptr += MINBPC(enc)) == end) 1339 return XML_TOK_PARTIAL; 1340 if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { 1341 if ((ptr += MINBPC(enc)) == end) 1342 return XML_TOK_PARTIAL; 1343 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { 1344 ptr += MINBPC(enc); 1345 if (level == 0) { 1346 *nextTokPtr = ptr; 1347 return XML_TOK_IGNORE_SECT; 1348 } 1349 --level; 1350 } 1351 } 1352 break; 1353 default: 1354 ptr += MINBPC(enc); 1355 break; 1356 } 1357 } 1358 return XML_TOK_PARTIAL; 1577 1359 } 1578 1360 … … 1580 1362 1581 1363 static 1582 int EXPATENTRY PREFIX(isPublicId) (const ENCODING * enc, const char *ptr, const char *end, 1583 const char **badPtr) 1584 { 1585 ptr += MINBPC(enc); 1586 end -= MINBPC(enc); 1587 for (; ptr != end; ptr += MINBPC(enc)) 1588 { 1589 switch (BYTE_TYPE(enc, ptr)) 1590 { 1591 case BT_DIGIT: 1592 case BT_HEX: 1593 case BT_MINUS: 1594 case BT_APOS: 1595 case BT_LPAR: 1596 case BT_RPAR: 1597 case BT_PLUS: 1598 case BT_COMMA: 1599 case BT_SOL: 1600 case BT_EQUALS: 1601 case BT_QUEST: 1602 case BT_CR: 1603 case BT_LF: 1604 case BT_SEMI: 1605 case BT_EXCL: 1606 case BT_AST: 1607 case BT_PERCNT: 1608 case BT_NUM: 1364 int PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, 1365 const char **badPtr) 1366 { 
1367 ptr += MINBPC(enc); 1368 end -= MINBPC(enc); 1369 for (; ptr != end; ptr += MINBPC(enc)) { 1370 switch (BYTE_TYPE(enc, ptr)) { 1371 case BT_DIGIT: 1372 case BT_HEX: 1373 case BT_MINUS: 1374 case BT_APOS: 1375 case BT_LPAR: 1376 case BT_RPAR: 1377 case BT_PLUS: 1378 case BT_COMMA: 1379 case BT_SOL: 1380 case BT_EQUALS: 1381 case BT_QUEST: 1382 case BT_CR: 1383 case BT_LF: 1384 case BT_SEMI: 1385 case BT_EXCL: 1386 case BT_AST: 1387 case BT_PERCNT: 1388 case BT_NUM: 1609 1389 #ifdef XML_NS 1610 1390 case BT_COLON: 1611 1391 #endif 1612 break; 1613 case BT_S: 1614 if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) 1615 { 1616 *badPtr = ptr; 1617 return 0; 1618 } 1619 break; 1620 case BT_NAME: 1621 case BT_NMSTRT: 1622 if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f)) 1623 break; 1624 default: 1625 switch (BYTE_TO_ASCII(enc, ptr)) 1626 { 1627 case 0x24: /* $ */ 1628 case 0x40: /* @ */ 1629 break; 1630 default: 1631 *badPtr = ptr; 1632 return 0; 1633 } 1634 break; 1635 } 1636 } 1637 return 1; 1392 break; 1393 case BT_S: 1394 if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) { 1395 *badPtr = ptr; 1396 return 0; 1397 } 1398 break; 1399 case BT_NAME: 1400 case BT_NMSTRT: 1401 if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f)) 1402 break; 1403 default: 1404 switch (BYTE_TO_ASCII(enc, ptr)) { 1405 case 0x24: /* $ */ 1406 case 0x40: /* @ */ 1407 break; 1408 default: 1409 *badPtr = ptr; 1410 return 0; 1411 } 1412 break; 1413 } 1414 } 1415 return 1; 1638 1416 } 1639 1417 1640 1418 /* This must only be called for a well-formed start-tag or empty element tag. 1641 * Returns the number of attributes. Pointers to the first attsMax attributes 1642 * are stored in atts. 
*/ 1643 1644 static 1645 int EXPATENTRY PREFIX(getAtts) (const ENCODING * enc, const char *ptr, 1646 int attsMax, ATTRIBUTE * atts) 1647 { 1648 enum 1649 { 1650 other, inName, inValue 1651 } 1652 state = inName; 1653 int nAtts = 0; 1654 int open = 0; /* defined when state == inValue; 1655 1656 * initialization just to shut up compilers */ 1657 1658 for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) 1659 { 1660 switch (BYTE_TYPE(enc, ptr)) 1661 { 1419 Returns the number of attributes. Pointers to the first attsMax attributes 1420 are stored in atts. */ 1421 1422 static 1423 int PREFIX(getAtts)(const ENCODING *enc, const char *ptr, 1424 int attsMax, ATTRIBUTE *atts) 1425 { 1426 enum { other, inName, inValue } state = inName; 1427 int nAtts = 0; 1428 int open = 0; /* defined when state == inValue; 1429 initialization just to shut up compilers */ 1430 1431 for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) { 1432 switch (BYTE_TYPE(enc, ptr)) { 1662 1433 #define START_NAME \ 1663 1434 if (state == other) { \ … … 1670 1441 #define LEAD_CASE(n) \ 1671 1442 case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break; 1672 1443 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 1673 1444 #undef LEAD_CASE 1674 1675 1676 1677 1678 1445 case BT_NONASCII: 1446 case BT_NMSTRT: 1447 case BT_HEX: 1448 START_NAME 1449 break; 1679 1450 #undef START_NAME 1680 case BT_QUOT: 1681 if (state != inValue) 1682 { 1683 if (nAtts < attsMax) 1684 atts[nAtts].valuePtr = ptr + MINBPC(enc); 1685 state = inValue; 1686 open = BT_QUOT; 1687 } 1688 else if (open == BT_QUOT) 1689 { 1690 state = other; 1691 if (nAtts < attsMax) 1692 atts[nAtts].valueEnd = ptr; 1693 nAtts++; 1694 } 1695 break; 1696 case BT_APOS: 1697 if (state != inValue) 1698 { 1699 if (nAtts < attsMax) 1700 atts[nAtts].valuePtr = ptr + MINBPC(enc); 1701 state = inValue; 1702 open = BT_APOS; 1703 } 1704 else if (open == BT_APOS) 1705 { 1706 state = other; 1707 if (nAtts < attsMax) 1708 atts[nAtts].valueEnd = ptr; 1709 nAtts++; 1710 } 1711 break; 1712 case 
BT_AMP: 1713 if (nAtts < attsMax) 1714 atts[nAtts].normalized = 0; 1715 break; 1716 case BT_S: 1717 if (state == inName) 1718 state = other; 1719 else if (state == inValue 1720 && nAtts < attsMax 1721 && atts[nAtts].normalized 1722 && (ptr == atts[nAtts].valuePtr 1723 || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE 1724 || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE 1725 || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open)) 1726 atts[nAtts].normalized = 0; 1727 break; 1728 case BT_CR: 1729 case BT_LF: 1730 /* This case ensures that the first attribute name is counted 1731 * Apart from that we could just change state on the quote. */ 1732 if (state == inName) 1733 state = other; 1734 else if (state == inValue && nAtts < attsMax) 1735 atts[nAtts].normalized = 0; 1736 break; 1737 case BT_GT: 1738 case BT_SOL: 1739 if (state != inValue) 1740 return nAtts; 1741 break; 1742 default: 1743 break; 1744 } 1745 } 1746 /* not reached */ 1747 } 1748 1749 static 1750 int EXPATENTRY PREFIX(charRefNumber) (const ENCODING * enc, const char *ptr) 1751 { 1752 int result = 0; 1753 1754 /* skip &# */ 1755 ptr += 2 * MINBPC(enc); 1756 if (CHAR_MATCHES(enc, ptr, ASCII_x)) 1757 { 1758 for (ptr += MINBPC(enc); !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) 1759 { 1760 int c = BYTE_TO_ASCII(enc, ptr); 1761 1762 switch (c) 1763 { 1764 case ASCII_0: 1765 case ASCII_1: 1766 case ASCII_2: 1767 case ASCII_3: 1768 case ASCII_4: 1769 case ASCII_5: 1770 case ASCII_6: 1771 case ASCII_7: 1772 case ASCII_8: 1773 case ASCII_9: 1774 result <<= 4; 1775 result |= (c - ASCII_0); 1776 break; 1777 case ASCII_A: 1778 case ASCII_B: 1779 case ASCII_C: 1780 case ASCII_D: 1781 case ASCII_E: 1782 case ASCII_F: 1783 result <<= 4; 1784 result += 10 + (c - ASCII_A); 1785 break; 1786 case ASCII_a: 1787 case ASCII_b: 1788 case ASCII_c: 1789 case ASCII_d: 1790 case ASCII_e: 1791 case ASCII_f: 1792 result <<= 4; 1793 result += 10 + (c - ASCII_a); 1794 break; 1795 } 1796 if (result >= 0x110000) 1797 return -1; 1798 
} 1799 } 1800 else 1801 { 1802 for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) 1803 { 1804 int c = BYTE_TO_ASCII(enc, ptr); 1805 1806 result *= 10; 1807 result += (c - ASCII_0); 1808 if (result >= 0x110000) 1809 return -1; 1810 } 1811 } 1812 return checkCharRefNumber(result); 1813 } 1814 1815 static 1816 int EXPATENTRY PREFIX(predefinedEntityName) (const ENCODING * enc, const char *ptr, const char *end) 1817 { 1818 switch ((end - ptr) / MINBPC(enc)) 1819 { 1820 case 2: 1821 if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) 1822 { 1823 switch (BYTE_TO_ASCII(enc, ptr)) 1824 { 1825 case ASCII_l: 1826 return ASCII_LT; 1827 case ASCII_g: 1828 return ASCII_GT; 1829 } 1830 } 1831 break; 1832 case 3: 1833 if (CHAR_MATCHES(enc, ptr, ASCII_a)) 1834 { 1835 ptr += MINBPC(enc); 1836 if (CHAR_MATCHES(enc, ptr, ASCII_m)) 1837 { 1838 ptr += MINBPC(enc); 1839 if (CHAR_MATCHES(enc, ptr, ASCII_p)) 1840 return ASCII_AMP; 1841 } 1842 } 1843 break; 1844 case 4: 1845 switch (BYTE_TO_ASCII(enc, ptr)) 1846 { 1847 case ASCII_q: 1848 ptr += MINBPC(enc); 1849 if (CHAR_MATCHES(enc, ptr, ASCII_u)) 1850 { 1851 ptr += MINBPC(enc); 1852 if (CHAR_MATCHES(enc, ptr, ASCII_o)) 1853 { 1854 ptr += MINBPC(enc); 1855 if (CHAR_MATCHES(enc, ptr, ASCII_t)) 1856 return ASCII_QUOT; 1857 } 1858 } 1859 break; 1860 case ASCII_a: 1861 ptr += MINBPC(enc); 1862 if (CHAR_MATCHES(enc, ptr, ASCII_p)) 1863 { 1864 ptr += MINBPC(enc); 1865 if (CHAR_MATCHES(enc, ptr, ASCII_o)) 1866 { 1867 ptr += MINBPC(enc); 1868 if (CHAR_MATCHES(enc, ptr, ASCII_s)) 1869 return ASCII_APOS; 1870 } 1871 } 1872 break; 1873 } 1874 } 1875 return 0; 1876 } 1877 1878 static 1879 int EXPATENTRY PREFIX(sameName) (const ENCODING * enc, const char *ptr1, const char *ptr2) 1880 { 1881 for (;;) 1882 { 1883 switch (BYTE_TYPE(enc, ptr1)) 1884 { 1451 case BT_QUOT: 1452 if (state != inValue) { 1453 if (nAtts < attsMax) 1454 atts[nAtts].valuePtr = ptr + MINBPC(enc); 1455 state = inValue; 1456 open = BT_QUOT; 1457 } 1458 else if (open == 
BT_QUOT) { 1459 state = other; 1460 if (nAtts < attsMax) 1461 atts[nAtts].valueEnd = ptr; 1462 nAtts++; 1463 } 1464 break; 1465 case BT_APOS: 1466 if (state != inValue) { 1467 if (nAtts < attsMax) 1468 atts[nAtts].valuePtr = ptr + MINBPC(enc); 1469 state = inValue; 1470 open = BT_APOS; 1471 } 1472 else if (open == BT_APOS) { 1473 state = other; 1474 if (nAtts < attsMax) 1475 atts[nAtts].valueEnd = ptr; 1476 nAtts++; 1477 } 1478 break; 1479 case BT_AMP: 1480 if (nAtts < attsMax) 1481 atts[nAtts].normalized = 0; 1482 break; 1483 case BT_S: 1484 if (state == inName) 1485 state = other; 1486 else if (state == inValue 1487 && nAtts < attsMax 1488 && atts[nAtts].normalized 1489 && (ptr == atts[nAtts].valuePtr 1490 || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE 1491 || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE 1492 || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open)) 1493 atts[nAtts].normalized = 0; 1494 break; 1495 case BT_CR: case BT_LF: 1496 /* This case ensures that the first attribute name is counted 1497 Apart from that we could just change state on the quote. 
*/ 1498 if (state == inName) 1499 state = other; 1500 else if (state == inValue && nAtts < attsMax) 1501 atts[nAtts].normalized = 0; 1502 break; 1503 case BT_GT: 1504 case BT_SOL: 1505 if (state != inValue) 1506 return nAtts; 1507 break; 1508 default: 1509 break; 1510 } 1511 } 1512 /* not reached */ 1513 } 1514 1515 static 1516 int PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) 1517 { 1518 int result = 0; 1519 /* skip &# */ 1520 ptr += 2*MINBPC(enc); 1521 if (CHAR_MATCHES(enc, ptr, ASCII_x)) { 1522 for (ptr += MINBPC(enc); !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) { 1523 int c = BYTE_TO_ASCII(enc, ptr); 1524 switch (c) { 1525 case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4: 1526 case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9: 1527 result <<= 4; 1528 result |= (c - ASCII_0); 1529 break; 1530 case ASCII_A: case ASCII_B: case ASCII_C: case ASCII_D: case ASCII_E: case ASCII_F: 1531 result <<= 4; 1532 result += 10 + (c - ASCII_A); 1533 break; 1534 case ASCII_a: case ASCII_b: case ASCII_c: case ASCII_d: case ASCII_e: case ASCII_f: 1535 result <<= 4; 1536 result += 10 + (c - ASCII_a); 1537 break; 1538 } 1539 if (result >= 0x110000) 1540 return -1; 1541 } 1542 } 1543 else { 1544 for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) { 1545 int c = BYTE_TO_ASCII(enc, ptr); 1546 result *= 10; 1547 result += (c - ASCII_0); 1548 if (result >= 0x110000) 1549 return -1; 1550 } 1551 } 1552 return checkCharRefNumber(result); 1553 } 1554 1555 static 1556 int PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, const char *end) 1557 { 1558 switch ((end - ptr)/MINBPC(enc)) { 1559 case 2: 1560 if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) { 1561 switch (BYTE_TO_ASCII(enc, ptr)) { 1562 case ASCII_l: 1563 return ASCII_LT; 1564 case ASCII_g: 1565 return ASCII_GT; 1566 } 1567 } 1568 break; 1569 case 3: 1570 if (CHAR_MATCHES(enc, ptr, ASCII_a)) { 1571 ptr += MINBPC(enc); 1572 if 
(CHAR_MATCHES(enc, ptr, ASCII_m)) { 1573 ptr += MINBPC(enc); 1574 if (CHAR_MATCHES(enc, ptr, ASCII_p)) 1575 return ASCII_AMP; 1576 } 1577 } 1578 break; 1579 case 4: 1580 switch (BYTE_TO_ASCII(enc, ptr)) { 1581 case ASCII_q: 1582 ptr += MINBPC(enc); 1583 if (CHAR_MATCHES(enc, ptr, ASCII_u)) { 1584 ptr += MINBPC(enc); 1585 if (CHAR_MATCHES(enc, ptr, ASCII_o)) { 1586 ptr += MINBPC(enc); 1587 if (CHAR_MATCHES(enc, ptr, ASCII_t)) 1588 return ASCII_QUOT; 1589 } 1590 } 1591 break; 1592 case ASCII_a: 1593 ptr += MINBPC(enc); 1594 if (CHAR_MATCHES(enc, ptr, ASCII_p)) { 1595 ptr += MINBPC(enc); 1596 if (CHAR_MATCHES(enc, ptr, ASCII_o)) { 1597 ptr += MINBPC(enc); 1598 if (CHAR_MATCHES(enc, ptr, ASCII_s)) 1599 return ASCII_APOS; 1600 } 1601 } 1602 break; 1603 } 1604 } 1605 return 0; 1606 } 1607 1608 static 1609 int PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2) 1610 { 1611 for (;;) { 1612 switch (BYTE_TYPE(enc, ptr1)) { 1885 1613 #define LEAD_CASE(n) \ 1886 1614 case BT_LEAD ## n: \ 1887 1615 if (*ptr1++ != *ptr2++) \ 1888 1616 return 0; 1889 1617 LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2) 1890 1618 #undef LEAD_CASE 1891 1892 1893 1894 1895 1896 1619 /* fall through */ 1620 if (*ptr1++ != *ptr2++) 1621 return 0; 1622 break; 1623 case BT_NONASCII: 1624 case BT_NMSTRT: 1897 1625 #ifdef XML_NS 1898 1626 case BT_COLON: 1899 1627 #endif 1900 case BT_HEX: 1901 case BT_DIGIT: 1902 case BT_NAME: 1903 case BT_MINUS: 1904 if (*ptr2++ != *ptr1++) 1905 return 0; 1906 if (MINBPC(enc) > 1) 1907 { 1908 if (*ptr2++ != *ptr1++) 1909 return 0; 1910 if (MINBPC(enc) > 2) 1911 { 1912 if (*ptr2++ != *ptr1++) 1913 return 0; 1914 if (MINBPC(enc) > 3) 1915 { 1916 if (*ptr2++ != *ptr1++) 1917 return 0; 1918 } 1919 } 1920 } 1921 break; 1922 default: 1923 if (MINBPC(enc) == 1 && *ptr1 == *ptr2) 1924 return 1; 1925 switch (BYTE_TYPE(enc, ptr2)) 1926 { 1927 case BT_LEAD2: 1928 case BT_LEAD3: 1929 case BT_LEAD4: 1930 case BT_NONASCII: 1931 case BT_NMSTRT: 1628 case BT_HEX: 1629 case 
BT_DIGIT: 1630 case BT_NAME: 1631 case BT_MINUS: 1632 if (*ptr2++ != *ptr1++) 1633 return 0; 1634 if (MINBPC(enc) > 1) { 1635 if (*ptr2++ != *ptr1++) 1636 return 0; 1637 if (MINBPC(enc) > 2) { 1638 if (*ptr2++ != *ptr1++) 1639 return 0; 1640 if (MINBPC(enc) > 3) { 1641 if (*ptr2++ != *ptr1++) 1642 return 0; 1643 } 1644 } 1645 } 1646 break; 1647 default: 1648 if (MINBPC(enc) == 1 && *ptr1 == *ptr2) 1649 return 1; 1650 switch (BYTE_TYPE(enc, ptr2)) { 1651 case BT_LEAD2: 1652 case BT_LEAD3: 1653 case BT_LEAD4: 1654 case BT_NONASCII: 1655 case BT_NMSTRT: 1932 1656 #ifdef XML_NS 1933 1657 case BT_COLON: 1934 1658 #endif 1935 case BT_HEX: 1936 case BT_DIGIT: 1937 case BT_NAME: 1938 case BT_MINUS: 1939 return 0; 1940 default: 1941 return 1; 1942 } 1943 } 1944 } 1945 /* not reached */ 1946 } 1947 1948 static 1949 int EXPATENTRY PREFIX(nameMatchesAscii) (const ENCODING * enc, const char *ptr1, 1950 const char *end1, const char *ptr2) 1951 { 1952 for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) 1953 { 1954 if (ptr1 == end1) 1955 return 0; 1956 if (!CHAR_MATCHES(enc, ptr1, *ptr2)) 1957 return 0; 1958 } 1959 return ptr1 == end1; 1960 } 1961 1962 static 1963 int EXPATENTRY PREFIX(nameLength) (const ENCODING * enc, const char *ptr) 1964 { 1965 const char *start = ptr; 1966 1967 for (;;) 1968 { 1969 switch (BYTE_TYPE(enc, ptr)) 1970 { 1659 case BT_HEX: 1660 case BT_DIGIT: 1661 case BT_NAME: 1662 case BT_MINUS: 1663 return 0; 1664 default: 1665 return 1; 1666 } 1667 } 1668 } 1669 /* not reached */ 1670 } 1671 1672 static 1673 int PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1, 1674 const char *end1, const char *ptr2) 1675 { 1676 for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) { 1677 if (ptr1 == end1) 1678 return 0; 1679 if (!CHAR_MATCHES(enc, ptr1, *ptr2)) 1680 return 0; 1681 } 1682 return ptr1 == end1; 1683 } 1684 1685 static 1686 int PREFIX(nameLength)(const ENCODING *enc, const char *ptr) 1687 { 1688 const char *start = ptr; 1689 for (;;) { 1690 switch (BYTE_TYPE(enc, 
ptr)) { 1971 1691 #define LEAD_CASE(n) \ 1972 1692 case BT_LEAD ## n: ptr += n; break; 1973 1693 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 1974 1694 #undef LEAD_CASE 1975 1976 1695 case BT_NONASCII: 1696 case BT_NMSTRT: 1977 1697 #ifdef XML_NS 1978 1698 case BT_COLON: 1979 1699 #endif 1980 case BT_HEX: 1981 case BT_DIGIT: 1982 case BT_NAME: 1983 case BT_MINUS: 1984 ptr += MINBPC(enc); 1985 break; 1986 default: 1987 return ptr - start; 1988 } 1989 } 1990 } 1991 1992 static 1993 const char *EXPATENTRY PREFIX(skipS) (const ENCODING * enc, const char *ptr) 1994 { 1995 for (;;) 1996 { 1997 switch (BYTE_TYPE(enc, ptr)) 1998 { 1999 case BT_LF: 2000 case BT_CR: 2001 case BT_S: 2002 ptr += MINBPC(enc); 2003 break; 2004 default: 2005 return ptr; 2006 } 2007 } 2008 } 2009 2010 static 2011 void EXPATENTRY PREFIX(updatePosition) (const ENCODING * enc, 2012 const char *ptr, 2013 const char *end, 2014 POSITION * pos) 2015 { 2016 while (ptr != end) 2017 { 2018 switch (BYTE_TYPE(enc, ptr)) 2019 { 1700 case BT_HEX: 1701 case BT_DIGIT: 1702 case BT_NAME: 1703 case BT_MINUS: 1704 ptr += MINBPC(enc); 1705 break; 1706 default: 1707 return ptr - start; 1708 } 1709 } 1710 } 1711 1712 static 1713 const char *PREFIX(skipS)(const ENCODING *enc, const char *ptr) 1714 { 1715 for (;;) { 1716 switch (BYTE_TYPE(enc, ptr)) { 1717 case BT_LF: 1718 case BT_CR: 1719 case BT_S: 1720 ptr += MINBPC(enc); 1721 break; 1722 default: 1723 return ptr; 1724 } 1725 } 1726 } 1727 1728 static 1729 void PREFIX(updatePosition)(const ENCODING *enc, 1730 const char *ptr, 1731 const char *end, 1732 POSITION *pos) 1733 { 1734 while (ptr != end) { 1735 switch (BYTE_TYPE(enc, ptr)) { 2020 1736 #define LEAD_CASE(n) \ 2021 1737 case BT_LEAD ## n: \ 2022 1738 ptr += n; \ 2023 1739 break; 2024 1740 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 2025 1741 #undef LEAD_CASE 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 1742 case BT_LF: 1743 pos->columnNumber = (unsigned)-1; 1744 
pos->lineNumber++; 1745 ptr += MINBPC(enc); 1746 break; 1747 case BT_CR: 1748 pos->lineNumber++; 1749 ptr += MINBPC(enc); 1750 if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF) 1751 ptr += MINBPC(enc); 1752 pos->columnNumber = (unsigned)-1; 1753 break; 1754 default: 1755 ptr += MINBPC(enc); 1756 break; 1757 } 1758 pos->columnNumber++; 1759 } 2044 1760 } 2045 1761
Note:
See TracChangeset
for help on using the changeset viewer.