source: trunk/src/helpers/xmltok_impl.c@ 133

Last change on this file since 133 was 98, checked in by umoeller, 24 years ago

Misc updates.

  • Property svn:eol-style set to CRLF
  • Property svn:keywords set to Author Date Id Revision
File size: 44.8 KB
Line 
1
/*
 *sourcefile xmltok_impl.c
 *      part of the expat implementation. See xmlparse.c.
 *
 *      NOTE: This file must not be compiled directly. It is
 *      #include'd from xmltok.c several times.
 */
9
/*
 *      Copyright (C) 2001 Ulrich Möller.
 *      Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd.
 *      and Clark Cooper.
 *
 *      Permission is hereby granted, free of charge, to any person obtaining
 *      a copy of this software and associated documentation files (the
 *      "Software"), to deal in the Software without restriction, including
 *      without limitation the rights to use, copy, modify, merge, publish,
 *      distribute, sublicense, and/or sell copies of the Software, and to
 *      permit persons to whom the Software is furnished to do so, subject to
 *      the following conditions:
 *
 *      The above copyright notice and this permission notice shall be included
 *      in all copies or substantial portions of the Software.
 *
 *      THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 *      EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 *      MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 *      IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 *      CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 *      TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 *      SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
34
/*
 * IS_INVALID_CHAR:
 *      fallback used when the including file has not supplied its own
 *      definition; with this fallback no multi-byte sequence is ever
 *      reported as invalid.
 */
#ifndef IS_INVALID_CHAR
#define IS_INVALID_CHAR(enc, ptr, n) (0)
#endif
38
/*
 * INVALID_LEAD_CASE:
 *      emits the "case BT_LEADn:" label of a switch over BYTE_TYPE().
 *      Returns XML_TOK_PARTIAL_CHAR when fewer than n bytes remain before
 *      "end", reports XML_TOK_INVALID at ptr when IS_INVALID_CHAR rejects
 *      the sequence, and otherwise skips the n bytes.
 *
 *      "enc" and "end" are not macro parameters; they are taken from the
 *      scope in which the macro is expanded. n must be a bare literal
 *      because it is token-pasted onto BT_LEAD.
 */
#define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \
    case BT_LEAD ## n: \
        if (end - ptr < n) \
            return XML_TOK_PARTIAL_CHAR; \
        if (IS_INVALID_CHAR(enc, ptr, n)) { \
            *(nextTokPtr) = (ptr); \
            return XML_TOK_INVALID; \
        } \
        ptr += n; \
        break;

/*
 * INVALID_CASES:
 *      emits the three lead-byte cases above plus the cases for
 *      BT_NONXML, BT_MALFORM and BT_TRAIL, all of which report
 *      XML_TOK_INVALID at ptr.
 */
#define INVALID_CASES(ptr, nextTokPtr) \
    INVALID_LEAD_CASE(2, ptr, nextTokPtr) \
    INVALID_LEAD_CASE(3, ptr, nextTokPtr) \
    INVALID_LEAD_CASE(4, ptr, nextTokPtr) \
    case BT_NONXML: \
    case BT_MALFORM: \
    case BT_TRAIL: \
        *(nextTokPtr) = (ptr); \
        return XML_TOK_INVALID;
59
/*
 * CHECK_NAME_CASE:
 *      emits the "case BT_LEADn:" label for an n-byte character inside a
 *      name. Returns XML_TOK_PARTIAL_CHAR when fewer than n bytes remain,
 *      XML_TOK_INVALID (with *nextTokPtr = ptr) when IS_NAME_CHAR rejects
 *      the character, and otherwise advances ptr by n.
 *
 *      n must be a bare literal because it is token-pasted onto BT_LEAD.
 */
#define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \
    case BT_LEAD ## n: \
        if ((end) - (ptr) < n) \
            return XML_TOK_PARTIAL_CHAR; \
        if (!IS_NAME_CHAR(enc, ptr, n)) { \
            *(nextTokPtr) = (ptr); \
            return XML_TOK_INVALID; \
        } \
        (ptr) += n; \
        break;

/*
 * CHECK_NAME_CASES:
 *      emits all switch cases that accept one name character and advance
 *      ptr past it. BT_NONASCII is first validated with
 *      IS_NAME_CHAR_MINBPC and then deliberately falls through into the
 *      single-unit cases.
 */
#define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \
    case BT_NONASCII: \
        if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \
            *(nextTokPtr) = (ptr); \
            return XML_TOK_INVALID; \
        } \
        /* fall through */ \
    case BT_NMSTRT: \
    case BT_HEX: \
    case BT_DIGIT: \
    case BT_NAME: \
    case BT_MINUS: \
        (ptr) += MINBPC(enc); \
        break; \
    CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \
    CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \
    CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)
87
/*
 * CHECK_NMSTRT_CASE:
 *      emits the "case BT_LEADn:" label for an n-byte character at the
 *      start of a name. Returns XML_TOK_PARTIAL_CHAR when fewer than n
 *      bytes remain, XML_TOK_INVALID (with *nextTokPtr = ptr) when
 *      IS_NMSTRT_CHAR rejects the character, and otherwise advances ptr
 *      by n.
 *
 *      n must be a bare literal because it is token-pasted onto BT_LEAD.
 */
#define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \
    case BT_LEAD ## n: \
        if ((end) - (ptr) < n) \
            return XML_TOK_PARTIAL_CHAR; \
        if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \
            *(nextTokPtr) = (ptr); \
            return XML_TOK_INVALID; \
        } \
        (ptr) += n; \
        break;

/*
 * CHECK_NMSTRT_CASES:
 *      emits all switch cases that accept one name-start character and
 *      advance ptr past it. BT_NONASCII is first validated with
 *      IS_NMSTRT_CHAR_MINBPC and then deliberately falls through into the
 *      single-unit cases.
 */
#define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \
    case BT_NONASCII: \
        if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \
            *(nextTokPtr) = (ptr); \
            return XML_TOK_INVALID; \
        } \
        /* fall through */ \
    case BT_NMSTRT: \
    case BT_HEX: \
        (ptr) += MINBPC(enc); \
        break; \
    CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \
    CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \
    CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)
112
/*
 * PREFIX:
 *      fallback used when the including file has not defined PREFIX;
 *      function names are then used unchanged.
 */
#ifndef PREFIX
#define PREFIX(ident) ident
#endif
116
117/* ptr points to character following "<!-" */
118
119static int EXPATENTRY PREFIX(scanComment)(const ENCODING *enc,
120 const char *ptr,
121 const char *end,
122 const char **nextTokPtr)
123{
124 if (ptr != end) {
125 if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
126 *nextTokPtr = ptr;
127 return XML_TOK_INVALID;
128 }
129 ptr += MINBPC(enc);
130 while (ptr != end) {
131 switch (BYTE_TYPE(enc, ptr)) {
132 INVALID_CASES(ptr, nextTokPtr)
133 case BT_MINUS:
134 if ((ptr += MINBPC(enc)) == end)
135 return XML_TOK_PARTIAL;
136 if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
137 if ((ptr += MINBPC(enc)) == end)
138 return XML_TOK_PARTIAL;
139 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
140 *nextTokPtr = ptr;
141 return XML_TOK_INVALID;
142 }
143 *nextTokPtr = ptr + MINBPC(enc);
144 return XML_TOK_COMMENT;
145 }
146 break;
147 default:
148 ptr += MINBPC(enc);
149 break;
150 }
151 }
152 }
153 return XML_TOK_PARTIAL;
154}
155
156/* ptr points to character following "<!" */
157
158static int EXPATENTRY PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, const char *end,
159 const char **nextTokPtr)
160{
161 if (ptr == end)
162 return XML_TOK_PARTIAL;
163 switch (BYTE_TYPE(enc, ptr)) {
164 case BT_MINUS:
165 return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
166 case BT_LSQB:
167 *nextTokPtr = ptr + MINBPC(enc);
168 return XML_TOK_COND_SECT_OPEN;
169 case BT_NMSTRT:
170 case BT_HEX:
171 ptr += MINBPC(enc);
172 break;
173 default:
174 *nextTokPtr = ptr;
175 return XML_TOK_INVALID;
176 }
177 while (ptr != end) {
178 switch (BYTE_TYPE(enc, ptr)) {
179 case BT_PERCNT:
180 if (ptr + MINBPC(enc) == end)
181 return XML_TOK_PARTIAL;
182 /* don't allow <!ENTITY% foo "whatever"> */
183 switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
184 case BT_S: case BT_CR: case BT_LF: case BT_PERCNT:
185 *nextTokPtr = ptr;
186 return XML_TOK_INVALID;
187 }
188 /* fall through */
189 case BT_S: case BT_CR: case BT_LF:
190 *nextTokPtr = ptr;
191 return XML_TOK_DECL_OPEN;
192 case BT_NMSTRT:
193 case BT_HEX:
194 ptr += MINBPC(enc);
195 break;
196 default:
197 *nextTokPtr = ptr;
198 return XML_TOK_INVALID;
199 }
200 }
201 return XML_TOK_PARTIAL;
202}
203
204static int EXPATENTRY PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end, int *tokPtr)
205{
206 int upper = 0;
207 *tokPtr = XML_TOK_PI;
208 if (end - ptr != MINBPC(enc)*3)
209 return 1;
210 switch (BYTE_TO_ASCII(enc, ptr)) {
211 case ASCII_x:
212 break;
213 case ASCII_X:
214 upper = 1;
215 break;
216 default:
217 return 1;
218 }
219 ptr += MINBPC(enc);
220 switch (BYTE_TO_ASCII(enc, ptr)) {
221 case ASCII_m:
222 break;
223 case ASCII_M:
224 upper = 1;
225 break;
226 default:
227 return 1;
228 }
229 ptr += MINBPC(enc);
230 switch (BYTE_TO_ASCII(enc, ptr)) {
231 case ASCII_l:
232 break;
233 case ASCII_L:
234 upper = 1;
235 break;
236 default:
237 return 1;
238 }
239 if (upper)
240 return 0;
241 *tokPtr = XML_TOK_XML_DECL;
242 return 1;
243}
244
245/* ptr points to character following "<?" */
246
247static int EXPATENTRY PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end,
248 const char **nextTokPtr)
249{
250 int tok;
251 const char *target = ptr;
252 if (ptr == end)
253 return XML_TOK_PARTIAL;
254 switch (BYTE_TYPE(enc, ptr)) {
255 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
256 default:
257 *nextTokPtr = ptr;
258 return XML_TOK_INVALID;
259 }
260 while (ptr != end) {
261 switch (BYTE_TYPE(enc, ptr)) {
262 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
263 case BT_S: case BT_CR: case BT_LF:
264 if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
265 *nextTokPtr = ptr;
266 return XML_TOK_INVALID;
267 }
268 ptr += MINBPC(enc);
269 while (ptr != end) {
270 switch (BYTE_TYPE(enc, ptr)) {
271 INVALID_CASES(ptr, nextTokPtr)
272 case BT_QUEST:
273 ptr += MINBPC(enc);
274 if (ptr == end)
275 return XML_TOK_PARTIAL;
276 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
277 *nextTokPtr = ptr + MINBPC(enc);
278 return tok;
279 }
280 break;
281 default:
282 ptr += MINBPC(enc);
283 break;
284 }
285 }
286 return XML_TOK_PARTIAL;
287 case BT_QUEST:
288 if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
289 *nextTokPtr = ptr;
290 return XML_TOK_INVALID;
291 }
292 ptr += MINBPC(enc);
293 if (ptr == end)
294 return XML_TOK_PARTIAL;
295 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
296 *nextTokPtr = ptr + MINBPC(enc);
297 return tok;
298 }
299 /* fall through */
300 default:
301 *nextTokPtr = ptr;
302 return XML_TOK_INVALID;
303 }
304 }
305 return XML_TOK_PARTIAL;
306}
307
308
309static int EXPATENTRY PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, const char *end,
310 const char **nextTokPtr)
311{
312 static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, ASCII_LSQB };
313 int i;
314 /* CDATA[ */
315 if (end - ptr < 6 * MINBPC(enc))
316 return XML_TOK_PARTIAL;
317 for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
318 if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
319 *nextTokPtr = ptr;
320 return XML_TOK_INVALID;
321 }
322 }
323 *nextTokPtr = ptr;
324 return XML_TOK_CDATA_SECT_OPEN;
325}
326
327static int EXPATENTRY PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *end,
328 const char **nextTokPtr)
329{
330 if (ptr == end)
331 return XML_TOK_NONE;
332 if (MINBPC(enc) > 1) {
333 size_t n = end - ptr;
334 if (n & (MINBPC(enc) - 1)) {
335 n &= ~(MINBPC(enc) - 1);
336 if (n == 0)
337 return XML_TOK_PARTIAL;
338 end = ptr + n;
339 }
340 }
341 switch (BYTE_TYPE(enc, ptr)) {
342 case BT_RSQB:
343 ptr += MINBPC(enc);
344 if (ptr == end)
345 return XML_TOK_PARTIAL;
346 if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
347 break;
348 ptr += MINBPC(enc);
349 if (ptr == end)
350 return XML_TOK_PARTIAL;
351 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
352 ptr -= MINBPC(enc);
353 break;
354 }
355 *nextTokPtr = ptr + MINBPC(enc);
356 return XML_TOK_CDATA_SECT_CLOSE;
357 case BT_CR:
358 ptr += MINBPC(enc);
359 if (ptr == end)
360 return XML_TOK_PARTIAL;
361 if (BYTE_TYPE(enc, ptr) == BT_LF)
362 ptr += MINBPC(enc);
363 *nextTokPtr = ptr;
364 return XML_TOK_DATA_NEWLINE;
365 case BT_LF:
366 *nextTokPtr = ptr + MINBPC(enc);
367 return XML_TOK_DATA_NEWLINE;
368 INVALID_CASES(ptr, nextTokPtr)
369 default:
370 ptr += MINBPC(enc);
371 break;
372 }
373 while (ptr != end) {
374 switch (BYTE_TYPE(enc, ptr)) {
375#define LEAD_CASE(n) \
376 case BT_LEAD ## n: \
377 if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
378 *nextTokPtr = ptr; \
379 return XML_TOK_DATA_CHARS; \
380 } \
381 ptr += n; \
382 break;
383 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
384#undef LEAD_CASE
385 case BT_NONXML:
386 case BT_MALFORM:
387 case BT_TRAIL:
388 case BT_CR:
389 case BT_LF:
390 case BT_RSQB:
391 *nextTokPtr = ptr;
392 return XML_TOK_DATA_CHARS;
393 default:
394 ptr += MINBPC(enc);
395 break;
396 }
397 }
398 *nextTokPtr = ptr;
399 return XML_TOK_DATA_CHARS;
400}
401
402/* ptr points to character following "</" */
403
404static int EXPATENTRY PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, const char *end,
405 const char **nextTokPtr)
406{
407 if (ptr == end)
408 return XML_TOK_PARTIAL;
409 switch (BYTE_TYPE(enc, ptr)) {
410 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
411 default:
412 *nextTokPtr = ptr;
413 return XML_TOK_INVALID;
414 }
415 while (ptr != end) {
416 switch (BYTE_TYPE(enc, ptr)) {
417 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
418 case BT_S: case BT_CR: case BT_LF:
419 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
420 switch (BYTE_TYPE(enc, ptr)) {
421 case BT_S: case BT_CR: case BT_LF:
422 break;
423 case BT_GT:
424 *nextTokPtr = ptr + MINBPC(enc);
425 return XML_TOK_END_TAG;
426 default:
427 *nextTokPtr = ptr;
428 return XML_TOK_INVALID;
429 }
430 }
431 return XML_TOK_PARTIAL;
432#ifdef XML_NS
433 case BT_COLON:
434 /* no need to check qname syntax here, since end-tag must match exactly */
435 ptr += MINBPC(enc);
436 break;
437#endif
438 case BT_GT:
439 *nextTokPtr = ptr + MINBPC(enc);
440 return XML_TOK_END_TAG;
441 default:
442 *nextTokPtr = ptr;
443 return XML_TOK_INVALID;
444 }
445 }
446 return XML_TOK_PARTIAL;
447}
448
449/* ptr points to character following "&#X" */
450
451static int EXPATENTRY PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, const char *end,
452 const char **nextTokPtr)
453{
454 if (ptr != end) {
455 switch (BYTE_TYPE(enc, ptr)) {
456 case BT_DIGIT:
457 case BT_HEX:
458 break;
459 default:
460 *nextTokPtr = ptr;
461 return XML_TOK_INVALID;
462 }
463 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
464 switch (BYTE_TYPE(enc, ptr)) {
465 case BT_DIGIT:
466 case BT_HEX:
467 break;
468 case BT_SEMI:
469 *nextTokPtr = ptr + MINBPC(enc);
470 return XML_TOK_CHAR_REF;
471 default:
472 *nextTokPtr = ptr;
473 return XML_TOK_INVALID;
474 }
475 }
476 }
477 return XML_TOK_PARTIAL;
478}
479
480/* ptr points to character following "&#" */
481
482static int EXPATENTRY PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, const char *end,
483 const char **nextTokPtr)
484{
485 if (ptr != end) {
486 if (CHAR_MATCHES(enc, ptr, ASCII_x))
487 return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
488 switch (BYTE_TYPE(enc, ptr)) {
489 case BT_DIGIT:
490 break;
491 default:
492 *nextTokPtr = ptr;
493 return XML_TOK_INVALID;
494 }
495 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
496 switch (BYTE_TYPE(enc, ptr)) {
497 case BT_DIGIT:
498 break;
499 case BT_SEMI:
500 *nextTokPtr = ptr + MINBPC(enc);
501 return XML_TOK_CHAR_REF;
502 default:
503 *nextTokPtr = ptr;
504 return XML_TOK_INVALID;
505 }
506 }
507 }
508 return XML_TOK_PARTIAL;
509}
510
511/* ptr points to character following "&" */
512
513static int EXPATENTRY PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
514 const char **nextTokPtr)
515{
516 if (ptr == end)
517 return XML_TOK_PARTIAL;
518 switch (BYTE_TYPE(enc, ptr)) {
519 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
520 case BT_NUM:
521 return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
522 default:
523 *nextTokPtr = ptr;
524 return XML_TOK_INVALID;
525 }
526 while (ptr != end) {
527 switch (BYTE_TYPE(enc, ptr)) {
528 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
529 case BT_SEMI:
530 *nextTokPtr = ptr + MINBPC(enc);
531 return XML_TOK_ENTITY_REF;
532 default:
533 *nextTokPtr = ptr;
534 return XML_TOK_INVALID;
535 }
536 }
537 return XML_TOK_PARTIAL;
538}
539
540/* ptr points to character following first character of attribute name */
541
542static int EXPATENTRY PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
543 const char **nextTokPtr)
544{
545#ifdef XML_NS
546 int hadColon = 0;
547#endif
548 while (ptr != end) {
549 switch (BYTE_TYPE(enc, ptr)) {
550 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
551#ifdef XML_NS
552 case BT_COLON:
553 if (hadColon) {
554 *nextTokPtr = ptr;
555 return XML_TOK_INVALID;
556 }
557 hadColon = 1;
558 ptr += MINBPC(enc);
559 if (ptr == end)
560 return XML_TOK_PARTIAL;
561 switch (BYTE_TYPE(enc, ptr)) {
562 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
563 default:
564 *nextTokPtr = ptr;
565 return XML_TOK_INVALID;
566 }
567 break;
568#endif
569 case BT_S: case BT_CR: case BT_LF:
570 for (;;) {
571 int t;
572
573 ptr += MINBPC(enc);
574 if (ptr == end)
575 return XML_TOK_PARTIAL;
576 t = BYTE_TYPE(enc, ptr);
577 if (t == BT_EQUALS)
578 break;
579 switch (t) {
580 case BT_S:
581 case BT_LF:
582 case BT_CR:
583 break;
584 default:
585 *nextTokPtr = ptr;
586 return XML_TOK_INVALID;
587 }
588 }
589 /* fall through */
590 case BT_EQUALS:
591 {
592 int open;
593#ifdef XML_NS
594 hadColon = 0;
595#endif
596 for (;;) {
597
598 ptr += MINBPC(enc);
599 if (ptr == end)
600 return XML_TOK_PARTIAL;
601 open = BYTE_TYPE(enc, ptr);
602 if (open == BT_QUOT || open == BT_APOS)
603 break;
604 switch (open) {
605 case BT_S:
606 case BT_LF:
607 case BT_CR:
608 break;
609 default:
610 *nextTokPtr = ptr;
611 return XML_TOK_INVALID;
612 }
613 }
614 ptr += MINBPC(enc);
615 /* in attribute value */
616 for (;;) {
617 int t;
618 if (ptr == end)
619 return XML_TOK_PARTIAL;
620 t = BYTE_TYPE(enc, ptr);
621 if (t == open)
622 break;
623 switch (t) {
624 INVALID_CASES(ptr, nextTokPtr)
625 case BT_AMP:
626 {
627 int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr);
628 if (tok <= 0) {
629 if (tok == XML_TOK_INVALID)
630 *nextTokPtr = ptr;
631 return tok;
632 }
633 break;
634 }
635 case BT_LT:
636 *nextTokPtr = ptr;
637 return XML_TOK_INVALID;
638 default:
639 ptr += MINBPC(enc);
640 break;
641 }
642 }
643 ptr += MINBPC(enc);
644 if (ptr == end)
645 return XML_TOK_PARTIAL;
646 switch (BYTE_TYPE(enc, ptr)) {
647 case BT_S:
648 case BT_CR:
649 case BT_LF:
650 break;
651 case BT_SOL:
652 goto sol;
653 case BT_GT:
654 goto gt;
655 default:
656 *nextTokPtr = ptr;
657 return XML_TOK_INVALID;
658 }
659 /* ptr points to closing quote */
660 for (;;) {
661 ptr += MINBPC(enc);
662 if (ptr == end)
663 return XML_TOK_PARTIAL;
664 switch (BYTE_TYPE(enc, ptr)) {
665 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
666 case BT_S: case BT_CR: case BT_LF:
667 continue;
668 case BT_GT:
669 gt:
670 *nextTokPtr = ptr + MINBPC(enc);
671 return XML_TOK_START_TAG_WITH_ATTS;
672 case BT_SOL:
673 sol:
674 ptr += MINBPC(enc);
675 if (ptr == end)
676 return XML_TOK_PARTIAL;
677 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
678 *nextTokPtr = ptr;
679 return XML_TOK_INVALID;
680 }
681 *nextTokPtr = ptr + MINBPC(enc);
682 return XML_TOK_EMPTY_ELEMENT_WITH_ATTS;
683 default:
684 *nextTokPtr = ptr;
685 return XML_TOK_INVALID;
686 }
687 break;
688 }
689 break;
690 }
691 default:
692 *nextTokPtr = ptr;
693 return XML_TOK_INVALID;
694 }
695 }
696 return XML_TOK_PARTIAL;
697}
698
699/* ptr points to character following "<" */
700
701static int EXPATENTRY PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
702 const char **nextTokPtr)
703{
704#ifdef XML_NS
705 int hadColon;
706#endif
707 if (ptr == end)
708 return XML_TOK_PARTIAL;
709 switch (BYTE_TYPE(enc, ptr)) {
710 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
711 case BT_EXCL:
712 if ((ptr += MINBPC(enc)) == end)
713 return XML_TOK_PARTIAL;
714 switch (BYTE_TYPE(enc, ptr)) {
715 case BT_MINUS:
716 return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
717 case BT_LSQB:
718 return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc), end, nextTokPtr);
719 }
720 *nextTokPtr = ptr;
721 return XML_TOK_INVALID;
722 case BT_QUEST:
723 return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
724 case BT_SOL:
725 return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr);
726 default:
727 *nextTokPtr = ptr;
728 return XML_TOK_INVALID;
729 }
730#ifdef XML_NS
731 hadColon = 0;
732#endif
733 /* we have a start-tag */
734 while (ptr != end) {
735 switch (BYTE_TYPE(enc, ptr)) {
736 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
737#ifdef XML_NS
738 case BT_COLON:
739 if (hadColon) {
740 *nextTokPtr = ptr;
741 return XML_TOK_INVALID;
742 }
743 hadColon = 1;
744 ptr += MINBPC(enc);
745 if (ptr == end)
746 return XML_TOK_PARTIAL;
747 switch (BYTE_TYPE(enc, ptr)) {
748 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
749 default:
750 *nextTokPtr = ptr;
751 return XML_TOK_INVALID;
752 }
753 break;
754#endif
755 case BT_S: case BT_CR: case BT_LF:
756 {
757 ptr += MINBPC(enc);
758 while (ptr != end) {
759 switch (BYTE_TYPE(enc, ptr)) {
760 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
761 case BT_GT:
762 goto gt;
763 case BT_SOL:
764 goto sol;
765 case BT_S: case BT_CR: case BT_LF:
766 ptr += MINBPC(enc);
767 continue;
768 default:
769 *nextTokPtr = ptr;
770 return XML_TOK_INVALID;
771 }
772 return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr);
773 }
774 return XML_TOK_PARTIAL;
775 }
776 case BT_GT:
777 gt:
778 *nextTokPtr = ptr + MINBPC(enc);
779 return XML_TOK_START_TAG_NO_ATTS;
780 case BT_SOL:
781 sol:
782 ptr += MINBPC(enc);
783 if (ptr == end)
784 return XML_TOK_PARTIAL;
785 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
786 *nextTokPtr = ptr;
787 return XML_TOK_INVALID;
788 }
789 *nextTokPtr = ptr + MINBPC(enc);
790 return XML_TOK_EMPTY_ELEMENT_NO_ATTS;
791 default:
792 *nextTokPtr = ptr;
793 return XML_TOK_INVALID;
794 }
795 }
796 return XML_TOK_PARTIAL;
797}
798
799static int EXPATENTRY PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
800 const char **nextTokPtr)
801{
802 if (ptr == end)
803 return XML_TOK_NONE;
804 if (MINBPC(enc) > 1) {
805 size_t n = end - ptr;
806 if (n & (MINBPC(enc) - 1)) {
807 n &= ~(MINBPC(enc) - 1);
808 if (n == 0)
809 return XML_TOK_PARTIAL;
810 end = ptr + n;
811 }
812 }
813 switch (BYTE_TYPE(enc, ptr)) {
814 case BT_LT:
815 return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr);
816 case BT_AMP:
817 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
818 case BT_CR:
819 ptr += MINBPC(enc);
820 if (ptr == end)
821 return XML_TOK_TRAILING_CR;
822 if (BYTE_TYPE(enc, ptr) == BT_LF)
823 ptr += MINBPC(enc);
824 *nextTokPtr = ptr;
825 return XML_TOK_DATA_NEWLINE;
826 case BT_LF:
827 *nextTokPtr = ptr + MINBPC(enc);
828 return XML_TOK_DATA_NEWLINE;
829 case BT_RSQB:
830 ptr += MINBPC(enc);
831 if (ptr == end)
832 return XML_TOK_TRAILING_RSQB;
833 if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
834 break;
835 ptr += MINBPC(enc);
836 if (ptr == end)
837 return XML_TOK_TRAILING_RSQB;
838 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
839 ptr -= MINBPC(enc);
840 break;
841 }
842 *nextTokPtr = ptr;
843 return XML_TOK_INVALID;
844 INVALID_CASES(ptr, nextTokPtr)
845 default:
846 ptr += MINBPC(enc);
847 break;
848 }
849 while (ptr != end) {
850 switch (BYTE_TYPE(enc, ptr)) {
851#define LEAD_CASE(n) \
852 case BT_LEAD ## n: \
853 if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
854 *nextTokPtr = ptr; \
855 return XML_TOK_DATA_CHARS; \
856 } \
857 ptr += n; \
858 break;
859 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
860#undef LEAD_CASE
861 case BT_RSQB:
862 if (ptr + MINBPC(enc) != end) {
863 if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
864 ptr += MINBPC(enc);
865 break;
866 }
867 if (ptr + 2*MINBPC(enc) != end) {
868 if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) {
869 ptr += MINBPC(enc);
870 break;
871 }
872 *nextTokPtr = ptr + 2*MINBPC(enc);
873 return XML_TOK_INVALID;
874 }
875 }
876 /* fall through */
877 case BT_AMP:
878 case BT_LT:
879 case BT_NONXML:
880 case BT_MALFORM:
881 case BT_TRAIL:
882 case BT_CR:
883 case BT_LF:
884 *nextTokPtr = ptr;
885 return XML_TOK_DATA_CHARS;
886 default:
887 ptr += MINBPC(enc);
888 break;
889 }
890 }
891 *nextTokPtr = ptr;
892 return XML_TOK_DATA_CHARS;
893}
894
895/* ptr points to character following "%" */
896
897static int EXPATENTRY PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
898 const char **nextTokPtr)
899{
900 if (ptr == end)
901 return XML_TOK_PARTIAL;
902 switch (BYTE_TYPE(enc, ptr)) {
903 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
904 case BT_S: case BT_LF: case BT_CR: case BT_PERCNT:
905 *nextTokPtr = ptr;
906 return XML_TOK_PERCENT;
907 default:
908 *nextTokPtr = ptr;
909 return XML_TOK_INVALID;
910 }
911 while (ptr != end) {
912 switch (BYTE_TYPE(enc, ptr)) {
913 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
914 case BT_SEMI:
915 *nextTokPtr = ptr + MINBPC(enc);
916 return XML_TOK_PARAM_ENTITY_REF;
917 default:
918 *nextTokPtr = ptr;
919 return XML_TOK_INVALID;
920 }
921 }
922 return XML_TOK_PARTIAL;
923}
924
925static int EXPATENTRY PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
926 const char **nextTokPtr)
927{
928 if (ptr == end)
929 return XML_TOK_PARTIAL;
930 switch (BYTE_TYPE(enc, ptr)) {
931 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
932 default:
933 *nextTokPtr = ptr;
934 return XML_TOK_INVALID;
935 }
936 while (ptr != end) {
937 switch (BYTE_TYPE(enc, ptr)) {
938 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
939 case BT_CR: case BT_LF: case BT_S:
940 case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR:
941 *nextTokPtr = ptr;
942 return XML_TOK_POUND_NAME;
943 default:
944 *nextTokPtr = ptr;
945 return XML_TOK_INVALID;
946 }
947 }
948 return -XML_TOK_POUND_NAME;
949}
950
951static int EXPATENTRY PREFIX(scanLit)(int open, const ENCODING *enc,
952 const char *ptr, const char *end,
953 const char **nextTokPtr)
954{
955 while (ptr != end) {
956 int t = BYTE_TYPE(enc, ptr);
957 switch (t) {
958 INVALID_CASES(ptr, nextTokPtr)
959 case BT_QUOT:
960 case BT_APOS:
961 ptr += MINBPC(enc);
962 if (t != open)
963 break;
964 if (ptr == end)
965 return -XML_TOK_LITERAL;
966 *nextTokPtr = ptr;
967 switch (BYTE_TYPE(enc, ptr)) {
968 case BT_S: case BT_CR: case BT_LF:
969 case BT_GT: case BT_PERCNT: case BT_LSQB:
970 return XML_TOK_LITERAL;
971 default:
972 return XML_TOK_INVALID;
973 }
974 default:
975 ptr += MINBPC(enc);
976 break;
977 }
978 }
979 return XML_TOK_PARTIAL;
980}
981
982static int EXPATENTRY PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
983 const char **nextTokPtr)
984{
985 int tok;
986 if (ptr == end)
987 return XML_TOK_NONE;
988 if (MINBPC(enc) > 1) {
989 size_t n = end - ptr;
990 if (n & (MINBPC(enc) - 1)) {
991 n &= ~(MINBPC(enc) - 1);
992 if (n == 0)
993 return XML_TOK_PARTIAL;
994 end = ptr + n;
995 }
996 }
997 switch (BYTE_TYPE(enc, ptr)) {
998 case BT_QUOT:
999 return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr);
1000 case BT_APOS:
1001 return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr);
1002 case BT_LT:
1003 {
1004 ptr += MINBPC(enc);
1005 if (ptr == end)
1006 return XML_TOK_PARTIAL;
1007 switch (BYTE_TYPE(enc, ptr)) {
1008 case BT_EXCL:
1009 return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1010 case BT_QUEST:
1011 return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1012 case BT_NMSTRT:
1013 case BT_HEX:
1014 case BT_NONASCII:
1015 case BT_LEAD2:
1016 case BT_LEAD3:
1017 case BT_LEAD4:
1018 *nextTokPtr = ptr - MINBPC(enc);
1019 return XML_TOK_INSTANCE_START;
1020 }
1021 *nextTokPtr = ptr;
1022 return XML_TOK_INVALID;
1023 }
1024 case BT_CR:
1025 if (ptr + MINBPC(enc) == end)
1026 return -XML_TOK_PROLOG_S;
1027 /* fall through */
1028 case BT_S: case BT_LF:
1029 for (;;) {
1030 ptr += MINBPC(enc);
1031 if (ptr == end)
1032 break;
1033 switch (BYTE_TYPE(enc, ptr)) {
1034 case BT_S: case BT_LF:
1035 break;
1036 case BT_CR:
1037 /* don't split CR/LF pair */
1038 if (ptr + MINBPC(enc) != end)
1039 break;
1040 /* fall through */
1041 default:
1042 *nextTokPtr = ptr;
1043 return XML_TOK_PROLOG_S;
1044 }
1045 }
1046 *nextTokPtr = ptr;
1047 return XML_TOK_PROLOG_S;
1048 case BT_PERCNT:
1049 return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1050 case BT_COMMA:
1051 *nextTokPtr = ptr + MINBPC(enc);
1052 return XML_TOK_COMMA;
1053 case BT_LSQB:
1054 *nextTokPtr = ptr + MINBPC(enc);
1055 return XML_TOK_OPEN_BRACKET;
1056 case BT_RSQB:
1057 ptr += MINBPC(enc);
1058 if (ptr == end)
1059 return -XML_TOK_CLOSE_BRACKET;
1060 if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
1061 if (ptr + MINBPC(enc) == end)
1062 return XML_TOK_PARTIAL;
1063 if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
1064 *nextTokPtr = ptr + 2*MINBPC(enc);
1065 return XML_TOK_COND_SECT_CLOSE;
1066 }
1067 }
1068 *nextTokPtr = ptr;
1069 return XML_TOK_CLOSE_BRACKET;
1070 case BT_LPAR:
1071 *nextTokPtr = ptr + MINBPC(enc);
1072 return XML_TOK_OPEN_PAREN;
1073 case BT_RPAR:
1074 ptr += MINBPC(enc);
1075 if (ptr == end)
1076 return -XML_TOK_CLOSE_PAREN;
1077 switch (BYTE_TYPE(enc, ptr)) {
1078 case BT_AST:
1079 *nextTokPtr = ptr + MINBPC(enc);
1080 return XML_TOK_CLOSE_PAREN_ASTERISK;
1081 case BT_QUEST:
1082 *nextTokPtr = ptr + MINBPC(enc);
1083 return XML_TOK_CLOSE_PAREN_QUESTION;
1084 case BT_PLUS:
1085 *nextTokPtr = ptr + MINBPC(enc);
1086 return XML_TOK_CLOSE_PAREN_PLUS;
1087 case BT_CR: case BT_LF: case BT_S:
1088 case BT_GT: case BT_COMMA: case BT_VERBAR:
1089 case BT_RPAR:
1090 *nextTokPtr = ptr;
1091 return XML_TOK_CLOSE_PAREN;
1092 }
1093 *nextTokPtr = ptr;
1094 return XML_TOK_INVALID;
1095 case BT_VERBAR:
1096 *nextTokPtr = ptr + MINBPC(enc);
1097 return XML_TOK_OR;
1098 case BT_GT:
1099 *nextTokPtr = ptr + MINBPC(enc);
1100 return XML_TOK_DECL_CLOSE;
1101 case BT_NUM:
1102 return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1103#define LEAD_CASE(n) \
1104 case BT_LEAD ## n: \
1105 if (end - ptr < n) \
1106 return XML_TOK_PARTIAL_CHAR; \
1107 if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
1108 ptr += n; \
1109 tok = XML_TOK_NAME; \
1110 break; \
1111 } \
1112 if (IS_NAME_CHAR(enc, ptr, n)) { \
1113 ptr += n; \
1114 tok = XML_TOK_NMTOKEN; \
1115 break; \
1116 } \
1117 *nextTokPtr = ptr; \
1118 return XML_TOK_INVALID;
1119 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1120#undef LEAD_CASE
1121 case BT_NMSTRT:
1122 case BT_HEX:
1123 tok = XML_TOK_NAME;
1124 ptr += MINBPC(enc);
1125 break;
1126 case BT_DIGIT:
1127 case BT_NAME:
1128 case BT_MINUS:
1129#ifdef XML_NS
1130 case BT_COLON:
1131#endif
1132 tok = XML_TOK_NMTOKEN;
1133 ptr += MINBPC(enc);
1134 break;
1135 case BT_NONASCII:
1136 if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {
1137 ptr += MINBPC(enc);
1138 tok = XML_TOK_NAME;
1139 break;
1140 }
1141 if (IS_NAME_CHAR_MINBPC(enc, ptr)) {
1142 ptr += MINBPC(enc);
1143 tok = XML_TOK_NMTOKEN;
1144 break;
1145 }
1146 /* fall through */
1147 default:
1148 *nextTokPtr = ptr;
1149 return XML_TOK_INVALID;
1150 }
1151 while (ptr != end) {
1152 switch (BYTE_TYPE(enc, ptr)) {
1153 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
1154 case BT_GT: case BT_RPAR: case BT_COMMA:
1155 case BT_VERBAR: case BT_LSQB: case BT_PERCNT:
1156 case BT_S: case BT_CR: case BT_LF:
1157 *nextTokPtr = ptr;
1158 return tok;
1159#ifdef XML_NS
1160 case BT_COLON:
1161 ptr += MINBPC(enc);
1162 switch (tok) {
1163 case XML_TOK_NAME:
1164 if (ptr == end)
1165 return XML_TOK_PARTIAL;
1166 tok = XML_TOK_PREFIXED_NAME;
1167 switch (BYTE_TYPE(enc, ptr)) {
1168 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
1169 default:
1170 tok = XML_TOK_NMTOKEN;
1171 break;
1172 }
1173 break;
1174 case XML_TOK_PREFIXED_NAME:
1175 tok = XML_TOK_NMTOKEN;
1176 break;
1177 }
1178 break;
1179#endif
1180 case BT_PLUS:
1181 if (tok == XML_TOK_NMTOKEN) {
1182 *nextTokPtr = ptr;
1183 return XML_TOK_INVALID;
1184 }
1185 *nextTokPtr = ptr + MINBPC(enc);
1186 return XML_TOK_NAME_PLUS;
1187 case BT_AST:
1188 if (tok == XML_TOK_NMTOKEN) {
1189 *nextTokPtr = ptr;
1190 return XML_TOK_INVALID;
1191 }
1192 *nextTokPtr = ptr + MINBPC(enc);
1193 return XML_TOK_NAME_ASTERISK;
1194 case BT_QUEST:
1195 if (tok == XML_TOK_NMTOKEN) {
1196 *nextTokPtr = ptr;
1197 return XML_TOK_INVALID;
1198 }
1199 *nextTokPtr = ptr + MINBPC(enc);
1200 return XML_TOK_NAME_QUESTION;
1201 default:
1202 *nextTokPtr = ptr;
1203 return XML_TOK_INVALID;
1204 }
1205 }
1206 return -tok;
1207}
1208
1209static int EXPATENTRY PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end,
1210 const char **nextTokPtr)
1211{
1212 const char *start;
1213 if (ptr == end)
1214 return XML_TOK_NONE;
1215 start = ptr;
1216 while (ptr != end) {
1217 switch (BYTE_TYPE(enc, ptr)) {
1218#define LEAD_CASE(n) \
1219 case BT_LEAD ## n: ptr += n; break;
1220 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1221#undef LEAD_CASE
1222 case BT_AMP:
1223 if (ptr == start)
1224 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1225 *nextTokPtr = ptr;
1226 return XML_TOK_DATA_CHARS;
1227 case BT_LT:
1228 /* this is for inside entity references */
1229 *nextTokPtr = ptr;
1230 return XML_TOK_INVALID;
1231 case BT_LF:
1232 if (ptr == start) {
1233 *nextTokPtr = ptr + MINBPC(enc);
1234 return XML_TOK_DATA_NEWLINE;
1235 }
1236 *nextTokPtr = ptr;
1237 return XML_TOK_DATA_CHARS;
1238 case BT_CR:
1239 if (ptr == start) {
1240 ptr += MINBPC(enc);
1241 if (ptr == end)
1242 return XML_TOK_TRAILING_CR;
1243 if (BYTE_TYPE(enc, ptr) == BT_LF)
1244 ptr += MINBPC(enc);
1245 *nextTokPtr = ptr;
1246 return XML_TOK_DATA_NEWLINE;
1247 }
1248 *nextTokPtr = ptr;
1249 return XML_TOK_DATA_CHARS;
1250 case BT_S:
1251 if (ptr == start) {
1252 *nextTokPtr = ptr + MINBPC(enc);
1253 return XML_TOK_ATTRIBUTE_VALUE_S;
1254 }
1255 *nextTokPtr = ptr;
1256 return XML_TOK_DATA_CHARS;
1257 default:
1258 ptr += MINBPC(enc);
1259 break;
1260 }
1261 }
1262 *nextTokPtr = ptr;
1263 return XML_TOK_DATA_CHARS;
1264}
1265
1266static int EXPATENTRY PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end,
1267 const char **nextTokPtr)
1268{
1269 const char *start;
1270 if (ptr == end)
1271 return XML_TOK_NONE;
1272 start = ptr;
1273 while (ptr != end) {
1274 switch (BYTE_TYPE(enc, ptr)) {
1275#define LEAD_CASE(n) \
1276 case BT_LEAD ## n: ptr += n; break;
1277 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1278#undef LEAD_CASE
1279 case BT_AMP:
1280 if (ptr == start)
1281 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1282 *nextTokPtr = ptr;
1283 return XML_TOK_DATA_CHARS;
1284 case BT_PERCNT:
1285 if (ptr == start) {
1286 int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc),
1287 end, nextTokPtr);
1288 return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok;
1289 }
1290 *nextTokPtr = ptr;
1291 return XML_TOK_DATA_CHARS;
1292 case BT_LF:
1293 if (ptr == start) {
1294 *nextTokPtr = ptr + MINBPC(enc);
1295 return XML_TOK_DATA_NEWLINE;
1296 }
1297 *nextTokPtr = ptr;
1298 return XML_TOK_DATA_CHARS;
1299 case BT_CR:
1300 if (ptr == start) {
1301 ptr += MINBPC(enc);
1302 if (ptr == end)
1303 return XML_TOK_TRAILING_CR;
1304 if (BYTE_TYPE(enc, ptr) == BT_LF)
1305 ptr += MINBPC(enc);
1306 *nextTokPtr = ptr;
1307 return XML_TOK_DATA_NEWLINE;
1308 }
1309 *nextTokPtr = ptr;
1310 return XML_TOK_DATA_CHARS;
1311 default:
1312 ptr += MINBPC(enc);
1313 break;
1314 }
1315 }
1316 *nextTokPtr = ptr;
1317 return XML_TOK_DATA_CHARS;
1318}
1319
1320#ifdef XML_DTD
1321
1322static int EXPATENTRY PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, const char *end,
1323 const char **nextTokPtr)
1324{
1325 int level = 0;
1326 if (MINBPC(enc) > 1) {
1327 size_t n = end - ptr;
1328 if (n & (MINBPC(enc) - 1)) {
1329 n &= ~(MINBPC(enc) - 1);
1330 end = ptr + n;
1331 }
1332 }
1333 while (ptr != end) {
1334 switch (BYTE_TYPE(enc, ptr)) {
1335 INVALID_CASES(ptr, nextTokPtr)
1336 case BT_LT:
1337 if ((ptr += MINBPC(enc)) == end)
1338 return XML_TOK_PARTIAL;
1339 if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) {
1340 if ((ptr += MINBPC(enc)) == end)
1341 return XML_TOK_PARTIAL;
1342 if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) {
1343 ++level;
1344 ptr += MINBPC(enc);
1345 }
1346 }
1347 break;
1348 case BT_RSQB:
1349 if ((ptr += MINBPC(enc)) == end)
1350 return XML_TOK_PARTIAL;
1351 if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
1352 if ((ptr += MINBPC(enc)) == end)
1353 return XML_TOK_PARTIAL;
1354 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
1355 ptr += MINBPC(enc);
1356 if (level == 0) {
1357 *nextTokPtr = ptr;
1358 return XML_TOK_IGNORE_SECT;
1359 }
1360 --level;
1361 }
1362 }
1363 break;
1364 default:
1365 ptr += MINBPC(enc);
1366 break;
1367 }
1368 }
1369 return XML_TOK_PARTIAL;
1370}
1371
1372#endif /* XML_DTD */
1373
1374static int EXPATENTRY PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
1375 const char **badPtr)
1376{
1377 ptr += MINBPC(enc);
1378 end -= MINBPC(enc);
1379 for (; ptr != end; ptr += MINBPC(enc)) {
1380 switch (BYTE_TYPE(enc, ptr)) {
1381 case BT_DIGIT:
1382 case BT_HEX:
1383 case BT_MINUS:
1384 case BT_APOS:
1385 case BT_LPAR:
1386 case BT_RPAR:
1387 case BT_PLUS:
1388 case BT_COMMA:
1389 case BT_SOL:
1390 case BT_EQUALS:
1391 case BT_QUEST:
1392 case BT_CR:
1393 case BT_LF:
1394 case BT_SEMI:
1395 case BT_EXCL:
1396 case BT_AST:
1397 case BT_PERCNT:
1398 case BT_NUM:
1399#ifdef XML_NS
1400 case BT_COLON:
1401#endif
1402 break;
1403 case BT_S:
1404 if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) {
1405 *badPtr = ptr;
1406 return 0;
1407 }
1408 break;
1409 case BT_NAME:
1410 case BT_NMSTRT:
1411 if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f))
1412 break;
1413 default:
1414 switch (BYTE_TO_ASCII(enc, ptr)) {
1415 case 0x24: /* $ */
1416 case 0x40: /* @ */
1417 break;
1418 default:
1419 *badPtr = ptr;
1420 return 0;
1421 }
1422 break;
1423 }
1424 }
1425 return 1;
1426}
1427
1428/* This must only be called for a well-formed start-tag or empty element tag.
1429Returns the number of attributes. Pointers to the first attsMax attributes
1430are stored in atts. */
1431
1432static int EXPATENTRY PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
1433 int attsMax, ATTRIBUTE *atts)
1434{
1435 enum { other, inName, inValue } state = inName;
1436 int nAtts = 0;
1437 int open = 0; /* defined when state == inValue;
1438 initialization just to shut up compilers */
1439
1440 for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) {
1441 switch (BYTE_TYPE(enc, ptr)) {
1442#define START_NAME \
1443 if (state == other) { \
1444 if (nAtts < attsMax) { \
1445 atts[nAtts].name = ptr; \
1446 atts[nAtts].normalized = 1; \
1447 } \
1448 state = inName; \
1449 }
1450#define LEAD_CASE(n) \
1451 case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break;
1452 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1453#undef LEAD_CASE
1454 case BT_NONASCII:
1455 case BT_NMSTRT:
1456 case BT_HEX:
1457 START_NAME
1458 break;
1459#undef START_NAME
1460 case BT_QUOT:
1461 if (state != inValue) {
1462 if (nAtts < attsMax)
1463 atts[nAtts].valuePtr = ptr + MINBPC(enc);
1464 state = inValue;
1465 open = BT_QUOT;
1466 }
1467 else if (open == BT_QUOT) {
1468 state = other;
1469 if (nAtts < attsMax)
1470 atts[nAtts].valueEnd = ptr;
1471 nAtts++;
1472 }
1473 break;
1474 case BT_APOS:
1475 if (state != inValue) {
1476 if (nAtts < attsMax)
1477 atts[nAtts].valuePtr = ptr + MINBPC(enc);
1478 state = inValue;
1479 open = BT_APOS;
1480 }
1481 else if (open == BT_APOS) {
1482 state = other;
1483 if (nAtts < attsMax)
1484 atts[nAtts].valueEnd = ptr;
1485 nAtts++;
1486 }
1487 break;
1488 case BT_AMP:
1489 if (nAtts < attsMax)
1490 atts[nAtts].normalized = 0;
1491 break;
1492 case BT_S:
1493 if (state == inName)
1494 state = other;
1495 else if (state == inValue
1496 && nAtts < attsMax
1497 && atts[nAtts].normalized
1498 && (ptr == atts[nAtts].valuePtr
1499 || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE
1500 || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE
1501 || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open))
1502 atts[nAtts].normalized = 0;
1503 break;
1504 case BT_CR: case BT_LF:
1505 /* This case ensures that the first attribute name is counted
1506 Apart from that we could just change state on the quote. */
1507 if (state == inName)
1508 state = other;
1509 else if (state == inValue && nAtts < attsMax)
1510 atts[nAtts].normalized = 0;
1511 break;
1512 case BT_GT:
1513 case BT_SOL:
1514 if (state != inValue)
1515 return nAtts;
1516 break;
1517 default:
1518 break;
1519 }
1520 }
1521 /* not reached */
1522}
1523
1524static int EXPATENTRY PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)
1525{
1526 int result = 0;
1527 /* skip &# */
1528 ptr += 2*MINBPC(enc);
1529 if (CHAR_MATCHES(enc, ptr, ASCII_x)) {
1530 for (ptr += MINBPC(enc); !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
1531 int c = BYTE_TO_ASCII(enc, ptr);
1532 switch (c) {
1533 case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4:
1534 case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9:
1535 result <<= 4;
1536 result |= (c - ASCII_0);
1537 break;
1538 case ASCII_A: case ASCII_B: case ASCII_C: case ASCII_D: case ASCII_E: case ASCII_F:
1539 result <<= 4;
1540 result += 10 + (c - ASCII_A);
1541 break;
1542 case ASCII_a: case ASCII_b: case ASCII_c: case ASCII_d: case ASCII_e: case ASCII_f:
1543 result <<= 4;
1544 result += 10 + (c - ASCII_a);
1545 break;
1546 }
1547 if (result >= 0x110000)
1548 return -1;
1549 }
1550 }
1551 else {
1552 for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
1553 int c = BYTE_TO_ASCII(enc, ptr);
1554 result *= 10;
1555 result += (c - ASCII_0);
1556 if (result >= 0x110000)
1557 return -1;
1558 }
1559 }
1560 return checkCharRefNumber(result);
1561}
1562
1563static int EXPATENTRY PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, const char *end)
1564{
1565 switch ((end - ptr)/MINBPC(enc)) {
1566 case 2:
1567 if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) {
1568 switch (BYTE_TO_ASCII(enc, ptr)) {
1569 case ASCII_l:
1570 return ASCII_LT;
1571 case ASCII_g:
1572 return ASCII_GT;
1573 }
1574 }
1575 break;
1576 case 3:
1577 if (CHAR_MATCHES(enc, ptr, ASCII_a)) {
1578 ptr += MINBPC(enc);
1579 if (CHAR_MATCHES(enc, ptr, ASCII_m)) {
1580 ptr += MINBPC(enc);
1581 if (CHAR_MATCHES(enc, ptr, ASCII_p))
1582 return ASCII_AMP;
1583 }
1584 }
1585 break;
1586 case 4:
1587 switch (BYTE_TO_ASCII(enc, ptr)) {
1588 case ASCII_q:
1589 ptr += MINBPC(enc);
1590 if (CHAR_MATCHES(enc, ptr, ASCII_u)) {
1591 ptr += MINBPC(enc);
1592 if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
1593 ptr += MINBPC(enc);
1594 if (CHAR_MATCHES(enc, ptr, ASCII_t))
1595 return ASCII_QUOT;
1596 }
1597 }
1598 break;
1599 case ASCII_a:
1600 ptr += MINBPC(enc);
1601 if (CHAR_MATCHES(enc, ptr, ASCII_p)) {
1602 ptr += MINBPC(enc);
1603 if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
1604 ptr += MINBPC(enc);
1605 if (CHAR_MATCHES(enc, ptr, ASCII_s))
1606 return ASCII_APOS;
1607 }
1608 }
1609 break;
1610 }
1611 }
1612 return 0;
1613}
1614
1615static int EXPATENTRY PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
1616{
1617 for (;;) {
1618 switch (BYTE_TYPE(enc, ptr1)) {
1619#define LEAD_CASE(n) \
1620 case BT_LEAD ## n: \
1621 if (*ptr1++ != *ptr2++) \
1622 return 0;
1623 LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2)
1624#undef LEAD_CASE
1625 /* fall through */
1626 if (*ptr1++ != *ptr2++)
1627 return 0;
1628 break;
1629 case BT_NONASCII:
1630 case BT_NMSTRT:
1631#ifdef XML_NS
1632 case BT_COLON:
1633#endif
1634 case BT_HEX:
1635 case BT_DIGIT:
1636 case BT_NAME:
1637 case BT_MINUS:
1638 if (*ptr2++ != *ptr1++)
1639 return 0;
1640 if (MINBPC(enc) > 1) {
1641 if (*ptr2++ != *ptr1++)
1642 return 0;
1643 if (MINBPC(enc) > 2) {
1644 if (*ptr2++ != *ptr1++)
1645 return 0;
1646 if (MINBPC(enc) > 3) {
1647 if (*ptr2++ != *ptr1++)
1648 return 0;
1649 }
1650 }
1651 }
1652 break;
1653 default:
1654 if (MINBPC(enc) == 1 && *ptr1 == *ptr2)
1655 return 1;
1656 switch (BYTE_TYPE(enc, ptr2)) {
1657 case BT_LEAD2:
1658 case BT_LEAD3:
1659 case BT_LEAD4:
1660 case BT_NONASCII:
1661 case BT_NMSTRT:
1662#ifdef XML_NS
1663 case BT_COLON:
1664#endif
1665 case BT_HEX:
1666 case BT_DIGIT:
1667 case BT_NAME:
1668 case BT_MINUS:
1669 return 0;
1670 default:
1671 return 1;
1672 }
1673 }
1674 }
1675 /* not reached */
1676}
1677
1678static int EXPATENTRY PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1,
1679 const char *end1, const char *ptr2)
1680{
1681 for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
1682 if (ptr1 == end1)
1683 return 0;
1684 if (!CHAR_MATCHES(enc, ptr1, *ptr2))
1685 return 0;
1686 }
1687 return ptr1 == end1;
1688}
1689
1690static int EXPATENTRY PREFIX(nameLength)(const ENCODING *enc, const char *ptr)
1691{
1692 const char *start = ptr;
1693 for (;;) {
1694 switch (BYTE_TYPE(enc, ptr)) {
1695#define LEAD_CASE(n) \
1696 case BT_LEAD ## n: ptr += n; break;
1697 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1698#undef LEAD_CASE
1699 case BT_NONASCII:
1700 case BT_NMSTRT:
1701#ifdef XML_NS
1702 case BT_COLON:
1703#endif
1704 case BT_HEX:
1705 case BT_DIGIT:
1706 case BT_NAME:
1707 case BT_MINUS:
1708 ptr += MINBPC(enc);
1709 break;
1710 default:
1711 return ptr - start;
1712 }
1713 }
1714}
1715
1716static const char* EXPATENTRY PREFIX(skipS)(const ENCODING *enc, const char *ptr)
1717{
1718 for (;;) {
1719 switch (BYTE_TYPE(enc, ptr)) {
1720 case BT_LF:
1721 case BT_CR:
1722 case BT_S:
1723 ptr += MINBPC(enc);
1724 break;
1725 default:
1726 return ptr;
1727 }
1728 }
1729}
1730
1731static void EXPATENTRY PREFIX(updatePosition)(const ENCODING *enc,
1732 const char *ptr,
1733 const char *end,
1734 POSITION *pos)
1735{
1736 while (ptr != end) {
1737 switch (BYTE_TYPE(enc, ptr)) {
1738#define LEAD_CASE(n) \
1739 case BT_LEAD ## n: \
1740 ptr += n; \
1741 break;
1742 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1743#undef LEAD_CASE
1744 case BT_LF:
1745 pos->columnNumber = (unsigned)-1;
1746 pos->lineNumber++;
1747 ptr += MINBPC(enc);
1748 break;
1749 case BT_CR:
1750 pos->lineNumber++;
1751 ptr += MINBPC(enc);
1752 if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF)
1753 ptr += MINBPC(enc);
1754 pos->columnNumber = (unsigned)-1;
1755 break;
1756 default:
1757 ptr += MINBPC(enc);
1758 break;
1759 }
1760 pos->columnNumber++;
1761 }
1762}
1763
1764#undef DO_LEAD_CASE
1765#undef MULTIBYTE_CASES
1766#undef INVALID_CASES
1767#undef CHECK_NAME_CASE
1768#undef CHECK_NAME_CASES
1769#undef CHECK_NMSTRT_CASE
1770#undef CHECK_NMSTRT_CASES
Note: See TracBrowser for help on using the repository browser.