source: trunk/src/helpers/xmltok_impl.c@ 43

Last change on this file since 43 was 38, checked in by umoeller, 25 years ago

Updates to XML.

  • Property svn:eol-style set to CRLF
  • Property svn:keywords set to Author Date Id Revision
File size: 62.7 KB
Line 
1
2/*
3 *sourcefile xmltok_impl.c
4 * part of the expat implementation. See xmlparse.c.
5 *
6 * NOTE: This file must not be compiled directly. It is
7 * #include'd from xmltok.c several times.
8 */
9
10/*
11 * Copyright (C) 2001 Ulrich Möller.
12 * Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd.
13 * and Clark Cooper.
14 *
15 * Permission is hereby granted, free of charge, to any person obtaining
16 * a copy of this software and associated documentation files (the
17 * "Software"), to deal in the Software without restriction, including
18 * without limitation the rights to use, copy, modify, merge, publish,
19 * distribute, sublicense, and/or sell copies of the Software, and to
20 * permit persons to whom the Software is furnished to do so, subject to
21 * the following conditions:
22 *
23 * The above copyright notice and this permission notice shall be included
24 * in all copies or substantial portions of the Software.
25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
29 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
30 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
31 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
32 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
33 */
34
/*
 * Fallback used when the including file has not supplied IS_INVALID_CHAR:
 * it always evaluates to 0, so no character is ever reported as invalid.
 */
#if !defined(IS_INVALID_CHAR)
#define IS_INVALID_CHAR(enc, ptr, n) (0)
#endif
38
/*
 * Expands to the switch case for the lead byte of an n-byte character
 * (BT_LEAD2, BT_LEAD3 or BT_LEAD4):
 *   - fewer than n bytes before `end`  -> return XML_TOK_PARTIAL_CHAR
 *   - IS_INVALID_CHAR reports the char -> *nextTokPtr = ptr, XML_TOK_INVALID
 *   - otherwise                        -> ptr is advanced by n bytes
 * `enc` and `end` are not parameters; they are picked up from the scope in
 * which the macro is expanded.
 */
#define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \
    case BT_LEAD ## n: \
        if (end - ptr < n) \
            return XML_TOK_PARTIAL_CHAR; \
        if (IS_INVALID_CHAR(enc, ptr, n)) { \
            *(nextTokPtr) = (ptr); \
            return XML_TOK_INVALID; \
        } \
        ptr += n; \
        break;
49
/*
 * Switch cases shared by scanners that walk over arbitrary character data:
 * the three multi-byte lead cases (validated via INVALID_LEAD_CASE), followed
 * by BT_NONXML, BT_MALFORM and BT_TRAIL, which always store ptr in
 * *nextTokPtr and return XML_TOK_INVALID.
 */
#define INVALID_CASES(ptr, nextTokPtr) \
    INVALID_LEAD_CASE(2, ptr, nextTokPtr) \
    INVALID_LEAD_CASE(3, ptr, nextTokPtr) \
    INVALID_LEAD_CASE(4, ptr, nextTokPtr) \
    case BT_NONXML: \
    case BT_MALFORM: \
    case BT_TRAIL: \
        *(nextTokPtr) = (ptr); \
        return XML_TOK_INVALID;
59
/*
 * Switch case for an n-byte character inside a name: returns
 * XML_TOK_PARTIAL_CHAR if fewer than n bytes remain, XML_TOK_INVALID (with
 * *nextTokPtr = ptr) if IS_NAME_CHAR rejects the character, and otherwise
 * advances ptr by n bytes.
 */
#define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \
    case BT_LEAD ## n: \
        if ((end) - (ptr) < n) \
            return XML_TOK_PARTIAL_CHAR; \
        if (!IS_NAME_CHAR(enc, ptr, n)) { \
            *(nextTokPtr) = (ptr); \
            return XML_TOK_INVALID; \
        } \
        ptr += n; \
        break;
70
/*
 * Full set of switch cases accepting one name character at ptr.
 * BT_NONASCII is checked with IS_NAME_CHAR_MINBPC and, when accepted,
 * deliberately falls through into the single-unit cases, which advance ptr
 * by MINBPC(enc). Multi-byte lead bytes are handled by CHECK_NAME_CASE.
 */
#define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \
    case BT_NONASCII: \
        if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \
            *(nextTokPtr) = (ptr); \
            return XML_TOK_INVALID; \
        } \
        /* fall through */ \
    case BT_NMSTRT: \
    case BT_HEX: \
    case BT_DIGIT: \
    case BT_NAME: \
    case BT_MINUS: \
        ptr += MINBPC(enc); \
        break; \
    CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \
    CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \
    CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)
87
/*
 * Switch case for an n-byte character at the start of a name: returns
 * XML_TOK_PARTIAL_CHAR if fewer than n bytes remain, XML_TOK_INVALID (with
 * *nextTokPtr = ptr) if IS_NMSTRT_CHAR rejects the character, and otherwise
 * advances ptr by n bytes.
 */
#define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \
    case BT_LEAD ## n: \
        if ((end) - (ptr) < n) \
            return XML_TOK_PARTIAL_CHAR; \
        if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \
            *(nextTokPtr) = (ptr); \
            return XML_TOK_INVALID; \
        } \
        ptr += n; \
        break;
98
/*
 * Full set of switch cases accepting one name-start character at ptr.
 * BT_NONASCII is checked with IS_NMSTRT_CHAR_MINBPC and, when accepted,
 * deliberately falls through into BT_NMSTRT/BT_HEX, which advance ptr by
 * MINBPC(enc). Multi-byte lead bytes are handled by CHECK_NMSTRT_CASE.
 */
#define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \
    case BT_NONASCII: \
        if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \
            *(nextTokPtr) = (ptr); \
            return XML_TOK_INVALID; \
        } \
        /* fall through */ \
    case BT_NMSTRT: \
    case BT_HEX: \
        ptr += MINBPC(enc); \
        break; \
    CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \
    CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \
    CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)
112
/*
 * Name-decoration hook for the functions below. When the including file has
 * not defined PREFIX, identifiers are used unchanged.
 */
#if !defined(PREFIX)
#define PREFIX(ident) ident
#endif
116
117/* ptr points to character following "<!-" */
118
119static int EXPATENTRY PREFIX(scanComment)(const ENCODING * enc,
120 const char *ptr,
121 const char *end,
122 const char **nextTokPtr)
123{
124 if (ptr != end)
125 {
126 if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS))
127 {
128 *nextTokPtr = ptr;
129 return XML_TOK_INVALID;
130 }
131 ptr += MINBPC(enc);
132 while (ptr != end)
133 {
134 switch (BYTE_TYPE(enc, ptr))
135 {
136 INVALID_CASES(ptr, nextTokPtr)
137 case BT_MINUS:
138 if ((ptr += MINBPC(enc)) == end)
139 return XML_TOK_PARTIAL;
140 if (CHAR_MATCHES(enc, ptr, ASCII_MINUS))
141 {
142 if ((ptr += MINBPC(enc)) == end)
143 return XML_TOK_PARTIAL;
144 if (!CHAR_MATCHES(enc, ptr, ASCII_GT))
145 {
146 *nextTokPtr = ptr;
147 return XML_TOK_INVALID;
148 }
149 *nextTokPtr = ptr + MINBPC(enc);
150 return XML_TOK_COMMENT;
151 }
152 break;
153 default:
154 ptr += MINBPC(enc);
155 break;
156 }
157 }
158 }
159 return XML_TOK_PARTIAL;
160}
161
162/* ptr points to character following "<!" */
163
164static int EXPATENTRY PREFIX(scanDecl)(const ENCODING * enc,
165 const char *ptr,
166 const char *end,
167 const char **nextTokPtr)
168{
169 if (ptr == end)
170 return XML_TOK_PARTIAL;
171 switch (BYTE_TYPE(enc, ptr))
172 {
173 case BT_MINUS:
174 return PREFIX(scanComment) (enc, ptr + MINBPC(enc), end, nextTokPtr);
175 case BT_LSQB:
176 *nextTokPtr = ptr + MINBPC(enc);
177 return XML_TOK_COND_SECT_OPEN;
178 case BT_NMSTRT:
179 case BT_HEX:
180 ptr += MINBPC(enc);
181 break;
182 default:
183 *nextTokPtr = ptr;
184 return XML_TOK_INVALID;
185 }
186 while (ptr != end)
187 {
188 switch (BYTE_TYPE(enc, ptr))
189 {
190 case BT_PERCNT:
191 if (ptr + MINBPC(enc) == end)
192 return XML_TOK_PARTIAL;
193 /* don't allow <!ENTITY% foo "whatever"> */
194 switch (BYTE_TYPE(enc, ptr + MINBPC(enc)))
195 {
196 case BT_S:
197 case BT_CR:
198 case BT_LF:
199 case BT_PERCNT:
200 *nextTokPtr = ptr;
201 return XML_TOK_INVALID;
202 }
203 /* fall through */
204 case BT_S:
205 case BT_CR:
206 case BT_LF:
207 *nextTokPtr = ptr;
208 return XML_TOK_DECL_OPEN;
209 case BT_NMSTRT:
210 case BT_HEX:
211 ptr += MINBPC(enc);
212 break;
213 default:
214 *nextTokPtr = ptr;
215 return XML_TOK_INVALID;
216 }
217 }
218 return XML_TOK_PARTIAL;
219}
220
221static int EXPATENTRY PREFIX(checkPiTarget) (const ENCODING * enc, const char *ptr, const char *end, int *tokPtr)
222{
223 int upper = 0;
224
225 *tokPtr = XML_TOK_PI;
226 if (end - ptr != MINBPC(enc) * 3)
227 return 1;
228 switch (BYTE_TO_ASCII(enc, ptr))
229 {
230 case ASCII_x:
231 break;
232 case ASCII_X:
233 upper = 1;
234 break;
235 default:
236 return 1;
237 }
238 ptr += MINBPC(enc);
239 switch (BYTE_TO_ASCII(enc, ptr))
240 {
241 case ASCII_m:
242 break;
243 case ASCII_M:
244 upper = 1;
245 break;
246 default:
247 return 1;
248 }
249 ptr += MINBPC(enc);
250 switch (BYTE_TO_ASCII(enc, ptr))
251 {
252 case ASCII_l:
253 break;
254 case ASCII_L:
255 upper = 1;
256 break;
257 default:
258 return 1;
259 }
260 if (upper)
261 return 0;
262 *tokPtr = XML_TOK_XML_DECL;
263 return 1;
264}
265
266/* ptr points to character following "<?" */
267
268static
269int EXPATENTRY PREFIX(scanPi) (const ENCODING * enc, const char *ptr, const char *end,
270 const char **nextTokPtr)
271{
272 int tok;
273 const char *target = ptr;
274
275 if (ptr == end)
276 return XML_TOK_PARTIAL;
277 switch (BYTE_TYPE(enc, ptr))
278 {
279 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
280 default:
281 *nextTokPtr = ptr;
282 return XML_TOK_INVALID;
283 }
284 while (ptr != end)
285 {
286 switch (BYTE_TYPE(enc, ptr))
287 {
288 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
289 case BT_S:
290 case BT_CR:
291 case BT_LF:
292 if (!PREFIX(checkPiTarget) (enc, target, ptr, &tok))
293 {
294 *nextTokPtr = ptr;
295 return XML_TOK_INVALID;
296 }
297 ptr += MINBPC(enc);
298 while (ptr != end)
299 {
300 switch (BYTE_TYPE(enc, ptr))
301 {
302 INVALID_CASES(ptr, nextTokPtr)
303 case BT_QUEST:
304 ptr += MINBPC(enc);
305 if (ptr == end)
306 return XML_TOK_PARTIAL;
307 if (CHAR_MATCHES(enc, ptr, ASCII_GT))
308 {
309 *nextTokPtr = ptr + MINBPC(enc);
310 return tok;
311 }
312 break;
313 default:
314 ptr += MINBPC(enc);
315 break;
316 }
317 }
318 return XML_TOK_PARTIAL;
319 case BT_QUEST:
320 if (!PREFIX(checkPiTarget) (enc, target, ptr, &tok))
321 {
322 *nextTokPtr = ptr;
323 return XML_TOK_INVALID;
324 }
325 ptr += MINBPC(enc);
326 if (ptr == end)
327 return XML_TOK_PARTIAL;
328 if (CHAR_MATCHES(enc, ptr, ASCII_GT))
329 {
330 *nextTokPtr = ptr + MINBPC(enc);
331 return tok;
332 }
333 /* fall through */
334 default:
335 *nextTokPtr = ptr;
336 return XML_TOK_INVALID;
337 }
338 }
339 return XML_TOK_PARTIAL;
340}
341
342
343static
344int EXPATENTRY PREFIX(scanCdataSection) (const ENCODING * enc, const char *ptr, const char *end,
345 const char **nextTokPtr)
346{
347 static const char CDATA_LSQB[] =
348 {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, ASCII_LSQB};
349 int i;
350
351 /* CDATA[ */
352 if (end - ptr < 6 * MINBPC(enc))
353 return XML_TOK_PARTIAL;
354 for (i = 0; i < 6; i++, ptr += MINBPC(enc))
355 {
356 if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i]))
357 {
358 *nextTokPtr = ptr;
359 return XML_TOK_INVALID;
360 }
361 }
362 *nextTokPtr = ptr;
363 return XML_TOK_CDATA_SECT_OPEN;
364}
365
366static
367int EXPATENTRY PREFIX(cdataSectionTok) (const ENCODING * enc, const char *ptr, const char *end,
368 const char **nextTokPtr)
369{
370 if (ptr == end)
371 return XML_TOK_NONE;
372 if (MINBPC(enc) > 1)
373 {
374 size_t n = end - ptr;
375
376 if (n & (MINBPC(enc) - 1))
377 {
378 n &= ~(MINBPC(enc) - 1);
379 if (n == 0)
380 return XML_TOK_PARTIAL;
381 end = ptr + n;
382 }
383 }
384 switch (BYTE_TYPE(enc, ptr))
385 {
386 case BT_RSQB:
387 ptr += MINBPC(enc);
388 if (ptr == end)
389 return XML_TOK_PARTIAL;
390 if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
391 break;
392 ptr += MINBPC(enc);
393 if (ptr == end)
394 return XML_TOK_PARTIAL;
395 if (!CHAR_MATCHES(enc, ptr, ASCII_GT))
396 {
397 ptr -= MINBPC(enc);
398 break;
399 }
400 *nextTokPtr = ptr + MINBPC(enc);
401 return XML_TOK_CDATA_SECT_CLOSE;
402 case BT_CR:
403 ptr += MINBPC(enc);
404 if (ptr == end)
405 return XML_TOK_PARTIAL;
406 if (BYTE_TYPE(enc, ptr) == BT_LF)
407 ptr += MINBPC(enc);
408 *nextTokPtr = ptr;
409 return XML_TOK_DATA_NEWLINE;
410 case BT_LF:
411 *nextTokPtr = ptr + MINBPC(enc);
412 return XML_TOK_DATA_NEWLINE;
413 INVALID_CASES(ptr, nextTokPtr)
414 default:
415 ptr += MINBPC(enc);
416 break;
417 }
418 while (ptr != end)
419 {
420 switch (BYTE_TYPE(enc, ptr))
421 {
422#define LEAD_CASE(n) \
423 case BT_LEAD ## n: \
424 if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
425 *nextTokPtr = ptr; \
426 return XML_TOK_DATA_CHARS; \
427 } \
428 ptr += n; \
429 break;
430 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
431#undef LEAD_CASE
432 case BT_NONXML:
433 case BT_MALFORM:
434 case BT_TRAIL:
435 case BT_CR:
436 case BT_LF:
437 case BT_RSQB:
438 *nextTokPtr = ptr;
439 return XML_TOK_DATA_CHARS;
440 default:
441 ptr += MINBPC(enc);
442 break;
443 }
444 }
445 *nextTokPtr = ptr;
446 return XML_TOK_DATA_CHARS;
447}
448
449/* ptr points to character following "</" */
450
451static
452int EXPATENTRY PREFIX(scanEndTag) (const ENCODING * enc, const char *ptr, const char *end,
453 const char **nextTokPtr)
454{
455 if (ptr == end)
456 return XML_TOK_PARTIAL;
457 switch (BYTE_TYPE(enc, ptr))
458 {
459 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
460 default:
461 *nextTokPtr = ptr;
462 return XML_TOK_INVALID;
463 }
464 while (ptr != end)
465 {
466 switch (BYTE_TYPE(enc, ptr))
467 {
468 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
469 case BT_S:
470 case BT_CR:
471 case BT_LF:
472 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc))
473 {
474 switch (BYTE_TYPE(enc, ptr))
475 {
476 case BT_S:
477 case BT_CR:
478 case BT_LF:
479 break;
480 case BT_GT:
481 *nextTokPtr = ptr + MINBPC(enc);
482 return XML_TOK_END_TAG;
483 default:
484 *nextTokPtr = ptr;
485 return XML_TOK_INVALID;
486 }
487 }
488 return XML_TOK_PARTIAL;
489#ifdef XML_NS
490 case BT_COLON:
491 /* no need to check qname syntax here, since end-tag must match exactly */
492 ptr += MINBPC(enc);
493 break;
494#endif
495 case BT_GT:
496 *nextTokPtr = ptr + MINBPC(enc);
497 return XML_TOK_END_TAG;
498 default:
499 *nextTokPtr = ptr;
500 return XML_TOK_INVALID;
501 }
502 }
503 return XML_TOK_PARTIAL;
504}
505
506/* ptr points to character following "&#X" */
507
508static
509int EXPATENTRY PREFIX(scanHexCharRef) (const ENCODING * enc, const char *ptr, const char *end,
510 const char **nextTokPtr)
511{
512 if (ptr != end)
513 {
514 switch (BYTE_TYPE(enc, ptr))
515 {
516 case BT_DIGIT:
517 case BT_HEX:
518 break;
519 default:
520 *nextTokPtr = ptr;
521 return XML_TOK_INVALID;
522 }
523 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc))
524 {
525 switch (BYTE_TYPE(enc, ptr))
526 {
527 case BT_DIGIT:
528 case BT_HEX:
529 break;
530 case BT_SEMI:
531 *nextTokPtr = ptr + MINBPC(enc);
532 return XML_TOK_CHAR_REF;
533 default:
534 *nextTokPtr = ptr;
535 return XML_TOK_INVALID;
536 }
537 }
538 }
539 return XML_TOK_PARTIAL;
540}
541
542/* ptr points to character following "&#" */
543
544static
545int EXPATENTRY PREFIX(scanCharRef) (const ENCODING * enc, const char *ptr, const char *end,
546 const char **nextTokPtr)
547{
548 if (ptr != end)
549 {
550 if (CHAR_MATCHES(enc, ptr, ASCII_x))
551 return PREFIX(scanHexCharRef) (enc, ptr + MINBPC(enc), end, nextTokPtr);
552 switch (BYTE_TYPE(enc, ptr))
553 {
554 case BT_DIGIT:
555 break;
556 default:
557 *nextTokPtr = ptr;
558 return XML_TOK_INVALID;
559 }
560 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc))
561 {
562 switch (BYTE_TYPE(enc, ptr))
563 {
564 case BT_DIGIT:
565 break;
566 case BT_SEMI:
567 *nextTokPtr = ptr + MINBPC(enc);
568 return XML_TOK_CHAR_REF;
569 default:
570 *nextTokPtr = ptr;
571 return XML_TOK_INVALID;
572 }
573 }
574 }
575 return XML_TOK_PARTIAL;
576}
577
578/* ptr points to character following "&" */
579
580static
581int EXPATENTRY PREFIX(scanRef) (const ENCODING * enc, const char *ptr, const char *end,
582 const char **nextTokPtr)
583{
584 if (ptr == end)
585 return XML_TOK_PARTIAL;
586 switch (BYTE_TYPE(enc, ptr))
587 {
588 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
589 case BT_NUM:
590 return PREFIX(scanCharRef) (enc, ptr + MINBPC(enc), end, nextTokPtr);
591 default:
592 *nextTokPtr = ptr;
593 return XML_TOK_INVALID;
594 }
595 while (ptr != end)
596 {
597 switch (BYTE_TYPE(enc, ptr))
598 {
599 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
600 case BT_SEMI:
601 *nextTokPtr = ptr + MINBPC(enc);
602 return XML_TOK_ENTITY_REF;
603 default:
604 *nextTokPtr = ptr;
605 return XML_TOK_INVALID;
606 }
607 }
608 return XML_TOK_PARTIAL;
609}
610
611/* ptr points to character following first character of attribute name */
612
613static
614int EXPATENTRY PREFIX(scanAtts) (const ENCODING * enc, const char *ptr, const char *end,
615 const char **nextTokPtr)
616{
617#ifdef XML_NS
618 int hadColon = 0;
619
620#endif
621 while (ptr != end)
622 {
623 switch (BYTE_TYPE(enc, ptr))
624 {
625 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
626#ifdef XML_NS
627 case BT_COLON:
628 if (hadColon)
629 {
630 *nextTokPtr = ptr;
631 return XML_TOK_INVALID;
632 }
633 hadColon = 1;
634 ptr += MINBPC(enc);
635 if (ptr == end)
636 return XML_TOK_PARTIAL;
637 switch (BYTE_TYPE(enc, ptr))
638 {
639 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
640 default:
641 *nextTokPtr = ptr;
642 return XML_TOK_INVALID;
643 }
644 break;
645#endif
646 case BT_S:
647 case BT_CR:
648 case BT_LF:
649 for (;;)
650 {
651 int t;
652
653 ptr += MINBPC(enc);
654 if (ptr == end)
655 return XML_TOK_PARTIAL;
656 t = BYTE_TYPE(enc, ptr);
657 if (t == BT_EQUALS)
658 break;
659 switch (t)
660 {
661 case BT_S:
662 case BT_LF:
663 case BT_CR:
664 break;
665 default:
666 *nextTokPtr = ptr;
667 return XML_TOK_INVALID;
668 }
669 }
670 /* fall through */
671 case BT_EQUALS:
672 {
673 int open;
674
675#ifdef XML_NS
676 hadColon = 0;
677#endif
678 for (;;)
679 {
680
681 ptr += MINBPC(enc);
682 if (ptr == end)
683 return XML_TOK_PARTIAL;
684 open = BYTE_TYPE(enc, ptr);
685 if (open == BT_QUOT || open == BT_APOS)
686 break;
687 switch (open)
688 {
689 case BT_S:
690 case BT_LF:
691 case BT_CR:
692 break;
693 default:
694 *nextTokPtr = ptr;
695 return XML_TOK_INVALID;
696 }
697 }
698 ptr += MINBPC(enc);
699 /* in attribute value */
700 for (;;)
701 {
702 int t;
703
704 if (ptr == end)
705 return XML_TOK_PARTIAL;
706 t = BYTE_TYPE(enc, ptr);
707 if (t == open)
708 break;
709 switch (t)
710 {
711 INVALID_CASES(ptr, nextTokPtr)
712 case BT_AMP:
713 {
714 int tok = PREFIX(scanRef) (enc, ptr + MINBPC(enc), end, &ptr);
715
716 if (tok <= 0)
717 {
718 if (tok == XML_TOK_INVALID)
719 *nextTokPtr = ptr;
720 return tok;
721 }
722 break;
723 }
724 case BT_LT:
725 *nextTokPtr = ptr;
726 return XML_TOK_INVALID;
727 default:
728 ptr += MINBPC(enc);
729 break;
730 }
731 }
732 ptr += MINBPC(enc);
733 if (ptr == end)
734 return XML_TOK_PARTIAL;
735 switch (BYTE_TYPE(enc, ptr))
736 {
737 case BT_S:
738 case BT_CR:
739 case BT_LF:
740 break;
741 case BT_SOL:
742 goto sol;
743 case BT_GT:
744 goto gt;
745 default:
746 *nextTokPtr = ptr;
747 return XML_TOK_INVALID;
748 }
749 /* ptr points to closing quote */
750 for (;;)
751 {
752 ptr += MINBPC(enc);
753 if (ptr == end)
754 return XML_TOK_PARTIAL;
755 switch (BYTE_TYPE(enc, ptr))
756 {
757 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
758 case BT_S:
759 case BT_CR:
760 case BT_LF:
761 continue;
762 case BT_GT:
763 gt:
764 *nextTokPtr = ptr + MINBPC(enc);
765 return XML_TOK_START_TAG_WITH_ATTS;
766 case BT_SOL:
767 sol:
768 ptr += MINBPC(enc);
769 if (ptr == end)
770 return XML_TOK_PARTIAL;
771 if (!CHAR_MATCHES(enc, ptr, ASCII_GT))
772 {
773 *nextTokPtr = ptr;
774 return XML_TOK_INVALID;
775 }
776 *nextTokPtr = ptr + MINBPC(enc);
777 return XML_TOK_EMPTY_ELEMENT_WITH_ATTS;
778 default:
779 *nextTokPtr = ptr;
780 return XML_TOK_INVALID;
781 }
782 break;
783 }
784 break;
785 }
786 default:
787 *nextTokPtr = ptr;
788 return XML_TOK_INVALID;
789 }
790 }
791 return XML_TOK_PARTIAL;
792}
793
794/* ptr points to character following "<" */
795
796static
797int EXPATENTRY PREFIX(scanLt) (const ENCODING * enc, const char *ptr, const char *end,
798 const char **nextTokPtr)
799{
800#ifdef XML_NS
801 int hadColon;
802
803#endif
804 if (ptr == end)
805 return XML_TOK_PARTIAL;
806 switch (BYTE_TYPE(enc, ptr))
807 {
808 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
809 case BT_EXCL:
810 if ((ptr += MINBPC(enc)) == end)
811 return XML_TOK_PARTIAL;
812 switch (BYTE_TYPE(enc, ptr))
813 {
814 case BT_MINUS:
815 return PREFIX(scanComment) (enc, ptr + MINBPC(enc), end, nextTokPtr);
816 case BT_LSQB:
817 return PREFIX(scanCdataSection) (enc, ptr + MINBPC(enc), end, nextTokPtr);
818 }
819 *nextTokPtr = ptr;
820 return XML_TOK_INVALID;
821 case BT_QUEST:
822 return PREFIX(scanPi) (enc, ptr + MINBPC(enc), end, nextTokPtr);
823 case BT_SOL:
824 return PREFIX(scanEndTag) (enc, ptr + MINBPC(enc), end, nextTokPtr);
825 default:
826 *nextTokPtr = ptr;
827 return XML_TOK_INVALID;
828 }
829#ifdef XML_NS
830 hadColon = 0;
831#endif
832 /* we have a start-tag */
833 while (ptr != end)
834 {
835 switch (BYTE_TYPE(enc, ptr))
836 {
837 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
838#ifdef XML_NS
839 case BT_COLON:
840 if (hadColon)
841 {
842 *nextTokPtr = ptr;
843 return XML_TOK_INVALID;
844 }
845 hadColon = 1;
846 ptr += MINBPC(enc);
847 if (ptr == end)
848 return XML_TOK_PARTIAL;
849 switch (BYTE_TYPE(enc, ptr))
850 {
851 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
852 default:
853 *nextTokPtr = ptr;
854 return XML_TOK_INVALID;
855 }
856 break;
857#endif
858 case BT_S:
859 case BT_CR:
860 case BT_LF:
861 {
862 ptr += MINBPC(enc);
863 while (ptr != end)
864 {
865 switch (BYTE_TYPE(enc, ptr))
866 {
867 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
868 case BT_GT:
869 goto gt;
870 case BT_SOL:
871 goto sol;
872 case BT_S:
873 case BT_CR:
874 case BT_LF:
875 ptr += MINBPC(enc);
876 continue;
877 default:
878 *nextTokPtr = ptr;
879 return XML_TOK_INVALID;
880 }
881 return PREFIX(scanAtts) (enc, ptr, end, nextTokPtr);
882 }
883 return XML_TOK_PARTIAL;
884 }
885 case BT_GT:
886 gt:
887 *nextTokPtr = ptr + MINBPC(enc);
888 return XML_TOK_START_TAG_NO_ATTS;
889 case BT_SOL:
890 sol:
891 ptr += MINBPC(enc);
892 if (ptr == end)
893 return XML_TOK_PARTIAL;
894 if (!CHAR_MATCHES(enc, ptr, ASCII_GT))
895 {
896 *nextTokPtr = ptr;
897 return XML_TOK_INVALID;
898 }
899 *nextTokPtr = ptr + MINBPC(enc);
900 return XML_TOK_EMPTY_ELEMENT_NO_ATTS;
901 default:
902 *nextTokPtr = ptr;
903 return XML_TOK_INVALID;
904 }
905 }
906 return XML_TOK_PARTIAL;
907}
908
909static
910int EXPATENTRY PREFIX(contentTok) (const ENCODING * enc, const char *ptr, const char *end,
911 const char **nextTokPtr)
912{
913 if (ptr == end)
914 return XML_TOK_NONE;
915 if (MINBPC(enc) > 1)
916 {
917 size_t n = end - ptr;
918
919 if (n & (MINBPC(enc) - 1))
920 {
921 n &= ~(MINBPC(enc) - 1);
922 if (n == 0)
923 return XML_TOK_PARTIAL;
924 end = ptr + n;
925 }
926 }
927 switch (BYTE_TYPE(enc, ptr))
928 {
929 case BT_LT:
930 return PREFIX(scanLt) (enc, ptr + MINBPC(enc), end, nextTokPtr);
931 case BT_AMP:
932 return PREFIX(scanRef) (enc, ptr + MINBPC(enc), end, nextTokPtr);
933 case BT_CR:
934 ptr += MINBPC(enc);
935 if (ptr == end)
936 return XML_TOK_TRAILING_CR;
937 if (BYTE_TYPE(enc, ptr) == BT_LF)
938 ptr += MINBPC(enc);
939 *nextTokPtr = ptr;
940 return XML_TOK_DATA_NEWLINE;
941 case BT_LF:
942 *nextTokPtr = ptr + MINBPC(enc);
943 return XML_TOK_DATA_NEWLINE;
944 case BT_RSQB:
945 ptr += MINBPC(enc);
946 if (ptr == end)
947 return XML_TOK_TRAILING_RSQB;
948 if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
949 break;
950 ptr += MINBPC(enc);
951 if (ptr == end)
952 return XML_TOK_TRAILING_RSQB;
953 if (!CHAR_MATCHES(enc, ptr, ASCII_GT))
954 {
955 ptr -= MINBPC(enc);
956 break;
957 }
958 *nextTokPtr = ptr;
959 return XML_TOK_INVALID;
960 INVALID_CASES(ptr, nextTokPtr)
961 default:
962 ptr += MINBPC(enc);
963 break;
964 }
965 while (ptr != end)
966 {
967 switch (BYTE_TYPE(enc, ptr))
968 {
969#define LEAD_CASE(n) \
970 case BT_LEAD ## n: \
971 if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
972 *nextTokPtr = ptr; \
973 return XML_TOK_DATA_CHARS; \
974 } \
975 ptr += n; \
976 break;
977 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
978#undef LEAD_CASE
979 case BT_RSQB:
980 if (ptr + MINBPC(enc) != end)
981 {
982 if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB))
983 {
984 ptr += MINBPC(enc);
985 break;
986 }
987 if (ptr + 2 * MINBPC(enc) != end)
988 {
989 if (!CHAR_MATCHES(enc, ptr + 2 * MINBPC(enc), ASCII_GT))
990 {
991 ptr += MINBPC(enc);
992 break;
993 }
994 *nextTokPtr = ptr + 2 * MINBPC(enc);
995 return XML_TOK_INVALID;
996 }
997 }
998 /* fall through */
999 case BT_AMP:
1000 case BT_LT:
1001 case BT_NONXML:
1002 case BT_MALFORM:
1003 case BT_TRAIL:
1004 case BT_CR:
1005 case BT_LF:
1006 *nextTokPtr = ptr;
1007 return XML_TOK_DATA_CHARS;
1008 default:
1009 ptr += MINBPC(enc);
1010 break;
1011 }
1012 }
1013 *nextTokPtr = ptr;
1014 return XML_TOK_DATA_CHARS;
1015}
1016
1017/* ptr points to character following "%" */
1018
1019static
1020int EXPATENTRY PREFIX(scanPercent) (const ENCODING * enc, const char *ptr, const char *end,
1021 const char **nextTokPtr)
1022{
1023 if (ptr == end)
1024 return XML_TOK_PARTIAL;
1025 switch (BYTE_TYPE(enc, ptr))
1026 {
1027 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
1028 case BT_S:
1029 case BT_LF:
1030 case BT_CR:
1031 case BT_PERCNT:
1032 *nextTokPtr = ptr;
1033 return XML_TOK_PERCENT;
1034 default:
1035 *nextTokPtr = ptr;
1036 return XML_TOK_INVALID;
1037 }
1038 while (ptr != end)
1039 {
1040 switch (BYTE_TYPE(enc, ptr))
1041 {
1042 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
1043 case BT_SEMI:
1044 *nextTokPtr = ptr + MINBPC(enc);
1045 return XML_TOK_PARAM_ENTITY_REF;
1046 default:
1047 *nextTokPtr = ptr;
1048 return XML_TOK_INVALID;
1049 }
1050 }
1051 return XML_TOK_PARTIAL;
1052}
1053
1054static
1055int EXPATENTRY PREFIX(scanPoundName) (const ENCODING * enc, const char *ptr, const char *end,
1056 const char **nextTokPtr)
1057{
1058 if (ptr == end)
1059 return XML_TOK_PARTIAL;
1060 switch (BYTE_TYPE(enc, ptr))
1061 {
1062 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
1063 default:
1064 *nextTokPtr = ptr;
1065 return XML_TOK_INVALID;
1066 }
1067 while (ptr != end)
1068 {
1069 switch (BYTE_TYPE(enc, ptr))
1070 {
1071 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
1072 case BT_CR:
1073 case BT_LF:
1074 case BT_S:
1075 case BT_RPAR:
1076 case BT_GT:
1077 case BT_PERCNT:
1078 case BT_VERBAR:
1079 *nextTokPtr = ptr;
1080 return XML_TOK_POUND_NAME;
1081 default:
1082 *nextTokPtr = ptr;
1083 return XML_TOK_INVALID;
1084 }
1085 }
1086 return -XML_TOK_POUND_NAME;
1087}
1088
1089static
1090int EXPATENTRY PREFIX(scanLit) (int open, const ENCODING * enc,
1091 const char *ptr, const char *end,
1092 const char **nextTokPtr)
1093{
1094 while (ptr != end)
1095 {
1096 int t = BYTE_TYPE(enc, ptr);
1097
1098 switch (t)
1099 {
1100 INVALID_CASES(ptr, nextTokPtr)
1101 case BT_QUOT:
1102 case BT_APOS:
1103 ptr += MINBPC(enc);
1104 if (t != open)
1105 break;
1106 if (ptr == end)
1107 return -XML_TOK_LITERAL;
1108 *nextTokPtr = ptr;
1109 switch (BYTE_TYPE(enc, ptr))
1110 {
1111 case BT_S:
1112 case BT_CR:
1113 case BT_LF:
1114 case BT_GT:
1115 case BT_PERCNT:
1116 case BT_LSQB:
1117 return XML_TOK_LITERAL;
1118 default:
1119 return XML_TOK_INVALID;
1120 }
1121 default:
1122 ptr += MINBPC(enc);
1123 break;
1124 }
1125 }
1126 return XML_TOK_PARTIAL;
1127}
1128
1129static
1130int EXPATENTRY PREFIX(prologTok) (const ENCODING * enc, const char *ptr, const char *end,
1131 const char **nextTokPtr)
1132{
1133 int tok;
1134
1135 if (ptr == end)
1136 return XML_TOK_NONE;
1137 if (MINBPC(enc) > 1)
1138 {
1139 size_t n = end - ptr;
1140
1141 if (n & (MINBPC(enc) - 1))
1142 {
1143 n &= ~(MINBPC(enc) - 1);
1144 if (n == 0)
1145 return XML_TOK_PARTIAL;
1146 end = ptr + n;
1147 }
1148 }
1149 switch (BYTE_TYPE(enc, ptr))
1150 {
1151 case BT_QUOT:
1152 return PREFIX(scanLit) (BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr);
1153 case BT_APOS:
1154 return PREFIX(scanLit) (BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr);
1155 case BT_LT:
1156 {
1157 ptr += MINBPC(enc);
1158 if (ptr == end)
1159 return XML_TOK_PARTIAL;
1160 switch (BYTE_TYPE(enc, ptr))
1161 {
1162 case BT_EXCL:
1163 return PREFIX(scanDecl) (enc, ptr + MINBPC(enc), end, nextTokPtr);
1164 case BT_QUEST:
1165 return PREFIX(scanPi) (enc, ptr + MINBPC(enc), end, nextTokPtr);
1166 case BT_NMSTRT:
1167 case BT_HEX:
1168 case BT_NONASCII:
1169 case BT_LEAD2:
1170 case BT_LEAD3:
1171 case BT_LEAD4:
1172 *nextTokPtr = ptr - MINBPC(enc);
1173 return XML_TOK_INSTANCE_START;
1174 }
1175 *nextTokPtr = ptr;
1176 return XML_TOK_INVALID;
1177 }
1178 case BT_CR:
1179 if (ptr + MINBPC(enc) == end)
1180 return -XML_TOK_PROLOG_S;
1181 /* fall through */
1182 case BT_S:
1183 case BT_LF:
1184 for (;;)
1185 {
1186 ptr += MINBPC(enc);
1187 if (ptr == end)
1188 break;
1189 switch (BYTE_TYPE(enc, ptr))
1190 {
1191 case BT_S:
1192 case BT_LF:
1193 break;
1194 case BT_CR:
1195 /* don't split CR/LF pair */
1196 if (ptr + MINBPC(enc) != end)
1197 break;
1198 /* fall through */
1199 default:
1200 *nextTokPtr = ptr;
1201 return XML_TOK_PROLOG_S;
1202 }
1203 }
1204 *nextTokPtr = ptr;
1205 return XML_TOK_PROLOG_S;
1206 case BT_PERCNT:
1207 return PREFIX(scanPercent) (enc, ptr + MINBPC(enc), end, nextTokPtr);
1208 case BT_COMMA:
1209 *nextTokPtr = ptr + MINBPC(enc);
1210 return XML_TOK_COMMA;
1211 case BT_LSQB:
1212 *nextTokPtr = ptr + MINBPC(enc);
1213 return XML_TOK_OPEN_BRACKET;
1214 case BT_RSQB:
1215 ptr += MINBPC(enc);
1216 if (ptr == end)
1217 return -XML_TOK_CLOSE_BRACKET;
1218 if (CHAR_MATCHES(enc, ptr, ASCII_RSQB))
1219 {
1220 if (ptr + MINBPC(enc) == end)
1221 return XML_TOK_PARTIAL;
1222 if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT))
1223 {
1224 *nextTokPtr = ptr + 2 * MINBPC(enc);
1225 return XML_TOK_COND_SECT_CLOSE;
1226 }
1227 }
1228 *nextTokPtr = ptr;
1229 return XML_TOK_CLOSE_BRACKET;
1230 case BT_LPAR:
1231 *nextTokPtr = ptr + MINBPC(enc);
1232 return XML_TOK_OPEN_PAREN;
1233 case BT_RPAR:
1234 ptr += MINBPC(enc);
1235 if (ptr == end)
1236 return -XML_TOK_CLOSE_PAREN;
1237 switch (BYTE_TYPE(enc, ptr))
1238 {
1239 case BT_AST:
1240 *nextTokPtr = ptr + MINBPC(enc);
1241 return XML_TOK_CLOSE_PAREN_ASTERISK;
1242 case BT_QUEST:
1243 *nextTokPtr = ptr + MINBPC(enc);
1244 return XML_TOK_CLOSE_PAREN_QUESTION;
1245 case BT_PLUS:
1246 *nextTokPtr = ptr + MINBPC(enc);
1247 return XML_TOK_CLOSE_PAREN_PLUS;
1248 case BT_CR:
1249 case BT_LF:
1250 case BT_S:
1251 case BT_GT:
1252 case BT_COMMA:
1253 case BT_VERBAR:
1254 case BT_RPAR:
1255 *nextTokPtr = ptr;
1256 return XML_TOK_CLOSE_PAREN;
1257 }
1258 *nextTokPtr = ptr;
1259 return XML_TOK_INVALID;
1260 case BT_VERBAR:
1261 *nextTokPtr = ptr + MINBPC(enc);
1262 return XML_TOK_OR;
1263 case BT_GT:
1264 *nextTokPtr = ptr + MINBPC(enc);
1265 return XML_TOK_DECL_CLOSE;
1266 case BT_NUM:
1267 return PREFIX(scanPoundName) (enc, ptr + MINBPC(enc), end, nextTokPtr);
1268#define LEAD_CASE(n) \
1269 case BT_LEAD ## n: \
1270 if (end - ptr < n) \
1271 return XML_TOK_PARTIAL_CHAR; \
1272 if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
1273 ptr += n; \
1274 tok = XML_TOK_NAME; \
1275 break; \
1276 } \
1277 if (IS_NAME_CHAR(enc, ptr, n)) { \
1278 ptr += n; \
1279 tok = XML_TOK_NMTOKEN; \
1280 break; \
1281 } \
1282 *nextTokPtr = ptr; \
1283 return XML_TOK_INVALID;
1284 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1285#undef LEAD_CASE
1286 case BT_NMSTRT:
1287 case BT_HEX:
1288 tok = XML_TOK_NAME;
1289 ptr += MINBPC(enc);
1290 break;
1291 case BT_DIGIT:
1292 case BT_NAME:
1293 case BT_MINUS:
1294#ifdef XML_NS
1295 case BT_COLON:
1296#endif
1297 tok = XML_TOK_NMTOKEN;
1298 ptr += MINBPC(enc);
1299 break;
1300 case BT_NONASCII:
1301 if (IS_NMSTRT_CHAR_MINBPC(enc, ptr))
1302 {
1303 ptr += MINBPC(enc);
1304 tok = XML_TOK_NAME;
1305 break;
1306 }
1307 if (IS_NAME_CHAR_MINBPC(enc, ptr))
1308 {
1309 ptr += MINBPC(enc);
1310 tok = XML_TOK_NMTOKEN;
1311 break;
1312 }
1313 /* fall through */
1314 default:
1315 *nextTokPtr = ptr;
1316 return XML_TOK_INVALID;
1317 }
1318 while (ptr != end)
1319 {
1320 switch (BYTE_TYPE(enc, ptr))
1321 {
1322 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
1323 case BT_GT:
1324 case BT_RPAR:
1325 case BT_COMMA:
1326 case BT_VERBAR:
1327 case BT_LSQB:
1328 case BT_PERCNT:
1329 case BT_S:
1330 case BT_CR:
1331 case BT_LF:
1332 *nextTokPtr = ptr;
1333 return tok;
1334#ifdef XML_NS
1335 case BT_COLON:
1336 ptr += MINBPC(enc);
1337 switch (tok)
1338 {
1339 case XML_TOK_NAME:
1340 if (ptr == end)
1341 return XML_TOK_PARTIAL;
1342 tok = XML_TOK_PREFIXED_NAME;
1343 switch (BYTE_TYPE(enc, ptr))
1344 {
1345 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
1346 default:
1347 tok = XML_TOK_NMTOKEN;
1348 break;
1349 }
1350 break;
1351 case XML_TOK_PREFIXED_NAME:
1352 tok = XML_TOK_NMTOKEN;
1353 break;
1354 }
1355 break;
1356#endif
1357 case BT_PLUS:
1358 if (tok == XML_TOK_NMTOKEN)
1359 {
1360 *nextTokPtr = ptr;
1361 return XML_TOK_INVALID;
1362 }
1363 *nextTokPtr = ptr + MINBPC(enc);
1364 return XML_TOK_NAME_PLUS;
1365 case BT_AST:
1366 if (tok == XML_TOK_NMTOKEN)
1367 {
1368 *nextTokPtr = ptr;
1369 return XML_TOK_INVALID;
1370 }
1371 *nextTokPtr = ptr + MINBPC(enc);
1372 return XML_TOK_NAME_ASTERISK;
1373 case BT_QUEST:
1374 if (tok == XML_TOK_NMTOKEN)
1375 {
1376 *nextTokPtr = ptr;
1377 return XML_TOK_INVALID;
1378 }
1379 *nextTokPtr = ptr + MINBPC(enc);
1380 return XML_TOK_NAME_QUESTION;
1381 default:
1382 *nextTokPtr = ptr;
1383 return XML_TOK_INVALID;
1384 }
1385 }
1386 return -tok;
1387}
1388
1389static
1390int EXPATENTRY PREFIX(attributeValueTok) (const ENCODING * enc, const char *ptr, const char *end,
1391 const char **nextTokPtr)
1392{
1393 const char *start;
1394
1395 if (ptr == end)
1396 return XML_TOK_NONE;
1397 start = ptr;
1398 while (ptr != end)
1399 {
1400 switch (BYTE_TYPE(enc, ptr))
1401 {
1402#define LEAD_CASE(n) \
1403 case BT_LEAD ## n: ptr += n; break;
1404 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1405#undef LEAD_CASE
1406 case BT_AMP:
1407 if (ptr == start)
1408 return PREFIX(scanRef) (enc, ptr + MINBPC(enc), end, nextTokPtr);
1409 *nextTokPtr = ptr;
1410 return XML_TOK_DATA_CHARS;
1411 case BT_LT:
1412 /* this is for inside entity references */
1413 *nextTokPtr = ptr;
1414 return XML_TOK_INVALID;
1415 case BT_LF:
1416 if (ptr == start)
1417 {
1418 *nextTokPtr = ptr + MINBPC(enc);
1419 return XML_TOK_DATA_NEWLINE;
1420 }
1421 *nextTokPtr = ptr;
1422 return XML_TOK_DATA_CHARS;
1423 case BT_CR:
1424 if (ptr == start)
1425 {
1426 ptr += MINBPC(enc);
1427 if (ptr == end)
1428 return XML_TOK_TRAILING_CR;
1429 if (BYTE_TYPE(enc, ptr) == BT_LF)
1430 ptr += MINBPC(enc);
1431 *nextTokPtr = ptr;
1432 return XML_TOK_DATA_NEWLINE;
1433 }
1434 *nextTokPtr = ptr;
1435 return XML_TOK_DATA_CHARS;
1436 case BT_S:
1437 if (ptr == start)
1438 {
1439 *nextTokPtr = ptr + MINBPC(enc);
1440 return XML_TOK_ATTRIBUTE_VALUE_S;
1441 }
1442 *nextTokPtr = ptr;
1443 return XML_TOK_DATA_CHARS;
1444 default:
1445 ptr += MINBPC(enc);
1446 break;
1447 }
1448 }
1449 *nextTokPtr = ptr;
1450 return XML_TOK_DATA_CHARS;
1451}
1452
1453static
1454int EXPATENTRY PREFIX(entityValueTok) (const ENCODING * enc, const char *ptr, const char *end,
1455 const char **nextTokPtr)
1456{
1457 const char *start;
1458
1459 if (ptr == end)
1460 return XML_TOK_NONE;
1461 start = ptr;
1462 while (ptr != end)
1463 {
1464 switch (BYTE_TYPE(enc, ptr))
1465 {
1466#define LEAD_CASE(n) \
1467 case BT_LEAD ## n: ptr += n; break;
1468 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1469#undef LEAD_CASE
1470 case BT_AMP:
1471 if (ptr == start)
1472 return PREFIX(scanRef) (enc, ptr + MINBPC(enc), end, nextTokPtr);
1473 *nextTokPtr = ptr;
1474 return XML_TOK_DATA_CHARS;
1475 case BT_PERCNT:
1476 if (ptr == start)
1477 {
1478 int tok = PREFIX(scanPercent) (enc, ptr + MINBPC(enc),
1479 end, nextTokPtr);
1480
1481 return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok;
1482 }
1483 *nextTokPtr = ptr;
1484 return XML_TOK_DATA_CHARS;
1485 case BT_LF:
1486 if (ptr == start)
1487 {
1488 *nextTokPtr = ptr + MINBPC(enc);
1489 return XML_TOK_DATA_NEWLINE;
1490 }
1491 *nextTokPtr = ptr;
1492 return XML_TOK_DATA_CHARS;
1493 case BT_CR:
1494 if (ptr == start)
1495 {
1496 ptr += MINBPC(enc);
1497 if (ptr == end)
1498 return XML_TOK_TRAILING_CR;
1499 if (BYTE_TYPE(enc, ptr) == BT_LF)
1500 ptr += MINBPC(enc);
1501 *nextTokPtr = ptr;
1502 return XML_TOK_DATA_NEWLINE;
1503 }
1504 *nextTokPtr = ptr;
1505 return XML_TOK_DATA_CHARS;
1506 default:
1507 ptr += MINBPC(enc);
1508 break;
1509 }
1510 }
1511 *nextTokPtr = ptr;
1512 return XML_TOK_DATA_CHARS;
1513}
1514
1515#ifdef XML_DTD
1516
1517static
1518int EXPATENTRY PREFIX(ignoreSectionTok) (const ENCODING * enc, const char *ptr, const char *end,
1519 const char **nextTokPtr)
1520{
1521 int level = 0;
1522
1523 if (MINBPC(enc) > 1)
1524 {
1525 size_t n = end - ptr;
1526
1527 if (n & (MINBPC(enc) - 1))
1528 {
1529 n &= ~(MINBPC(enc) - 1);
1530 end = ptr + n;
1531 }
1532 }
1533 while (ptr != end)
1534 {
1535 switch (BYTE_TYPE(enc, ptr))
1536 {
1537 INVALID_CASES(ptr, nextTokPtr)
1538 case BT_LT:
1539 if ((ptr += MINBPC(enc)) == end)
1540 return XML_TOK_PARTIAL;
1541 if (CHAR_MATCHES(enc, ptr, ASCII_EXCL))
1542 {
1543 if ((ptr += MINBPC(enc)) == end)
1544 return XML_TOK_PARTIAL;
1545 if (CHAR_MATCHES(enc, ptr, ASCII_LSQB))
1546 {
1547 ++level;
1548 ptr += MINBPC(enc);
1549 }
1550 }
1551 break;
1552 case BT_RSQB:
1553 if ((ptr += MINBPC(enc)) == end)
1554 return XML_TOK_PARTIAL;
1555 if (CHAR_MATCHES(enc, ptr, ASCII_RSQB))
1556 {
1557 if ((ptr += MINBPC(enc)) == end)
1558 return XML_TOK_PARTIAL;
1559 if (CHAR_MATCHES(enc, ptr, ASCII_GT))
1560 {
1561 ptr += MINBPC(enc);
1562 if (level == 0)
1563 {
1564 *nextTokPtr = ptr;
1565 return XML_TOK_IGNORE_SECT;
1566 }
1567 --level;
1568 }
1569 }
1570 break;
1571 default:
1572 ptr += MINBPC(enc);
1573 break;
1574 }
1575 }
1576 return XML_TOK_PARTIAL;
1577}
1578
1579#endif /* XML_DTD */
1580
1581static
1582int EXPATENTRY PREFIX(isPublicId) (const ENCODING * enc, const char *ptr, const char *end,
1583 const char **badPtr)
1584{
1585 ptr += MINBPC(enc);
1586 end -= MINBPC(enc);
1587 for (; ptr != end; ptr += MINBPC(enc))
1588 {
1589 switch (BYTE_TYPE(enc, ptr))
1590 {
1591 case BT_DIGIT:
1592 case BT_HEX:
1593 case BT_MINUS:
1594 case BT_APOS:
1595 case BT_LPAR:
1596 case BT_RPAR:
1597 case BT_PLUS:
1598 case BT_COMMA:
1599 case BT_SOL:
1600 case BT_EQUALS:
1601 case BT_QUEST:
1602 case BT_CR:
1603 case BT_LF:
1604 case BT_SEMI:
1605 case BT_EXCL:
1606 case BT_AST:
1607 case BT_PERCNT:
1608 case BT_NUM:
1609#ifdef XML_NS
1610 case BT_COLON:
1611#endif
1612 break;
1613 case BT_S:
1614 if (CHAR_MATCHES(enc, ptr, ASCII_TAB))
1615 {
1616 *badPtr = ptr;
1617 return 0;
1618 }
1619 break;
1620 case BT_NAME:
1621 case BT_NMSTRT:
1622 if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f))
1623 break;
1624 default:
1625 switch (BYTE_TO_ASCII(enc, ptr))
1626 {
1627 case 0x24: /* $ */
1628 case 0x40: /* @ */
1629 break;
1630 default:
1631 *badPtr = ptr;
1632 return 0;
1633 }
1634 break;
1635 }
1636 }
1637 return 1;
1638}
1639
1640/* This must only be called for a well-formed start-tag or empty element tag.
1641 * Returns the number of attributes. Pointers to the first attsMax attributes
1642 * are stored in atts. */
1643
1644static
1645int EXPATENTRY PREFIX(getAtts) (const ENCODING * enc, const char *ptr,
1646 int attsMax, ATTRIBUTE * atts)
1647{
1648 enum
1649 {
1650 other, inName, inValue
1651 }
1652 state = inName;
1653 int nAtts = 0;
1654 int open = 0; /* defined when state == inValue;
1655
1656 * initialization just to shut up compilers */
1657
1658 for (ptr += MINBPC(enc);; ptr += MINBPC(enc))
1659 {
1660 switch (BYTE_TYPE(enc, ptr))
1661 {
1662#define START_NAME \
1663 if (state == other) { \
1664 if (nAtts < attsMax) { \
1665 atts[nAtts].name = ptr; \
1666 atts[nAtts].normalized = 1; \
1667 } \
1668 state = inName; \
1669 }
1670#define LEAD_CASE(n) \
1671 case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break;
1672 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1673#undef LEAD_CASE
1674 case BT_NONASCII:
1675 case BT_NMSTRT:
1676 case BT_HEX:
1677 START_NAME
1678 break;
1679#undef START_NAME
1680 case BT_QUOT:
1681 if (state != inValue)
1682 {
1683 if (nAtts < attsMax)
1684 atts[nAtts].valuePtr = ptr + MINBPC(enc);
1685 state = inValue;
1686 open = BT_QUOT;
1687 }
1688 else if (open == BT_QUOT)
1689 {
1690 state = other;
1691 if (nAtts < attsMax)
1692 atts[nAtts].valueEnd = ptr;
1693 nAtts++;
1694 }
1695 break;
1696 case BT_APOS:
1697 if (state != inValue)
1698 {
1699 if (nAtts < attsMax)
1700 atts[nAtts].valuePtr = ptr + MINBPC(enc);
1701 state = inValue;
1702 open = BT_APOS;
1703 }
1704 else if (open == BT_APOS)
1705 {
1706 state = other;
1707 if (nAtts < attsMax)
1708 atts[nAtts].valueEnd = ptr;
1709 nAtts++;
1710 }
1711 break;
1712 case BT_AMP:
1713 if (nAtts < attsMax)
1714 atts[nAtts].normalized = 0;
1715 break;
1716 case BT_S:
1717 if (state == inName)
1718 state = other;
1719 else if (state == inValue
1720 && nAtts < attsMax
1721 && atts[nAtts].normalized
1722 && (ptr == atts[nAtts].valuePtr
1723 || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE
1724 || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE
1725 || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open))
1726 atts[nAtts].normalized = 0;
1727 break;
1728 case BT_CR:
1729 case BT_LF:
1730 /* This case ensures that the first attribute name is counted
1731 * Apart from that we could just change state on the quote. */
1732 if (state == inName)
1733 state = other;
1734 else if (state == inValue && nAtts < attsMax)
1735 atts[nAtts].normalized = 0;
1736 break;
1737 case BT_GT:
1738 case BT_SOL:
1739 if (state != inValue)
1740 return nAtts;
1741 break;
1742 default:
1743 break;
1744 }
1745 }
1746 /* not reached */
1747}
1748
1749static
1750int EXPATENTRY PREFIX(charRefNumber) (const ENCODING * enc, const char *ptr)
1751{
1752 int result = 0;
1753
1754 /* skip &# */
1755 ptr += 2 * MINBPC(enc);
1756 if (CHAR_MATCHES(enc, ptr, ASCII_x))
1757 {
1758 for (ptr += MINBPC(enc); !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc))
1759 {
1760 int c = BYTE_TO_ASCII(enc, ptr);
1761
1762 switch (c)
1763 {
1764 case ASCII_0:
1765 case ASCII_1:
1766 case ASCII_2:
1767 case ASCII_3:
1768 case ASCII_4:
1769 case ASCII_5:
1770 case ASCII_6:
1771 case ASCII_7:
1772 case ASCII_8:
1773 case ASCII_9:
1774 result <<= 4;
1775 result |= (c - ASCII_0);
1776 break;
1777 case ASCII_A:
1778 case ASCII_B:
1779 case ASCII_C:
1780 case ASCII_D:
1781 case ASCII_E:
1782 case ASCII_F:
1783 result <<= 4;
1784 result += 10 + (c - ASCII_A);
1785 break;
1786 case ASCII_a:
1787 case ASCII_b:
1788 case ASCII_c:
1789 case ASCII_d:
1790 case ASCII_e:
1791 case ASCII_f:
1792 result <<= 4;
1793 result += 10 + (c - ASCII_a);
1794 break;
1795 }
1796 if (result >= 0x110000)
1797 return -1;
1798 }
1799 }
1800 else
1801 {
1802 for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc))
1803 {
1804 int c = BYTE_TO_ASCII(enc, ptr);
1805
1806 result *= 10;
1807 result += (c - ASCII_0);
1808 if (result >= 0x110000)
1809 return -1;
1810 }
1811 }
1812 return checkCharRefNumber(result);
1813}
1814
1815static
1816int EXPATENTRY PREFIX(predefinedEntityName) (const ENCODING * enc, const char *ptr, const char *end)
1817{
1818 switch ((end - ptr) / MINBPC(enc))
1819 {
1820 case 2:
1821 if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t))
1822 {
1823 switch (BYTE_TO_ASCII(enc, ptr))
1824 {
1825 case ASCII_l:
1826 return ASCII_LT;
1827 case ASCII_g:
1828 return ASCII_GT;
1829 }
1830 }
1831 break;
1832 case 3:
1833 if (CHAR_MATCHES(enc, ptr, ASCII_a))
1834 {
1835 ptr += MINBPC(enc);
1836 if (CHAR_MATCHES(enc, ptr, ASCII_m))
1837 {
1838 ptr += MINBPC(enc);
1839 if (CHAR_MATCHES(enc, ptr, ASCII_p))
1840 return ASCII_AMP;
1841 }
1842 }
1843 break;
1844 case 4:
1845 switch (BYTE_TO_ASCII(enc, ptr))
1846 {
1847 case ASCII_q:
1848 ptr += MINBPC(enc);
1849 if (CHAR_MATCHES(enc, ptr, ASCII_u))
1850 {
1851 ptr += MINBPC(enc);
1852 if (CHAR_MATCHES(enc, ptr, ASCII_o))
1853 {
1854 ptr += MINBPC(enc);
1855 if (CHAR_MATCHES(enc, ptr, ASCII_t))
1856 return ASCII_QUOT;
1857 }
1858 }
1859 break;
1860 case ASCII_a:
1861 ptr += MINBPC(enc);
1862 if (CHAR_MATCHES(enc, ptr, ASCII_p))
1863 {
1864 ptr += MINBPC(enc);
1865 if (CHAR_MATCHES(enc, ptr, ASCII_o))
1866 {
1867 ptr += MINBPC(enc);
1868 if (CHAR_MATCHES(enc, ptr, ASCII_s))
1869 return ASCII_APOS;
1870 }
1871 }
1872 break;
1873 }
1874 }
1875 return 0;
1876}
1877
1878static
1879int EXPATENTRY PREFIX(sameName) (const ENCODING * enc, const char *ptr1, const char *ptr2)
1880{
1881 for (;;)
1882 {
1883 switch (BYTE_TYPE(enc, ptr1))
1884 {
1885#define LEAD_CASE(n) \
1886 case BT_LEAD ## n: \
1887 if (*ptr1++ != *ptr2++) \
1888 return 0;
1889 LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2)
1890#undef LEAD_CASE
1891 /* fall through */
1892 if (*ptr1++ != *ptr2++)
1893 return 0;
1894 break;
1895 case BT_NONASCII:
1896 case BT_NMSTRT:
1897#ifdef XML_NS
1898 case BT_COLON:
1899#endif
1900 case BT_HEX:
1901 case BT_DIGIT:
1902 case BT_NAME:
1903 case BT_MINUS:
1904 if (*ptr2++ != *ptr1++)
1905 return 0;
1906 if (MINBPC(enc) > 1)
1907 {
1908 if (*ptr2++ != *ptr1++)
1909 return 0;
1910 if (MINBPC(enc) > 2)
1911 {
1912 if (*ptr2++ != *ptr1++)
1913 return 0;
1914 if (MINBPC(enc) > 3)
1915 {
1916 if (*ptr2++ != *ptr1++)
1917 return 0;
1918 }
1919 }
1920 }
1921 break;
1922 default:
1923 if (MINBPC(enc) == 1 && *ptr1 == *ptr2)
1924 return 1;
1925 switch (BYTE_TYPE(enc, ptr2))
1926 {
1927 case BT_LEAD2:
1928 case BT_LEAD3:
1929 case BT_LEAD4:
1930 case BT_NONASCII:
1931 case BT_NMSTRT:
1932#ifdef XML_NS
1933 case BT_COLON:
1934#endif
1935 case BT_HEX:
1936 case BT_DIGIT:
1937 case BT_NAME:
1938 case BT_MINUS:
1939 return 0;
1940 default:
1941 return 1;
1942 }
1943 }
1944 }
1945 /* not reached */
1946}
1947
1948static
1949int EXPATENTRY PREFIX(nameMatchesAscii) (const ENCODING * enc, const char *ptr1,
1950 const char *end1, const char *ptr2)
1951{
1952 for (; *ptr2; ptr1 += MINBPC(enc), ptr2++)
1953 {
1954 if (ptr1 == end1)
1955 return 0;
1956 if (!CHAR_MATCHES(enc, ptr1, *ptr2))
1957 return 0;
1958 }
1959 return ptr1 == end1;
1960}
1961
1962static
1963int EXPATENTRY PREFIX(nameLength) (const ENCODING * enc, const char *ptr)
1964{
1965 const char *start = ptr;
1966
1967 for (;;)
1968 {
1969 switch (BYTE_TYPE(enc, ptr))
1970 {
1971#define LEAD_CASE(n) \
1972 case BT_LEAD ## n: ptr += n; break;
1973 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1974#undef LEAD_CASE
1975 case BT_NONASCII:
1976 case BT_NMSTRT:
1977#ifdef XML_NS
1978 case BT_COLON:
1979#endif
1980 case BT_HEX:
1981 case BT_DIGIT:
1982 case BT_NAME:
1983 case BT_MINUS:
1984 ptr += MINBPC(enc);
1985 break;
1986 default:
1987 return ptr - start;
1988 }
1989 }
1990}
1991
1992static
1993const char *EXPATENTRY PREFIX(skipS) (const ENCODING * enc, const char *ptr)
1994{
1995 for (;;)
1996 {
1997 switch (BYTE_TYPE(enc, ptr))
1998 {
1999 case BT_LF:
2000 case BT_CR:
2001 case BT_S:
2002 ptr += MINBPC(enc);
2003 break;
2004 default:
2005 return ptr;
2006 }
2007 }
2008}
2009
2010static
2011void EXPATENTRY PREFIX(updatePosition) (const ENCODING * enc,
2012 const char *ptr,
2013 const char *end,
2014 POSITION * pos)
2015{
2016 while (ptr != end)
2017 {
2018 switch (BYTE_TYPE(enc, ptr))
2019 {
2020#define LEAD_CASE(n) \
2021 case BT_LEAD ## n: \
2022 ptr += n; \
2023 break;
2024 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
2025#undef LEAD_CASE
2026 case BT_LF:
2027 pos->columnNumber = (unsigned)-1;
2028 pos->lineNumber++;
2029 ptr += MINBPC(enc);
2030 break;
2031 case BT_CR:
2032 pos->lineNumber++;
2033 ptr += MINBPC(enc);
2034 if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF)
2035 ptr += MINBPC(enc);
2036 pos->columnNumber = (unsigned)-1;
2037 break;
2038 default:
2039 ptr += MINBPC(enc);
2040 break;
2041 }
2042 pos->columnNumber++;
2043 }
2044}
2045
2046#undef DO_LEAD_CASE
2047#undef MULTIBYTE_CASES
2048#undef INVALID_CASES
2049#undef CHECK_NAME_CASE
2050#undef CHECK_NAME_CASES
2051#undef CHECK_NMSTRT_CASE
2052#undef CHECK_NMSTRT_CASES
Note: See TracBrowser for help on using the repository browser.