source: branches/branch-1-0/src/helpers/xmltok_impl.c

Last change on this file was 147, checked in by umoeller, 23 years ago

Misc updates for Unicode.

  • Property svn:eol-style set to CRLF
  • Property svn:keywords set to Author Date Id Revision
File size: 47.7 KB
Line 
1
2/*
3 *sourcefile xmltok_impl.c
4 * part of the expat implementation. See xmlparse.c.
5 *
6 * NOTE: This file must not be compiled directly. It is
7 * #include'd from xmltok.c several times.
8 */
9
10/*
11 * Copyright (C) 2001 Ulrich Möller.
12 * Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd.
13 * and Clark Cooper.
14 *
15 * Permission is hereby granted, free of charge, to any person obtaining
16 * a copy of this software and associated documentation files (the
17 * "Software"), to deal in the Software without restriction, including
18 * without limitation the rights to use, copy, modify, merge, publish,
19 * distribute, sublicense, and/or sell copies of the Software, and to
20 * permit persons to whom the Software is furnished to do so, subject to
21 * the following conditions:
22 *
23 * The above copyright notice and this permission notice shall be included
24 * in all copies or substantial portions of the Software.
25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
29 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
30 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
31 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
32 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
33 */
34
/*
 * Fallback validity test for multi-byte sequences: when nothing has
 * defined IS_INVALID_CHAR before this point, no sequence is rejected.
 */
#if !defined(IS_INVALID_CHAR)
#define IS_INVALID_CHAR(enc, ptr, n) (0)
#endif

/*
 * Switch case for an n-byte lead byte (BT_LEAD<n>).
 * Uses `enc` and `end` from the scope in which it is expanded.
 *   - fewer than n bytes remain:   return XML_TOK_PARTIAL_CHAR
 *   - IS_INVALID_CHAR() is true:   *nextTokPtr = ptr, return XML_TOK_INVALID
 *   - otherwise:                   ptr += n and leave the switch
 */
#define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \
    case BT_LEAD ## n: \
      if (end - (ptr) < (n)) \
        return XML_TOK_PARTIAL_CHAR; \
      if (IS_INVALID_CHAR(enc, ptr, n)) { \
        *(nextTokPtr) = (ptr); \
        return XML_TOK_INVALID; \
      } \
      (ptr) += (n); \
      break;

/*
 * All "bad character" switch cases in one go: the three lead-byte cases
 * above, followed by BT_NONXML / BT_MALFORM / BT_TRAIL, which store ptr
 * in *nextTokPtr and return XML_TOK_INVALID.
 */
#define INVALID_CASES(ptr, nextTokPtr) \
    INVALID_LEAD_CASE(2, ptr, nextTokPtr) \
    INVALID_LEAD_CASE(3, ptr, nextTokPtr) \
    INVALID_LEAD_CASE(4, ptr, nextTokPtr) \
    case BT_NONXML: \
    case BT_MALFORM: \
    case BT_TRAIL: \
      *(nextTokPtr) = (ptr); \
      return XML_TOK_INVALID;
59
/*
 * Switch case for an n-byte lead byte (BT_LEAD<n>) inside a name.
 *   - fewer than n bytes remain:   return XML_TOK_PARTIAL_CHAR
 *   - IS_NAME_CHAR() is false:     *nextTokPtr = ptr, return XML_TOK_INVALID
 *   - otherwise:                   ptr += n and leave the switch
 */
#define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \
    case BT_LEAD ## n: \
      if ((end) - (ptr) < (n)) \
        return XML_TOK_PARTIAL_CHAR; \
      if (!IS_NAME_CHAR(enc, ptr, n)) { \
        *(nextTokPtr) = (ptr); \
        return XML_TOK_INVALID; \
      } \
      (ptr) += (n); \
      break;

/*
 * Switch cases that accept one name character and step over it.
 * A BT_NONASCII unit is first checked with IS_NAME_CHAR_MINBPC and, when
 * accepted, deliberately falls through to the single-unit cases, which
 * advance ptr by MINBPC(enc). Multi-byte characters are handled by
 * CHECK_NAME_CASE for n = 2, 3, 4.
 */
#define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \
    case BT_NONASCII: \
      if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \
        *(nextTokPtr) = (ptr); \
        return XML_TOK_INVALID; \
      } \
      /* fall through */ \
    case BT_NMSTRT: \
    case BT_HEX: \
    case BT_DIGIT: \
    case BT_NAME: \
    case BT_MINUS: \
      (ptr) += MINBPC(enc); \
      break; \
    CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \
    CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \
    CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)
87
/*
 * Switch case for an n-byte lead byte (BT_LEAD<n>) at the start of a name.
 *   - fewer than n bytes remain:    return XML_TOK_PARTIAL_CHAR
 *   - IS_NMSTRT_CHAR() is false:    *nextTokPtr = ptr, return XML_TOK_INVALID
 *   - otherwise:                    ptr += n and leave the switch
 */
#define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \
    case BT_LEAD ## n: \
      if ((end) - (ptr) < (n)) \
        return XML_TOK_PARTIAL_CHAR; \
      if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \
        *(nextTokPtr) = (ptr); \
        return XML_TOK_INVALID; \
      } \
      (ptr) += (n); \
      break;

/*
 * Switch cases that accept one name-start character and step over it.
 * A BT_NONASCII unit is first checked with IS_NMSTRT_CHAR_MINBPC and,
 * when accepted, deliberately falls through to BT_NMSTRT / BT_HEX, which
 * advance ptr by MINBPC(enc). Multi-byte characters are handled by
 * CHECK_NMSTRT_CASE for n = 2, 3, 4.
 */
#define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \
    case BT_NONASCII: \
      if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \
        *(nextTokPtr) = (ptr); \
        return XML_TOK_INVALID; \
      } \
      /* fall through */ \
    case BT_NMSTRT: \
    case BT_HEX: \
      (ptr) += MINBPC(enc); \
      break; \
    CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \
    CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \
    CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)
112
/*
 * PREFIX() decorates every function name defined below. When nothing has
 * defined it beforehand, the name is used unchanged.
 */
#if !defined(PREFIX)
#define PREFIX(ident) ident
#endif
116
117/* ptr points to character following "<!-" */
118
119static int EXPATENTRY PREFIX(scanComment)(const ENCODING *enc,
120 const char *ptr,
121 const char *end,
122 const char **nextTokPtr)
123{
124 if (ptr != end) {
125 if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
126 *nextTokPtr = ptr;
127 return XML_TOK_INVALID;
128 }
129 ptr += MINBPC(enc);
130 while (ptr != end) {
131 switch (BYTE_TYPE(enc, ptr)) {
132 INVALID_CASES(ptr, nextTokPtr)
133 case BT_MINUS:
134 if ((ptr += MINBPC(enc)) == end)
135 return XML_TOK_PARTIAL;
136 if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
137 if ((ptr += MINBPC(enc)) == end)
138 return XML_TOK_PARTIAL;
139 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
140 *nextTokPtr = ptr;
141 return XML_TOK_INVALID;
142 }
143 *nextTokPtr = ptr + MINBPC(enc);
144 return XML_TOK_COMMENT;
145 }
146 break;
147 default:
148 ptr += MINBPC(enc);
149 break;
150 }
151 }
152 }
153 return XML_TOK_PARTIAL;
154}
155
156/* ptr points to character following "<!" */
157
158static int EXPATENTRY PREFIX(scanDecl)(const ENCODING *enc,
159 const char *ptr,
160 const char *end,
161 const char **nextTokPtr)
162{
163 if (ptr == end)
164 return XML_TOK_PARTIAL;
165 switch (BYTE_TYPE(enc, ptr)) {
166 case BT_MINUS:
167 return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
168 case BT_LSQB:
169 *nextTokPtr = ptr + MINBPC(enc);
170 return XML_TOK_COND_SECT_OPEN;
171 case BT_NMSTRT:
172 case BT_HEX:
173 ptr += MINBPC(enc);
174 break;
175 default:
176 *nextTokPtr = ptr;
177 return XML_TOK_INVALID;
178 }
179 while (ptr != end) {
180 switch (BYTE_TYPE(enc, ptr)) {
181 case BT_PERCNT:
182 if (ptr + MINBPC(enc) == end)
183 return XML_TOK_PARTIAL;
184 /* don't allow <!ENTITY% foo "whatever"> */
185 switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
186 case BT_S: case BT_CR: case BT_LF: case BT_PERCNT:
187 *nextTokPtr = ptr;
188 return XML_TOK_INVALID;
189 }
190 /* fall through */
191 case BT_S: case BT_CR: case BT_LF:
192 *nextTokPtr = ptr;
193 return XML_TOK_DECL_OPEN;
194 case BT_NMSTRT:
195 case BT_HEX:
196 ptr += MINBPC(enc);
197 break;
198 default:
199 *nextTokPtr = ptr;
200 return XML_TOK_INVALID;
201 }
202 }
203 return XML_TOK_PARTIAL;
204}
205
206static int EXPATENTRY PREFIX(checkPiTarget)(const ENCODING *enc,
207 const char *ptr,
208 const char *end,
209 int *tokPtr)
210{
211 int upper = 0;
212 *tokPtr = XML_TOK_PI;
213 if (end - ptr != MINBPC(enc)*3)
214 return 1;
215 switch (BYTE_TO_ASCII(enc, ptr)) {
216 case ASCII_x:
217 break;
218 case ASCII_X:
219 upper = 1;
220 break;
221 default:
222 return 1;
223 }
224 ptr += MINBPC(enc);
225 switch (BYTE_TO_ASCII(enc, ptr)) {
226 case ASCII_m:
227 break;
228 case ASCII_M:
229 upper = 1;
230 break;
231 default:
232 return 1;
233 }
234 ptr += MINBPC(enc);
235 switch (BYTE_TO_ASCII(enc, ptr)) {
236 case ASCII_l:
237 break;
238 case ASCII_L:
239 upper = 1;
240 break;
241 default:
242 return 1;
243 }
244 if (upper)
245 return 0;
246 *tokPtr = XML_TOK_XML_DECL;
247 return 1;
248}
249
250/* ptr points to character following "<?" */
251
252static int EXPATENTRY PREFIX(scanPi)(const ENCODING *enc,
253 const char *ptr,
254 const char *end,
255 const char **nextTokPtr)
256{
257 int tok;
258 const char *target = ptr;
259 if (ptr == end)
260 return XML_TOK_PARTIAL;
261 switch (BYTE_TYPE(enc, ptr)) {
262 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
263 default:
264 *nextTokPtr = ptr;
265 return XML_TOK_INVALID;
266 }
267 while (ptr != end) {
268 switch (BYTE_TYPE(enc, ptr)) {
269 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
270 case BT_S: case BT_CR: case BT_LF:
271 if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
272 *nextTokPtr = ptr;
273 return XML_TOK_INVALID;
274 }
275 ptr += MINBPC(enc);
276 while (ptr != end) {
277 switch (BYTE_TYPE(enc, ptr)) {
278 INVALID_CASES(ptr, nextTokPtr)
279 case BT_QUEST:
280 ptr += MINBPC(enc);
281 if (ptr == end)
282 return XML_TOK_PARTIAL;
283 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
284 *nextTokPtr = ptr + MINBPC(enc);
285 return tok;
286 }
287 break;
288 default:
289 ptr += MINBPC(enc);
290 break;
291 }
292 }
293 return XML_TOK_PARTIAL;
294 case BT_QUEST:
295 if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
296 *nextTokPtr = ptr;
297 return XML_TOK_INVALID;
298 }
299 ptr += MINBPC(enc);
300 if (ptr == end)
301 return XML_TOK_PARTIAL;
302 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
303 *nextTokPtr = ptr + MINBPC(enc);
304 return tok;
305 }
306 /* fall through */
307 default:
308 *nextTokPtr = ptr;
309 return XML_TOK_INVALID;
310 }
311 }
312 return XML_TOK_PARTIAL;
313}
314
315
316static int EXPATENTRY PREFIX(scanCdataSection)(const ENCODING *enc,
317 const char *ptr,
318 const char *end,
319 const char **nextTokPtr)
320{
321 static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, ASCII_LSQB };
322 int i;
323 /* CDATA[ */
324 if (end - ptr < 6 * MINBPC(enc))
325 return XML_TOK_PARTIAL;
326 for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
327 if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
328 *nextTokPtr = ptr;
329 return XML_TOK_INVALID;
330 }
331 }
332 *nextTokPtr = ptr;
333 return XML_TOK_CDATA_SECT_OPEN;
334}
335
336static int EXPATENTRY PREFIX(cdataSectionTok)(const ENCODING *enc,
337 const char *ptr,
338 const char *end,
339 const char **nextTokPtr)
340{
341 if (ptr == end)
342 return XML_TOK_NONE;
343 if (MINBPC(enc) > 1) {
344 size_t n = end - ptr;
345 if (n & (MINBPC(enc) - 1)) {
346 n &= ~(MINBPC(enc) - 1);
347 if (n == 0)
348 return XML_TOK_PARTIAL;
349 end = ptr + n;
350 }
351 }
352 switch (BYTE_TYPE(enc, ptr)) {
353 case BT_RSQB:
354 ptr += MINBPC(enc);
355 if (ptr == end)
356 return XML_TOK_PARTIAL;
357 if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
358 break;
359 ptr += MINBPC(enc);
360 if (ptr == end)
361 return XML_TOK_PARTIAL;
362 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
363 ptr -= MINBPC(enc);
364 break;
365 }
366 *nextTokPtr = ptr + MINBPC(enc);
367 return XML_TOK_CDATA_SECT_CLOSE;
368 case BT_CR:
369 ptr += MINBPC(enc);
370 if (ptr == end)
371 return XML_TOK_PARTIAL;
372 if (BYTE_TYPE(enc, ptr) == BT_LF)
373 ptr += MINBPC(enc);
374 *nextTokPtr = ptr;
375 return XML_TOK_DATA_NEWLINE;
376 case BT_LF:
377 *nextTokPtr = ptr + MINBPC(enc);
378 return XML_TOK_DATA_NEWLINE;
379 INVALID_CASES(ptr, nextTokPtr)
380 default:
381 ptr += MINBPC(enc);
382 break;
383 }
384 while (ptr != end) {
385 switch (BYTE_TYPE(enc, ptr)) {
386#define LEAD_CASE(n) \
387 case BT_LEAD ## n: \
388 if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
389 *nextTokPtr = ptr; \
390 return XML_TOK_DATA_CHARS; \
391 } \
392 ptr += n; \
393 break;
394 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
395#undef LEAD_CASE
396 case BT_NONXML:
397 case BT_MALFORM:
398 case BT_TRAIL:
399 case BT_CR:
400 case BT_LF:
401 case BT_RSQB:
402 *nextTokPtr = ptr;
403 return XML_TOK_DATA_CHARS;
404 default:
405 ptr += MINBPC(enc);
406 break;
407 }
408 }
409 *nextTokPtr = ptr;
410 return XML_TOK_DATA_CHARS;
411}
412
413/* ptr points to character following "</" */
414
415static int EXPATENTRY PREFIX(scanEndTag)(const ENCODING *enc,
416 const char *ptr,
417 const char *end,
418 const char **nextTokPtr)
419{
420 if (ptr == end)
421 return XML_TOK_PARTIAL;
422 switch (BYTE_TYPE(enc, ptr)) {
423 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
424 default:
425 *nextTokPtr = ptr;
426 return XML_TOK_INVALID;
427 }
428 while (ptr != end) {
429 switch (BYTE_TYPE(enc, ptr)) {
430 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
431 case BT_S: case BT_CR: case BT_LF:
432 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
433 switch (BYTE_TYPE(enc, ptr)) {
434 case BT_S: case BT_CR: case BT_LF:
435 break;
436 case BT_GT:
437 *nextTokPtr = ptr + MINBPC(enc);
438 return XML_TOK_END_TAG;
439 default:
440 *nextTokPtr = ptr;
441 return XML_TOK_INVALID;
442 }
443 }
444 return XML_TOK_PARTIAL;
445#ifdef XML_NS
446 case BT_COLON:
447 /* no need to check qname syntax here, since end-tag must match exactly */
448 ptr += MINBPC(enc);
449 break;
450#endif
451 case BT_GT:
452 *nextTokPtr = ptr + MINBPC(enc);
453 return XML_TOK_END_TAG;
454 default:
455 *nextTokPtr = ptr;
456 return XML_TOK_INVALID;
457 }
458 }
459 return XML_TOK_PARTIAL;
460}
461
462/* ptr points to character following "&#X" */
463
464static int EXPATENTRY PREFIX(scanHexCharRef)(const ENCODING *enc,
465 const char *ptr,
466 const char *end,
467 const char **nextTokPtr)
468{
469 if (ptr != end) {
470 switch (BYTE_TYPE(enc, ptr)) {
471 case BT_DIGIT:
472 case BT_HEX:
473 break;
474 default:
475 *nextTokPtr = ptr;
476 return XML_TOK_INVALID;
477 }
478 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
479 switch (BYTE_TYPE(enc, ptr)) {
480 case BT_DIGIT:
481 case BT_HEX:
482 break;
483 case BT_SEMI:
484 *nextTokPtr = ptr + MINBPC(enc);
485 return XML_TOK_CHAR_REF;
486 default:
487 *nextTokPtr = ptr;
488 return XML_TOK_INVALID;
489 }
490 }
491 }
492 return XML_TOK_PARTIAL;
493}
494
495/* ptr points to character following "&#" */
496
497static int EXPATENTRY PREFIX(scanCharRef)(const ENCODING *enc,
498 const char *ptr,
499 const char *end,
500 const char **nextTokPtr)
501{
502 if (ptr != end) {
503 if (CHAR_MATCHES(enc, ptr, ASCII_x))
504 return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
505 switch (BYTE_TYPE(enc, ptr)) {
506 case BT_DIGIT:
507 break;
508 default:
509 *nextTokPtr = ptr;
510 return XML_TOK_INVALID;
511 }
512 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
513 switch (BYTE_TYPE(enc, ptr)) {
514 case BT_DIGIT:
515 break;
516 case BT_SEMI:
517 *nextTokPtr = ptr + MINBPC(enc);
518 return XML_TOK_CHAR_REF;
519 default:
520 *nextTokPtr = ptr;
521 return XML_TOK_INVALID;
522 }
523 }
524 }
525 return XML_TOK_PARTIAL;
526}
527
528/* ptr points to character following "&" */
529
530static int EXPATENTRY PREFIX(scanRef)(const ENCODING *enc,
531 const char *ptr,
532 const char *end,
533 const char **nextTokPtr)
534{
535 if (ptr == end)
536 return XML_TOK_PARTIAL;
537 switch (BYTE_TYPE(enc, ptr)) {
538 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
539 case BT_NUM:
540 return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
541 default:
542 *nextTokPtr = ptr;
543 return XML_TOK_INVALID;
544 }
545 while (ptr != end) {
546 switch (BYTE_TYPE(enc, ptr)) {
547 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
548 case BT_SEMI:
549 *nextTokPtr = ptr + MINBPC(enc);
550 return XML_TOK_ENTITY_REF;
551 default:
552 *nextTokPtr = ptr;
553 return XML_TOK_INVALID;
554 }
555 }
556 return XML_TOK_PARTIAL;
557}
558
559/* ptr points to character following first character of attribute name */
560
561static int EXPATENTRY PREFIX(scanAtts)(const ENCODING *enc,
562 const char *ptr,
563 const char *end,
564 const char **nextTokPtr)
565{
566#ifdef XML_NS
567 int hadColon = 0;
568#endif
569 while (ptr != end) {
570 switch (BYTE_TYPE(enc, ptr)) {
571 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
572#ifdef XML_NS
573 case BT_COLON:
574 if (hadColon) {
575 *nextTokPtr = ptr;
576 return XML_TOK_INVALID;
577 }
578 hadColon = 1;
579 ptr += MINBPC(enc);
580 if (ptr == end)
581 return XML_TOK_PARTIAL;
582 switch (BYTE_TYPE(enc, ptr)) {
583 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
584 default:
585 *nextTokPtr = ptr;
586 return XML_TOK_INVALID;
587 }
588 break;
589#endif
590 case BT_S: case BT_CR: case BT_LF:
591 for (;;) {
592 int t;
593
594 ptr += MINBPC(enc);
595 if (ptr == end)
596 return XML_TOK_PARTIAL;
597 t = BYTE_TYPE(enc, ptr);
598 if (t == BT_EQUALS)
599 break;
600 switch (t) {
601 case BT_S:
602 case BT_LF:
603 case BT_CR:
604 break;
605 default:
606 *nextTokPtr = ptr;
607 return XML_TOK_INVALID;
608 }
609 }
610 /* fall through */
611 case BT_EQUALS:
612 {
613 int open;
614#ifdef XML_NS
615 hadColon = 0;
616#endif
617 for (;;) {
618
619 ptr += MINBPC(enc);
620 if (ptr == end)
621 return XML_TOK_PARTIAL;
622 open = BYTE_TYPE(enc, ptr);
623 if (open == BT_QUOT || open == BT_APOS)
624 break;
625 switch (open) {
626 case BT_S:
627 case BT_LF:
628 case BT_CR:
629 break;
630 default:
631 *nextTokPtr = ptr;
632 return XML_TOK_INVALID;
633 }
634 }
635 ptr += MINBPC(enc);
636 /* in attribute value */
637 for (;;) {
638 int t;
639 if (ptr == end)
640 return XML_TOK_PARTIAL;
641 t = BYTE_TYPE(enc, ptr);
642 if (t == open)
643 break;
644 switch (t) {
645 INVALID_CASES(ptr, nextTokPtr)
646 case BT_AMP:
647 {
648 int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr);
649 if (tok <= 0) {
650 if (tok == XML_TOK_INVALID)
651 *nextTokPtr = ptr;
652 return tok;
653 }
654 break;
655 }
656 case BT_LT:
657 *nextTokPtr = ptr;
658 return XML_TOK_INVALID;
659 default:
660 ptr += MINBPC(enc);
661 break;
662 }
663 }
664 ptr += MINBPC(enc);
665 if (ptr == end)
666 return XML_TOK_PARTIAL;
667 switch (BYTE_TYPE(enc, ptr)) {
668 case BT_S:
669 case BT_CR:
670 case BT_LF:
671 break;
672 case BT_SOL:
673 goto sol;
674 case BT_GT:
675 goto gt;
676 default:
677 *nextTokPtr = ptr;
678 return XML_TOK_INVALID;
679 }
680 /* ptr points to closing quote */
681 for (;;) {
682 ptr += MINBPC(enc);
683 if (ptr == end)
684 return XML_TOK_PARTIAL;
685 switch (BYTE_TYPE(enc, ptr)) {
686 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
687 case BT_S: case BT_CR: case BT_LF:
688 continue;
689 case BT_GT:
690 gt:
691 *nextTokPtr = ptr + MINBPC(enc);
692 return XML_TOK_START_TAG_WITH_ATTS;
693 case BT_SOL:
694 sol:
695 ptr += MINBPC(enc);
696 if (ptr == end)
697 return XML_TOK_PARTIAL;
698 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
699 *nextTokPtr = ptr;
700 return XML_TOK_INVALID;
701 }
702 *nextTokPtr = ptr + MINBPC(enc);
703 return XML_TOK_EMPTY_ELEMENT_WITH_ATTS;
704 default:
705 *nextTokPtr = ptr;
706 return XML_TOK_INVALID;
707 }
708 break;
709 }
710 break;
711 }
712 default:
713 *nextTokPtr = ptr;
714 return XML_TOK_INVALID;
715 }
716 }
717 return XML_TOK_PARTIAL;
718}
719
720/* ptr points to character following "<" */
721
722static int EXPATENTRY PREFIX(scanLt)(const ENCODING *enc,
723 const char *ptr,
724 const char *end,
725 const char **nextTokPtr)
726{
727#ifdef XML_NS
728 int hadColon;
729#endif
730 if (ptr == end)
731 return XML_TOK_PARTIAL;
732 switch (BYTE_TYPE(enc, ptr)) {
733 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
734 case BT_EXCL:
735 if ((ptr += MINBPC(enc)) == end)
736 return XML_TOK_PARTIAL;
737 switch (BYTE_TYPE(enc, ptr)) {
738 case BT_MINUS:
739 return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
740 case BT_LSQB:
741 return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc), end, nextTokPtr);
742 }
743 *nextTokPtr = ptr;
744 return XML_TOK_INVALID;
745 case BT_QUEST:
746 return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
747 case BT_SOL:
748 return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr);
749 default:
750 *nextTokPtr = ptr;
751 return XML_TOK_INVALID;
752 }
753#ifdef XML_NS
754 hadColon = 0;
755#endif
756 /* we have a start-tag */
757 while (ptr != end) {
758 switch (BYTE_TYPE(enc, ptr)) {
759 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
760#ifdef XML_NS
761 case BT_COLON:
762 if (hadColon) {
763 *nextTokPtr = ptr;
764 return XML_TOK_INVALID;
765 }
766 hadColon = 1;
767 ptr += MINBPC(enc);
768 if (ptr == end)
769 return XML_TOK_PARTIAL;
770 switch (BYTE_TYPE(enc, ptr)) {
771 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
772 default:
773 *nextTokPtr = ptr;
774 return XML_TOK_INVALID;
775 }
776 break;
777#endif
778 case BT_S: case BT_CR: case BT_LF:
779 {
780 ptr += MINBPC(enc);
781 while (ptr != end) {
782 switch (BYTE_TYPE(enc, ptr)) {
783 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
784 case BT_GT:
785 goto gt;
786 case BT_SOL:
787 goto sol;
788 case BT_S: case BT_CR: case BT_LF:
789 ptr += MINBPC(enc);
790 continue;
791 default:
792 *nextTokPtr = ptr;
793 return XML_TOK_INVALID;
794 }
795 return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr);
796 }
797 return XML_TOK_PARTIAL;
798 }
799 case BT_GT:
800 gt:
801 *nextTokPtr = ptr + MINBPC(enc);
802 return XML_TOK_START_TAG_NO_ATTS;
803 case BT_SOL:
804 sol:
805 ptr += MINBPC(enc);
806 if (ptr == end)
807 return XML_TOK_PARTIAL;
808 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
809 *nextTokPtr = ptr;
810 return XML_TOK_INVALID;
811 }
812 *nextTokPtr = ptr + MINBPC(enc);
813 return XML_TOK_EMPTY_ELEMENT_NO_ATTS;
814 default:
815 *nextTokPtr = ptr;
816 return XML_TOK_INVALID;
817 }
818 }
819 return XML_TOK_PARTIAL;
820}
821
822static int EXPATENTRY PREFIX(contentTok)(const ENCODING *enc,
823 const char *ptr,
824 const char *end,
825 const char **nextTokPtr)
826{
827 if (ptr == end)
828 return XML_TOK_NONE;
829 if (MINBPC(enc) > 1) {
830 size_t n = end - ptr;
831 if (n & (MINBPC(enc) - 1)) {
832 n &= ~(MINBPC(enc) - 1);
833 if (n == 0)
834 return XML_TOK_PARTIAL;
835 end = ptr + n;
836 }
837 }
838 switch (BYTE_TYPE(enc, ptr)) {
839 case BT_LT:
840 return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr);
841 case BT_AMP:
842 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
843 case BT_CR:
844 ptr += MINBPC(enc);
845 if (ptr == end)
846 return XML_TOK_TRAILING_CR;
847 if (BYTE_TYPE(enc, ptr) == BT_LF)
848 ptr += MINBPC(enc);
849 *nextTokPtr = ptr;
850 return XML_TOK_DATA_NEWLINE;
851 case BT_LF:
852 *nextTokPtr = ptr + MINBPC(enc);
853 return XML_TOK_DATA_NEWLINE;
854 case BT_RSQB:
855 ptr += MINBPC(enc);
856 if (ptr == end)
857 return XML_TOK_TRAILING_RSQB;
858 if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
859 break;
860 ptr += MINBPC(enc);
861 if (ptr == end)
862 return XML_TOK_TRAILING_RSQB;
863 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
864 ptr -= MINBPC(enc);
865 break;
866 }
867 *nextTokPtr = ptr;
868 return XML_TOK_INVALID;
869 INVALID_CASES(ptr, nextTokPtr)
870 default:
871 ptr += MINBPC(enc);
872 break;
873 }
874 while (ptr != end) {
875 switch (BYTE_TYPE(enc, ptr)) {
876#define LEAD_CASE(n) \
877 case BT_LEAD ## n: \
878 if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
879 *nextTokPtr = ptr; \
880 return XML_TOK_DATA_CHARS; \
881 } \
882 ptr += n; \
883 break;
884 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
885#undef LEAD_CASE
886 case BT_RSQB:
887 if (ptr + MINBPC(enc) != end) {
888 if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
889 ptr += MINBPC(enc);
890 break;
891 }
892 if (ptr + 2*MINBPC(enc) != end) {
893 if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) {
894 ptr += MINBPC(enc);
895 break;
896 }
897 *nextTokPtr = ptr + 2*MINBPC(enc);
898 return XML_TOK_INVALID;
899 }
900 }
901 /* fall through */
902 case BT_AMP:
903 case BT_LT:
904 case BT_NONXML:
905 case BT_MALFORM:
906 case BT_TRAIL:
907 case BT_CR:
908 case BT_LF:
909 *nextTokPtr = ptr;
910 return XML_TOK_DATA_CHARS;
911 default:
912 ptr += MINBPC(enc);
913 break;
914 }
915 }
916 *nextTokPtr = ptr;
917 return XML_TOK_DATA_CHARS;
918}
919
920/* ptr points to character following "%" */
921
922static int EXPATENTRY PREFIX(scanPercent)(const ENCODING *enc,
923 const char *ptr,
924 const char *end,
925 const char **nextTokPtr)
926{
927 if (ptr == end)
928 return XML_TOK_PARTIAL;
929 switch (BYTE_TYPE(enc, ptr)) {
930 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
931 case BT_S: case BT_LF: case BT_CR: case BT_PERCNT:
932 *nextTokPtr = ptr;
933 return XML_TOK_PERCENT;
934 default:
935 *nextTokPtr = ptr;
936 return XML_TOK_INVALID;
937 }
938 while (ptr != end) {
939 switch (BYTE_TYPE(enc, ptr)) {
940 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
941 case BT_SEMI:
942 *nextTokPtr = ptr + MINBPC(enc);
943 return XML_TOK_PARAM_ENTITY_REF;
944 default:
945 *nextTokPtr = ptr;
946 return XML_TOK_INVALID;
947 }
948 }
949 return XML_TOK_PARTIAL;
950}
951
952static int EXPATENTRY PREFIX(scanPoundName)(const ENCODING *enc,
953 const char *ptr,
954 const char *end,
955 const char **nextTokPtr)
956{
957 if (ptr == end)
958 return XML_TOK_PARTIAL;
959 switch (BYTE_TYPE(enc, ptr)) {
960 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
961 default:
962 *nextTokPtr = ptr;
963 return XML_TOK_INVALID;
964 }
965 while (ptr != end) {
966 switch (BYTE_TYPE(enc, ptr)) {
967 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
968 case BT_CR: case BT_LF: case BT_S:
969 case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR:
970 *nextTokPtr = ptr;
971 return XML_TOK_POUND_NAME;
972 default:
973 *nextTokPtr = ptr;
974 return XML_TOK_INVALID;
975 }
976 }
977 return -XML_TOK_POUND_NAME;
978}
979
980static int EXPATENTRY PREFIX(scanLit)(int open,
981 const ENCODING *enc,
982 const char *ptr,
983 const char *end,
984 const char **nextTokPtr)
985{
986 while (ptr != end) {
987 int t = BYTE_TYPE(enc, ptr);
988 switch (t) {
989 INVALID_CASES(ptr, nextTokPtr)
990 case BT_QUOT:
991 case BT_APOS:
992 ptr += MINBPC(enc);
993 if (t != open)
994 break;
995 if (ptr == end)
996 return -XML_TOK_LITERAL;
997 *nextTokPtr = ptr;
998 switch (BYTE_TYPE(enc, ptr)) {
999 case BT_S: case BT_CR: case BT_LF:
1000 case BT_GT: case BT_PERCNT: case BT_LSQB:
1001 return XML_TOK_LITERAL;
1002 default:
1003 return XML_TOK_INVALID;
1004 }
1005 default:
1006 ptr += MINBPC(enc);
1007 break;
1008 }
1009 }
1010 return XML_TOK_PARTIAL;
1011}
1012
1013static int EXPATENTRY PREFIX(prologTok)(const ENCODING *enc,
1014 const char *ptr,
1015 const char *end,
1016 const char **nextTokPtr)
1017{
1018 int tok;
1019 if (ptr == end)
1020 return XML_TOK_NONE;
1021 if (MINBPC(enc) > 1) {
1022 size_t n = end - ptr;
1023 if (n & (MINBPC(enc) - 1)) {
1024 n &= ~(MINBPC(enc) - 1);
1025 if (n == 0)
1026 return XML_TOK_PARTIAL;
1027 end = ptr + n;
1028 }
1029 }
1030 switch (BYTE_TYPE(enc, ptr)) {
1031 case BT_QUOT:
1032 return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr);
1033 case BT_APOS:
1034 return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr);
1035 case BT_LT:
1036 {
1037 ptr += MINBPC(enc);
1038 if (ptr == end)
1039 return XML_TOK_PARTIAL;
1040 switch (BYTE_TYPE(enc, ptr)) {
1041 case BT_EXCL:
1042 return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1043 case BT_QUEST:
1044 return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1045 case BT_NMSTRT:
1046 case BT_HEX:
1047 case BT_NONASCII:
1048 case BT_LEAD2:
1049 case BT_LEAD3:
1050 case BT_LEAD4:
1051 *nextTokPtr = ptr - MINBPC(enc);
1052 return XML_TOK_INSTANCE_START;
1053 }
1054 *nextTokPtr = ptr;
1055 return XML_TOK_INVALID;
1056 }
1057 case BT_CR:
1058 if (ptr + MINBPC(enc) == end)
1059 return -XML_TOK_PROLOG_S;
1060 /* fall through */
1061 case BT_S: case BT_LF:
1062 for (;;) {
1063 ptr += MINBPC(enc);
1064 if (ptr == end)
1065 break;
1066 switch (BYTE_TYPE(enc, ptr)) {
1067 case BT_S: case BT_LF:
1068 break;
1069 case BT_CR:
1070 /* don't split CR/LF pair */
1071 if (ptr + MINBPC(enc) != end)
1072 break;
1073 /* fall through */
1074 default:
1075 *nextTokPtr = ptr;
1076 return XML_TOK_PROLOG_S;
1077 }
1078 }
1079 *nextTokPtr = ptr;
1080 return XML_TOK_PROLOG_S;
1081 case BT_PERCNT:
1082 return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1083 case BT_COMMA:
1084 *nextTokPtr = ptr + MINBPC(enc);
1085 return XML_TOK_COMMA;
1086 case BT_LSQB:
1087 *nextTokPtr = ptr + MINBPC(enc);
1088 return XML_TOK_OPEN_BRACKET;
1089 case BT_RSQB:
1090 ptr += MINBPC(enc);
1091 if (ptr == end)
1092 return -XML_TOK_CLOSE_BRACKET;
1093 if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
1094 if (ptr + MINBPC(enc) == end)
1095 return XML_TOK_PARTIAL;
1096 if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
1097 *nextTokPtr = ptr + 2*MINBPC(enc);
1098 return XML_TOK_COND_SECT_CLOSE;
1099 }
1100 }
1101 *nextTokPtr = ptr;
1102 return XML_TOK_CLOSE_BRACKET;
1103 case BT_LPAR:
1104 *nextTokPtr = ptr + MINBPC(enc);
1105 return XML_TOK_OPEN_PAREN;
1106 case BT_RPAR:
1107 ptr += MINBPC(enc);
1108 if (ptr == end)
1109 return -XML_TOK_CLOSE_PAREN;
1110 switch (BYTE_TYPE(enc, ptr)) {
1111 case BT_AST:
1112 *nextTokPtr = ptr + MINBPC(enc);
1113 return XML_TOK_CLOSE_PAREN_ASTERISK;
1114 case BT_QUEST:
1115 *nextTokPtr = ptr + MINBPC(enc);
1116 return XML_TOK_CLOSE_PAREN_QUESTION;
1117 case BT_PLUS:
1118 *nextTokPtr = ptr + MINBPC(enc);
1119 return XML_TOK_CLOSE_PAREN_PLUS;
1120 case BT_CR: case BT_LF: case BT_S:
1121 case BT_GT: case BT_COMMA: case BT_VERBAR:
1122 case BT_RPAR:
1123 *nextTokPtr = ptr;
1124 return XML_TOK_CLOSE_PAREN;
1125 }
1126 *nextTokPtr = ptr;
1127 return XML_TOK_INVALID;
1128 case BT_VERBAR:
1129 *nextTokPtr = ptr + MINBPC(enc);
1130 return XML_TOK_OR;
1131 case BT_GT:
1132 *nextTokPtr = ptr + MINBPC(enc);
1133 return XML_TOK_DECL_CLOSE;
1134 case BT_NUM:
1135 return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1136#define LEAD_CASE(n) \
1137 case BT_LEAD ## n: \
1138 if (end - ptr < n) \
1139 return XML_TOK_PARTIAL_CHAR; \
1140 if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
1141 ptr += n; \
1142 tok = XML_TOK_NAME; \
1143 break; \
1144 } \
1145 if (IS_NAME_CHAR(enc, ptr, n)) { \
1146 ptr += n; \
1147 tok = XML_TOK_NMTOKEN; \
1148 break; \
1149 } \
1150 *nextTokPtr = ptr; \
1151 return XML_TOK_INVALID;
1152 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1153#undef LEAD_CASE
1154 case BT_NMSTRT:
1155 case BT_HEX:
1156 tok = XML_TOK_NAME;
1157 ptr += MINBPC(enc);
1158 break;
1159 case BT_DIGIT:
1160 case BT_NAME:
1161 case BT_MINUS:
1162#ifdef XML_NS
1163 case BT_COLON:
1164#endif
1165 tok = XML_TOK_NMTOKEN;
1166 ptr += MINBPC(enc);
1167 break;
1168 case BT_NONASCII:
1169 if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {
1170 ptr += MINBPC(enc);
1171 tok = XML_TOK_NAME;
1172 break;
1173 }
1174 if (IS_NAME_CHAR_MINBPC(enc, ptr)) {
1175 ptr += MINBPC(enc);
1176 tok = XML_TOK_NMTOKEN;
1177 break;
1178 }
1179 /* fall through */
1180 default:
1181 *nextTokPtr = ptr;
1182 return XML_TOK_INVALID;
1183 }
1184 while (ptr != end) {
1185 switch (BYTE_TYPE(enc, ptr)) {
1186 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
1187 case BT_GT: case BT_RPAR: case BT_COMMA:
1188 case BT_VERBAR: case BT_LSQB: case BT_PERCNT:
1189 case BT_S: case BT_CR: case BT_LF:
1190 *nextTokPtr = ptr;
1191 return tok;
1192#ifdef XML_NS
1193 case BT_COLON:
1194 ptr += MINBPC(enc);
1195 switch (tok) {
1196 case XML_TOK_NAME:
1197 if (ptr == end)
1198 return XML_TOK_PARTIAL;
1199 tok = XML_TOK_PREFIXED_NAME;
1200 switch (BYTE_TYPE(enc, ptr)) {
1201 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
1202 default:
1203 tok = XML_TOK_NMTOKEN;
1204 break;
1205 }
1206 break;
1207 case XML_TOK_PREFIXED_NAME:
1208 tok = XML_TOK_NMTOKEN;
1209 break;
1210 }
1211 break;
1212#endif
1213 case BT_PLUS:
1214 if (tok == XML_TOK_NMTOKEN) {
1215 *nextTokPtr = ptr;
1216 return XML_TOK_INVALID;
1217 }
1218 *nextTokPtr = ptr + MINBPC(enc);
1219 return XML_TOK_NAME_PLUS;
1220 case BT_AST:
1221 if (tok == XML_TOK_NMTOKEN) {
1222 *nextTokPtr = ptr;
1223 return XML_TOK_INVALID;
1224 }
1225 *nextTokPtr = ptr + MINBPC(enc);
1226 return XML_TOK_NAME_ASTERISK;
1227 case BT_QUEST:
1228 if (tok == XML_TOK_NMTOKEN) {
1229 *nextTokPtr = ptr;
1230 return XML_TOK_INVALID;
1231 }
1232 *nextTokPtr = ptr + MINBPC(enc);
1233 return XML_TOK_NAME_QUESTION;
1234 default:
1235 *nextTokPtr = ptr;
1236 return XML_TOK_INVALID;
1237 }
1238 }
1239 return -tok;
1240}
1241
1242static int EXPATENTRY PREFIX(attributeValueTok)(const ENCODING *enc,
1243 const char *ptr,
1244 const char *end,
1245 const char **nextTokPtr)
1246{
1247 const char *start;
1248 if (ptr == end)
1249 return XML_TOK_NONE;
1250 start = ptr;
1251 while (ptr != end) {
1252 switch (BYTE_TYPE(enc, ptr)) {
1253#define LEAD_CASE(n) \
1254 case BT_LEAD ## n: ptr += n; break;
1255 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1256#undef LEAD_CASE
1257 case BT_AMP:
1258 if (ptr == start)
1259 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1260 *nextTokPtr = ptr;
1261 return XML_TOK_DATA_CHARS;
1262 case BT_LT:
1263 /* this is for inside entity references */
1264 *nextTokPtr = ptr;
1265 return XML_TOK_INVALID;
1266 case BT_LF:
1267 if (ptr == start) {
1268 *nextTokPtr = ptr + MINBPC(enc);
1269 return XML_TOK_DATA_NEWLINE;
1270 }
1271 *nextTokPtr = ptr;
1272 return XML_TOK_DATA_CHARS;
1273 case BT_CR:
1274 if (ptr == start) {
1275 ptr += MINBPC(enc);
1276 if (ptr == end)
1277 return XML_TOK_TRAILING_CR;
1278 if (BYTE_TYPE(enc, ptr) == BT_LF)
1279 ptr += MINBPC(enc);
1280 *nextTokPtr = ptr;
1281 return XML_TOK_DATA_NEWLINE;
1282 }
1283 *nextTokPtr = ptr;
1284 return XML_TOK_DATA_CHARS;
1285 case BT_S:
1286 if (ptr == start) {
1287 *nextTokPtr = ptr + MINBPC(enc);
1288 return XML_TOK_ATTRIBUTE_VALUE_S;
1289 }
1290 *nextTokPtr = ptr;
1291 return XML_TOK_DATA_CHARS;
1292 default:
1293 ptr += MINBPC(enc);
1294 break;
1295 }
1296 }
1297 *nextTokPtr = ptr;
1298 return XML_TOK_DATA_CHARS;
1299}
1300
1301static int EXPATENTRY PREFIX(entityValueTok)(const ENCODING *enc,
1302 const char *ptr,
1303 const char *end,
1304 const char **nextTokPtr)
1305{
1306 const char *start;
1307 if (ptr == end)
1308 return XML_TOK_NONE;
1309 start = ptr;
1310 while (ptr != end) {
1311 switch (BYTE_TYPE(enc, ptr)) {
1312#define LEAD_CASE(n) \
1313 case BT_LEAD ## n: ptr += n; break;
1314 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1315#undef LEAD_CASE
1316 case BT_AMP:
1317 if (ptr == start)
1318 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1319 *nextTokPtr = ptr;
1320 return XML_TOK_DATA_CHARS;
1321 case BT_PERCNT:
1322 if (ptr == start) {
1323 int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc),
1324 end, nextTokPtr);
1325 return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok;
1326 }
1327 *nextTokPtr = ptr;
1328 return XML_TOK_DATA_CHARS;
1329 case BT_LF:
1330 if (ptr == start) {
1331 *nextTokPtr = ptr + MINBPC(enc);
1332 return XML_TOK_DATA_NEWLINE;
1333 }
1334 *nextTokPtr = ptr;
1335 return XML_TOK_DATA_CHARS;
1336 case BT_CR:
1337 if (ptr == start) {
1338 ptr += MINBPC(enc);
1339 if (ptr == end)
1340 return XML_TOK_TRAILING_CR;
1341 if (BYTE_TYPE(enc, ptr) == BT_LF)
1342 ptr += MINBPC(enc);
1343 *nextTokPtr = ptr;
1344 return XML_TOK_DATA_NEWLINE;
1345 }
1346 *nextTokPtr = ptr;
1347 return XML_TOK_DATA_CHARS;
1348 default:
1349 ptr += MINBPC(enc);
1350 break;
1351 }
1352 }
1353 *nextTokPtr = ptr;
1354 return XML_TOK_DATA_CHARS;
1355}
1356
1357#ifdef XML_DTD
1358
1359static int EXPATENTRY PREFIX(ignoreSectionTok)(const ENCODING *enc,
1360 const char *ptr,
1361 const char *end,
1362 const char **nextTokPtr)
1363{
1364 int level = 0;
1365 if (MINBPC(enc) > 1) {
1366 size_t n = end - ptr;
1367 if (n & (MINBPC(enc) - 1)) {
1368 n &= ~(MINBPC(enc) - 1);
1369 end = ptr + n;
1370 }
1371 }
1372 while (ptr != end) {
1373 switch (BYTE_TYPE(enc, ptr)) {
1374 INVALID_CASES(ptr, nextTokPtr)
1375 case BT_LT:
1376 if ((ptr += MINBPC(enc)) == end)
1377 return XML_TOK_PARTIAL;
1378 if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) {
1379 if ((ptr += MINBPC(enc)) == end)
1380 return XML_TOK_PARTIAL;
1381 if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) {
1382 ++level;
1383 ptr += MINBPC(enc);
1384 }
1385 }
1386 break;
1387 case BT_RSQB:
1388 if ((ptr += MINBPC(enc)) == end)
1389 return XML_TOK_PARTIAL;
1390 if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
1391 if ((ptr += MINBPC(enc)) == end)
1392 return XML_TOK_PARTIAL;
1393 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
1394 ptr += MINBPC(enc);
1395 if (level == 0) {
1396 *nextTokPtr = ptr;
1397 return XML_TOK_IGNORE_SECT;
1398 }
1399 --level;
1400 }
1401 }
1402 break;
1403 default:
1404 ptr += MINBPC(enc);
1405 break;
1406 }
1407 }
1408 return XML_TOK_PARTIAL;
1409}
1410
1411#endif /* XML_DTD */
1412
1413static int EXPATENTRY PREFIX(isPublicId)(const ENCODING *enc,
1414 const char *ptr,
1415 const char *end,
1416 const char **badPtr)
1417{
1418 ptr += MINBPC(enc);
1419 end -= MINBPC(enc);
1420 for (; ptr != end; ptr += MINBPC(enc)) {
1421 switch (BYTE_TYPE(enc, ptr)) {
1422 case BT_DIGIT:
1423 case BT_HEX:
1424 case BT_MINUS:
1425 case BT_APOS:
1426 case BT_LPAR:
1427 case BT_RPAR:
1428 case BT_PLUS:
1429 case BT_COMMA:
1430 case BT_SOL:
1431 case BT_EQUALS:
1432 case BT_QUEST:
1433 case BT_CR:
1434 case BT_LF:
1435 case BT_SEMI:
1436 case BT_EXCL:
1437 case BT_AST:
1438 case BT_PERCNT:
1439 case BT_NUM:
1440#ifdef XML_NS
1441 case BT_COLON:
1442#endif
1443 break;
1444 case BT_S:
1445 if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) {
1446 *badPtr = ptr;
1447 return 0;
1448 }
1449 break;
1450 case BT_NAME:
1451 case BT_NMSTRT:
1452 if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f))
1453 break;
1454 default:
1455 switch (BYTE_TO_ASCII(enc, ptr)) {
1456 case 0x24: /* $ */
1457 case 0x40: /* @ */
1458 break;
1459 default:
1460 *badPtr = ptr;
1461 return 0;
1462 }
1463 break;
1464 }
1465 }
1466 return 1;
1467}
1468
1469/* This must only be called for a well-formed start-tag or empty element tag.
1470Returns the number of attributes. Pointers to the first attsMax attributes
1471are stored in atts. */
1472
1473static int EXPATENTRY PREFIX(getAtts)(const ENCODING *enc,
1474 const char *ptr,
1475 int attsMax,
1476 ATTRIBUTE *atts)
1477{
1478 enum { other, inName, inValue } state = inName;
1479 int nAtts = 0;
1480 int open = 0; /* defined when state == inValue;
1481 initialization just to shut up compilers */
1482
1483 for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) {
1484 switch (BYTE_TYPE(enc, ptr)) {
1485#define START_NAME \
1486 if (state == other) { \
1487 if (nAtts < attsMax) { \
1488 atts[nAtts].name = ptr; \
1489 atts[nAtts].normalized = 1; \
1490 } \
1491 state = inName; \
1492 }
1493#define LEAD_CASE(n) \
1494 case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break;
1495 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1496#undef LEAD_CASE
1497 case BT_NONASCII:
1498 case BT_NMSTRT:
1499 case BT_HEX:
1500 START_NAME
1501 break;
1502#undef START_NAME
1503 case BT_QUOT:
1504 if (state != inValue) {
1505 if (nAtts < attsMax)
1506 atts[nAtts].valuePtr = ptr + MINBPC(enc);
1507 state = inValue;
1508 open = BT_QUOT;
1509 }
1510 else if (open == BT_QUOT) {
1511 state = other;
1512 if (nAtts < attsMax)
1513 atts[nAtts].valueEnd = ptr;
1514 nAtts++;
1515 }
1516 break;
1517 case BT_APOS:
1518 if (state != inValue) {
1519 if (nAtts < attsMax)
1520 atts[nAtts].valuePtr = ptr + MINBPC(enc);
1521 state = inValue;
1522 open = BT_APOS;
1523 }
1524 else if (open == BT_APOS) {
1525 state = other;
1526 if (nAtts < attsMax)
1527 atts[nAtts].valueEnd = ptr;
1528 nAtts++;
1529 }
1530 break;
1531 case BT_AMP:
1532 if (nAtts < attsMax)
1533 atts[nAtts].normalized = 0;
1534 break;
1535 case BT_S:
1536 if (state == inName)
1537 state = other;
1538 else if (state == inValue
1539 && nAtts < attsMax
1540 && atts[nAtts].normalized
1541 && (ptr == atts[nAtts].valuePtr
1542 || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE
1543 || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE
1544 || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open))
1545 atts[nAtts].normalized = 0;
1546 break;
1547 case BT_CR: case BT_LF:
1548 /* This case ensures that the first attribute name is counted
1549 Apart from that we could just change state on the quote. */
1550 if (state == inName)
1551 state = other;
1552 else if (state == inValue && nAtts < attsMax)
1553 atts[nAtts].normalized = 0;
1554 break;
1555 case BT_GT:
1556 case BT_SOL:
1557 if (state != inValue)
1558 return nAtts;
1559 break;
1560 default:
1561 break;
1562 }
1563 }
1564 /* not reached */
1565}
1566
1567static int EXPATENTRY PREFIX(charRefNumber)(const ENCODING *enc,
1568 const char *ptr)
1569{
1570 int result = 0;
1571 /* skip &# */
1572 ptr += 2*MINBPC(enc);
1573 if (CHAR_MATCHES(enc, ptr, ASCII_x)) {
1574 for (ptr += MINBPC(enc); !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
1575 int c = BYTE_TO_ASCII(enc, ptr);
1576 switch (c) {
1577 case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4:
1578 case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9:
1579 result <<= 4;
1580 result |= (c - ASCII_0);
1581 break;
1582 case ASCII_A: case ASCII_B: case ASCII_C: case ASCII_D: case ASCII_E: case ASCII_F:
1583 result <<= 4;
1584 result += 10 + (c - ASCII_A);
1585 break;
1586 case ASCII_a: case ASCII_b: case ASCII_c: case ASCII_d: case ASCII_e: case ASCII_f:
1587 result <<= 4;
1588 result += 10 + (c - ASCII_a);
1589 break;
1590 }
1591 if (result >= 0x110000)
1592 return -1;
1593 }
1594 }
1595 else {
1596 for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
1597 int c = BYTE_TO_ASCII(enc, ptr);
1598 result *= 10;
1599 result += (c - ASCII_0);
1600 if (result >= 0x110000)
1601 return -1;
1602 }
1603 }
1604 return checkCharRefNumber(result);
1605}
1606
1607static int EXPATENTRY PREFIX(predefinedEntityName)(const ENCODING *enc,
1608 const char *ptr,
1609 const char *end)
1610{
1611 switch ((end - ptr)/MINBPC(enc)) {
1612 case 2:
1613 if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) {
1614 switch (BYTE_TO_ASCII(enc, ptr)) {
1615 case ASCII_l:
1616 return ASCII_LT;
1617 case ASCII_g:
1618 return ASCII_GT;
1619 }
1620 }
1621 break;
1622 case 3:
1623 if (CHAR_MATCHES(enc, ptr, ASCII_a)) {
1624 ptr += MINBPC(enc);
1625 if (CHAR_MATCHES(enc, ptr, ASCII_m)) {
1626 ptr += MINBPC(enc);
1627 if (CHAR_MATCHES(enc, ptr, ASCII_p))
1628 return ASCII_AMP;
1629 }
1630 }
1631 break;
1632 case 4:
1633 switch (BYTE_TO_ASCII(enc, ptr)) {
1634 case ASCII_q:
1635 ptr += MINBPC(enc);
1636 if (CHAR_MATCHES(enc, ptr, ASCII_u)) {
1637 ptr += MINBPC(enc);
1638 if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
1639 ptr += MINBPC(enc);
1640 if (CHAR_MATCHES(enc, ptr, ASCII_t))
1641 return ASCII_QUOT;
1642 }
1643 }
1644 break;
1645 case ASCII_a:
1646 ptr += MINBPC(enc);
1647 if (CHAR_MATCHES(enc, ptr, ASCII_p)) {
1648 ptr += MINBPC(enc);
1649 if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
1650 ptr += MINBPC(enc);
1651 if (CHAR_MATCHES(enc, ptr, ASCII_s))
1652 return ASCII_APOS;
1653 }
1654 }
1655 break;
1656 }
1657 }
1658 return 0;
1659}
1660
1661static int EXPATENTRY PREFIX(sameName)(const ENCODING *enc,
1662 const char *ptr1,
1663 const char *ptr2)
1664{
1665 for (;;) {
1666 switch (BYTE_TYPE(enc, ptr1)) {
1667#define LEAD_CASE(n) \
1668 case BT_LEAD ## n: \
1669 if (*ptr1++ != *ptr2++) \
1670 return 0;
1671 LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2)
1672#undef LEAD_CASE
1673 /* fall through */
1674 if (*ptr1++ != *ptr2++)
1675 return 0;
1676 break;
1677 case BT_NONASCII:
1678 case BT_NMSTRT:
1679#ifdef XML_NS
1680 case BT_COLON:
1681#endif
1682 case BT_HEX:
1683 case BT_DIGIT:
1684 case BT_NAME:
1685 case BT_MINUS:
1686 if (*ptr2++ != *ptr1++)
1687 return 0;
1688 if (MINBPC(enc) > 1) {
1689 if (*ptr2++ != *ptr1++)
1690 return 0;
1691 if (MINBPC(enc) > 2) {
1692 if (*ptr2++ != *ptr1++)
1693 return 0;
1694 if (MINBPC(enc) > 3) {
1695 if (*ptr2++ != *ptr1++)
1696 return 0;
1697 }
1698 }
1699 }
1700 break;
1701 default:
1702 if (MINBPC(enc) == 1 && *ptr1 == *ptr2)
1703 return 1;
1704 switch (BYTE_TYPE(enc, ptr2)) {
1705 case BT_LEAD2:
1706 case BT_LEAD3:
1707 case BT_LEAD4:
1708 case BT_NONASCII:
1709 case BT_NMSTRT:
1710#ifdef XML_NS
1711 case BT_COLON:
1712#endif
1713 case BT_HEX:
1714 case BT_DIGIT:
1715 case BT_NAME:
1716 case BT_MINUS:
1717 return 0;
1718 default:
1719 return 1;
1720 }
1721 }
1722 }
1723 /* not reached */
1724}
1725
1726static int EXPATENTRY PREFIX(nameMatchesAscii)(const ENCODING *enc,
1727 const char *ptr1,
1728 const char *end1,
1729 const char *ptr2)
1730{
1731 for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
1732 if (ptr1 == end1)
1733 return 0;
1734 if (!CHAR_MATCHES(enc, ptr1, *ptr2))
1735 return 0;
1736 }
1737 return ptr1 == end1;
1738}
1739
1740static int EXPATENTRY PREFIX(nameLength)(const ENCODING *enc,
1741 const char *ptr)
1742{
1743 const char *start = ptr;
1744 for (;;) {
1745 switch (BYTE_TYPE(enc, ptr)) {
1746#define LEAD_CASE(n) \
1747 case BT_LEAD ## n: ptr += n; break;
1748 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1749#undef LEAD_CASE
1750 case BT_NONASCII:
1751 case BT_NMSTRT:
1752#ifdef XML_NS
1753 case BT_COLON:
1754#endif
1755 case BT_HEX:
1756 case BT_DIGIT:
1757 case BT_NAME:
1758 case BT_MINUS:
1759 ptr += MINBPC(enc);
1760 break;
1761 default:
1762 return ptr - start;
1763 }
1764 }
1765}
1766
1767static const char* EXPATENTRY PREFIX(skipS)(const ENCODING *enc,
1768 const char *ptr)
1769{
1770 for (;;) {
1771 switch (BYTE_TYPE(enc, ptr)) {
1772 case BT_LF:
1773 case BT_CR:
1774 case BT_S:
1775 ptr += MINBPC(enc);
1776 break;
1777 default:
1778 return ptr;
1779 }
1780 }
1781}
1782
1783static void EXPATENTRY PREFIX(updatePosition)(const ENCODING *enc,
1784 const char *ptr,
1785 const char *end,
1786 POSITION *pos)
1787{
1788 while (ptr != end) {
1789 switch (BYTE_TYPE(enc, ptr)) {
1790#define LEAD_CASE(n) \
1791 case BT_LEAD ## n: \
1792 ptr += n; \
1793 break;
1794 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1795#undef LEAD_CASE
1796 case BT_LF:
1797 pos->columnNumber = (unsigned)-1;
1798 pos->lineNumber++;
1799 ptr += MINBPC(enc);
1800 break;
1801 case BT_CR:
1802 pos->lineNumber++;
1803 ptr += MINBPC(enc);
1804 if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF)
1805 ptr += MINBPC(enc);
1806 pos->columnNumber = (unsigned)-1;
1807 break;
1808 default:
1809 ptr += MINBPC(enc);
1810 break;
1811 }
1812 pos->columnNumber++;
1813 }
1814}
1815
/* This file is #include'd several times (see the note at the top of the
   file), so its helper macros are dropped here. */

/* name checking */
#undef CHECK_NAME_CASE
#undef CHECK_NAME_CASES
#undef CHECK_NMSTRT_CASE
#undef CHECK_NMSTRT_CASES

/* multibyte / invalid character dispatch */
#undef DO_LEAD_CASE
#undef MULTIBYTE_CASES
#undef INVALID_CASES
Note: See TracBrowser for help on using the repository browser.