source: trunk/include/expat/xmltok.h@ 37

Last change on this file since 37 was 36, checked in by umoeller, 25 years ago

Added XML.

  • Property svn:eol-style set to CRLF
  • Property svn:keywords set to Author Date Id Revision
File size: 10.9 KB
Line 
1/*
2Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
3See the file COPYING for copying permission.
4*/
5
6#ifndef XmlTok_INCLUDED
7#define XmlTok_INCLUDED 1
8
9#ifdef __cplusplus
10extern "C" {
11#endif
12
/*
 * Special (non-positive) token codes.
 *
 * The negative values are parenthesized so that a dropped operator next to
 * the macro name (e.g. `tok XML_TOK_NONE`) is a syntax error instead of
 * silently compiling as a subtraction.
 */

/* The following token may be returned by XmlContentTok */
#define XML_TOK_TRAILING_RSQB (-5) /* ] or ]] at the end of the scan; might be
                                      the start of an illegal ]]> sequence */
/* The following tokens may be returned by both XmlPrologTok and XmlContentTok */
#define XML_TOK_NONE          (-4) /* The string to be scanned is empty */
#define XML_TOK_TRAILING_CR   (-3) /* A CR at the end of the scan;
                                      might be part of a CRLF sequence */
#define XML_TOK_PARTIAL_CHAR  (-2) /* only part of a multibyte sequence */
#define XML_TOK_PARTIAL       (-1) /* only part of a token */
#define XML_TOK_INVALID       0

/* The following tokens are returned by XmlContentTok; some are also
   returned by XmlAttributeValueTok, XmlEntityTok, XmlCdataSectionTok */

#define XML_TOK_START_TAG_WITH_ATTS     1
#define XML_TOK_START_TAG_NO_ATTS       2
#define XML_TOK_EMPTY_ELEMENT_WITH_ATTS 3  /* empty element tag <e/> */
#define XML_TOK_EMPTY_ELEMENT_NO_ATTS   4
#define XML_TOK_END_TAG                 5
#define XML_TOK_DATA_CHARS              6
#define XML_TOK_DATA_NEWLINE            7
#define XML_TOK_CDATA_SECT_OPEN         8
#define XML_TOK_ENTITY_REF              9
#define XML_TOK_CHAR_REF                10 /* numeric character reference */

/* The following tokens may be returned by both XmlPrologTok and XmlContentTok */
#define XML_TOK_PI                      11 /* processing instruction */
#define XML_TOK_XML_DECL                12 /* XML decl or text decl */
#define XML_TOK_COMMENT                 13
#define XML_TOK_BOM                     14 /* Byte order mark */

/* The following tokens are returned only by XmlPrologTok */
#define XML_TOK_PROLOG_S             15
#define XML_TOK_DECL_OPEN            16 /* <!foo */
#define XML_TOK_DECL_CLOSE           17 /* > */
#define XML_TOK_NAME                 18
#define XML_TOK_NMTOKEN              19
#define XML_TOK_POUND_NAME           20 /* #name */
#define XML_TOK_OR                   21 /* | */
#define XML_TOK_PERCENT              22
#define XML_TOK_OPEN_PAREN           23
#define XML_TOK_CLOSE_PAREN          24
#define XML_TOK_OPEN_BRACKET         25
#define XML_TOK_CLOSE_BRACKET        26
#define XML_TOK_LITERAL              27
#define XML_TOK_PARAM_ENTITY_REF     28
#define XML_TOK_INSTANCE_START       29

/* The following occur only in element type declarations */
#define XML_TOK_NAME_QUESTION        30 /* name? */
#define XML_TOK_NAME_ASTERISK        31 /* name* */
#define XML_TOK_NAME_PLUS            32 /* name+ */
#define XML_TOK_COND_SECT_OPEN       33 /* <![ */
#define XML_TOK_COND_SECT_CLOSE      34 /* ]]> */
#define XML_TOK_CLOSE_PAREN_QUESTION 35 /* )? */
#define XML_TOK_CLOSE_PAREN_ASTERISK 36 /* )* */
#define XML_TOK_CLOSE_PAREN_PLUS     37 /* )+ */
#define XML_TOK_COMMA                38

/* The following token is returned only by XmlAttributeValueTok */
#define XML_TOK_ATTRIBUTE_VALUE_S 39

/* The following token is returned only by XmlCdataSectionTok */
#define XML_TOK_CDATA_SECT_CLOSE 40

/* With namespace processing this is returned by XmlPrologTok
   for a name with a colon. */
#define XML_TOK_PREFIXED_NAME 41

/* Only available when the library is built with XML_DTD defined. */
#ifdef XML_DTD
#define XML_TOK_IGNORE_SECT 42
#endif /* XML_DTD */

/*
 * Scanner states.  XML_N_STATES is the length of the `scanners` array in
 * struct encoding, and each XML_*_STATE value below is an index into that
 * array (see the XmlTok macro).  The ignore-section state, and the extra
 * array slot it needs, exist only when XML_DTD is defined.
 */
#ifdef XML_DTD
#define XML_N_STATES 4
#else /* not XML_DTD */
#define XML_N_STATES 3
#endif /* not XML_DTD */

#define XML_PROLOG_STATE 0
#define XML_CONTENT_STATE 1
#define XML_CDATA_SECTION_STATE 2
#ifdef XML_DTD
#define XML_IGNORE_SECTION_STATE 3
#endif /* XML_DTD */

/* Literal kinds: XML_N_LITERAL_TYPES is the length of the `literalScanners`
   array in struct encoding; the two values after it index that array
   (see the XmlLiteralTok macro). */
#define XML_N_LITERAL_TYPES 2
#define XML_ATTRIBUTE_VALUE_LITERAL 0
#define XML_ENTITY_VALUE_LITERAL 1

/* The size of the buffer passed to XmlUtf8Encode must be at least this. */
#define XML_UTF8_ENCODE_MAX 4
/* The size of the buffer passed to XmlUtf16Encode must be at least this. */
#define XML_UTF16_ENCODE_MAX 2

/* A line/column location; this is the type that the updatePosition
   callback of struct encoding receives a pointer to. */
typedef struct position {
    /* first line and first column are 0 not 1 */
    unsigned long lineNumber;
    unsigned long columnNumber;
} POSITION;

/*
 * One attribute record; the getAtts callback of struct encoding receives
 * an array of these (`ATTRIBUTE *atts`).
 * NOTE(review): field meanings below are inferred from their names --
 * confirm against the getAtts implementations.
 */
typedef struct
{
    const char *name;      /* presumably points at the attribute name */
    const char *valuePtr;  /* presumably the start of the attribute value */
    const char *valueEnd;  /* presumably one past the end of the value */
    char normalized;       /* flag; exact semantics not visible in this header */
} ATTRIBUTE, *PATTRIBUTE;

122struct encoding;
123typedef struct encoding ENCODING, *PENCODING;
124
125typedef int EXPATENTRY SCANNERFUNC(const ENCODING *,
126 const char *,
127 const char *,
128 const char **);
129typedef SCANNERFUNC *PSCANNERFUNC;
130
131struct encoding
132{
133 PSCANNERFUNC scanners[XML_N_STATES];
134 PSCANNERFUNC literalScanners[XML_N_LITERAL_TYPES];
135 int (* EXPATENTRY sameName)(const ENCODING *,
136 const char *,
137 const char *);
138 int (* EXPATENTRY nameMatchesAscii)(const ENCODING *,
139 const char *,
140 const char *,
141 const char *);
142 int (* EXPATENTRY nameLength)(const ENCODING *,
143 const char *);
144 const char *(*skipS)(const ENCODING *,
145 const char *);
146 int (* EXPATENTRY getAtts)(const ENCODING *enc,
147 const char *ptr,
148 int attsMax,
149 ATTRIBUTE *atts);
150 int (* EXPATENTRY charRefNumber)(const ENCODING *enc,
151 const char *ptr);
152 int (* EXPATENTRY predefinedEntityName)(const ENCODING *,
153 const char *,
154 const char *);
155 void (* EXPATENTRY updatePosition)(const ENCODING *,
156 const char *ptr,
157 const char *end,
158 POSITION *);
159 int (* EXPATENTRY isPublicId)(const ENCODING *enc,
160 const char *ptr,
161 const char *end,
162 const char **badPtr);
163 void (* EXPATENTRY utf8Convert)(const ENCODING *enc,
164 const char **fromP,
165 const char *fromLim,
166 char **toP,
167 const char *toLim);
168 void (* EXPATENTRY utf16Convert)(const ENCODING *enc,
169 const char **fromP,
170 const char *fromLim,
171 unsigned short **toP,
172 const unsigned short *toLim);
173 int minBytesPerChar;
174 char isUtf8;
175 char isUtf16;
176};
177
/*
Scan the string starting at ptr until the end of the next complete token,
but do not scan past end. Return an integer giving the type of token.

Return XML_TOK_NONE when ptr == end; nextTokPtr will not be set.

Return XML_TOK_PARTIAL when the string does not contain a complete token;
nextTokPtr will not be set.

Return XML_TOK_INVALID when the string does not start a valid token; nextTokPtr
will be set to point to the character which made the token invalid.

Otherwise the string starts with a valid token; nextTokPtr will be set to point
to the character following the end of that token.

Each data character counts as a single token, but adjacent data characters
may be returned together. Similarly for characters in the prolog outside
literals, comments and processing instructions.

Note: these are macros and enc appears twice in the expansion, so the enc
argument must not have side effects.
*/


#define XmlTok(enc, state, ptr, end, nextTokPtr) \
    (((enc)->scanners[state])(enc, ptr, end, nextTokPtr))

#define XmlPrologTok(enc, ptr, end, nextTokPtr) \
    XmlTok(enc, XML_PROLOG_STATE, ptr, end, nextTokPtr)

#define XmlContentTok(enc, ptr, end, nextTokPtr) \
    XmlTok(enc, XML_CONTENT_STATE, ptr, end, nextTokPtr)

#define XmlCdataSectionTok(enc, ptr, end, nextTokPtr) \
    XmlTok(enc, XML_CDATA_SECTION_STATE, ptr, end, nextTokPtr)

#ifdef XML_DTD

#define XmlIgnoreSectionTok(enc, ptr, end, nextTokPtr) \
    XmlTok(enc, XML_IGNORE_SECTION_STATE, ptr, end, nextTokPtr)

#endif /* XML_DTD */

/* This is used for performing a 2nd-level tokenization on
   the content of a literal that has already been returned by XmlTok.
   literalType is an XML_*_LITERAL index into enc->literalScanners.
   enc appears twice in the expansion, so it must not have side effects. */

#define XmlLiteralTok(enc, literalType, ptr, end, nextTokPtr) \
    (((enc)->literalScanners[literalType])(enc, ptr, end, nextTokPtr))

#define XmlAttributeValueTok(enc, ptr, end, nextTokPtr) \
    XmlLiteralTok(enc, XML_ATTRIBUTE_VALUE_LITERAL, ptr, end, nextTokPtr)

#define XmlEntityValueTok(enc, ptr, end, nextTokPtr) \
    XmlLiteralTok(enc, XML_ENTITY_VALUE_LITERAL, ptr, end, nextTokPtr)

/*
 * Convenience wrappers around the function pointers of struct encoding.
 * Each macro calls the member of the same purpose on enc and passes enc
 * itself as the first argument.  enc therefore appears twice in every
 * expansion and must not have side effects.
 */

#define XmlSameName(enc, ptr1, ptr2) (((enc)->sameName)(enc, ptr1, ptr2))

#define XmlNameMatchesAscii(enc, ptr1, end1, ptr2) \
    (((enc)->nameMatchesAscii)(enc, ptr1, end1, ptr2))

#define XmlNameLength(enc, ptr) \
    (((enc)->nameLength)(enc, ptr))

#define XmlSkipS(enc, ptr) \
    (((enc)->skipS)(enc, ptr))

#define XmlGetAttributes(enc, ptr, attsMax, atts) \
    (((enc)->getAtts)(enc, ptr, attsMax, atts))

#define XmlCharRefNumber(enc, ptr) \
    (((enc)->charRefNumber)(enc, ptr))

#define XmlPredefinedEntityName(enc, ptr, end) \
    (((enc)->predefinedEntityName)(enc, ptr, end))

#define XmlUpdatePosition(enc, ptr, end, pos) \
    (((enc)->updatePosition)(enc, ptr, end, pos))

#define XmlIsPublicId(enc, ptr, end, badPtr) \
    (((enc)->isPublicId)(enc, ptr, end, badPtr))

#define XmlUtf8Convert(enc, fromP, fromLim, toP, toLim) \
    (((enc)->utf8Convert)(enc, fromP, fromLim, toP, toLim))

#define XmlUtf16Convert(enc, fromP, fromLim, toP, toLim) \
    (((enc)->utf16Convert)(enc, fromP, fromLim, toP, toLim))

262typedef struct {
263 ENCODING initEnc;
264 const ENCODING **encPtr;
265} INIT_ENCODING;
266
267int XmlParseXmlDecl(int isGeneralTextEntity,
268 const ENCODING *enc,
269 const char *ptr,
270 const char *end,
271 const char **badPtr,
272 const char **versionPtr,
273 const char **versionEndPtr,
274 const char **encodingNamePtr,
275 const ENCODING **namedEncodingPtr,
276 int *standalonePtr);
277
278int XmlInitEncoding(INIT_ENCODING *, const ENCODING **, const char *name);
279const ENCODING *XmlGetUtf8InternalEncoding(void);
280const ENCODING *XmlGetUtf16InternalEncoding(void);
281int XmlUtf8Encode(int charNumber, char *buf);
282int XmlUtf16Encode(int charNumber, unsigned short *buf);
283
284int XmlSizeOfUnknownEncoding(void);
285ENCODING *
286XmlInitUnknownEncoding(void *mem,
287 int *table,
288 int (*conv)(void *userData, const char *p),
289 void *userData);
290
291int XmlParseXmlDeclNS(int isGeneralTextEntity,
292 const ENCODING *enc,
293 const char *ptr,
294 const char *end,
295 const char **badPtr,
296 const char **versionPtr,
297 const char **versionEndPtr,
298 const char **encodingNamePtr,
299 const ENCODING **namedEncodingPtr,
300 int *standalonePtr);
301int XmlInitEncodingNS(INIT_ENCODING *, const ENCODING **, const char *name);
302const ENCODING *XmlGetUtf8InternalEncodingNS(void);
303const ENCODING *XmlGetUtf16InternalEncodingNS(void);
304ENCODING *
305XmlInitUnknownEncodingNS(void *mem,
306 int *table,
307 int (*conv)(void *userData, const char *p),
308 void *userData);
309#ifdef __cplusplus
310}
311#endif
312
313#endif /* not XmlTok_INCLUDED */
Note: See TracBrowser for help on using the repository browser.