1 | /*
|
---|
2 | * Secret Labs' Regular Expression Engine
|
---|
3 | *
|
---|
4 | * regular expression matching engine
|
---|
5 | *
|
---|
6 | * partial history:
|
---|
7 | * 1999-10-24 fl created (based on existing template matcher code)
|
---|
8 | * 2000-03-06 fl first alpha, sort of
|
---|
9 | * 2000-08-01 fl fixes for 1.6b1
|
---|
10 | * 2000-08-07 fl use PyOS_CheckStack() if available
|
---|
11 | * 2000-09-20 fl added expand method
|
---|
12 | * 2001-03-20 fl lots of fixes for 2.1b2
|
---|
13 | * 2001-04-15 fl export copyright as Python attribute, not global
|
---|
14 | * 2001-04-28 fl added __copy__ methods (work in progress)
|
---|
15 | * 2001-05-14 fl fixes for 1.5.2 compatibility
|
---|
16 | * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
|
---|
17 | * 2001-10-18 fl fixed group reset issue (from Matthew Mueller)
|
---|
18 | * 2001-10-20 fl added split primitive; reenable unicode for 1.6/2.0/2.1
|
---|
19 | * 2001-10-21 fl added sub/subn primitive
|
---|
20 | * 2001-10-24 fl added finditer primitive (for 2.2 only)
|
---|
21 | * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum)
|
---|
22 | * 2002-11-09 fl fixed empty sub/subn return type
|
---|
23 | * 2003-04-18 mvl fully support 4-byte codes
|
---|
24 | * 2003-10-17 gn implemented non recursive scheme
|
---|
25 | *
|
---|
26 | * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
|
---|
27 | *
|
---|
28 | * This version of the SRE library can be redistributed under CNRI's
|
---|
29 | * Python 1.6 license. For any other use, please contact Secret Labs
|
---|
30 | * AB (info@pythonware.com).
|
---|
31 | *
|
---|
32 | * Portions of this engine have been developed in cooperation with
|
---|
33 | * CNRI. Hewlett-Packard provided funding for 1.6 integration and
|
---|
34 | * other compatibility work.
|
---|
35 | */
|
---|
36 |
|
---|
37 | #ifndef SRE_RECURSIVE
|
---|
38 |
|
---|
39 | static char copyright[] =
|
---|
40 | " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
|
---|
41 |
|
---|
42 | #define PY_SSIZE_T_CLEAN
|
---|
43 |
|
---|
44 | #include "Python.h"
|
---|
45 | #include "structmember.h" /* offsetof */
|
---|
46 |
|
---|
47 | #include "sre.h"
|
---|
48 |
|
---|
49 | #include <ctype.h>
|
---|
50 |
|
---|
51 | /* name of this module, minus the leading underscore */
|
---|
52 | #if !defined(SRE_MODULE)
|
---|
53 | #define SRE_MODULE "sre"
|
---|
54 | #endif
|
---|
55 |
|
---|
56 | #define SRE_PY_MODULE "re"
|
---|
57 |
|
---|
58 | /* defining this one enables tracing */
|
---|
59 | #undef VERBOSE
|
---|
60 |
|
---|
61 | #if PY_VERSION_HEX >= 0x01060000
|
---|
62 | #if PY_VERSION_HEX < 0x02020000 || defined(Py_USING_UNICODE)
|
---|
63 | /* defining this enables unicode support (default under 1.6a1 and later) */
|
---|
64 | #define HAVE_UNICODE
|
---|
65 | #endif
|
---|
66 | #endif
|
---|
67 |
|
---|
68 | /* -------------------------------------------------------------------- */
|
---|
69 | /* optional features */
|
---|
70 |
|
---|
71 | /* enables fast searching */
|
---|
72 | #define USE_FAST_SEARCH
|
---|
73 |
|
---|
74 | /* enables aggressive inlining (always on for Visual C) */
|
---|
75 | #undef USE_INLINE
|
---|
76 |
|
---|
77 | /* enables copy/deepcopy handling (work in progress) */
|
---|
78 | #undef USE_BUILTIN_COPY
|
---|
79 |
|
---|
80 | #if PY_VERSION_HEX < 0x01060000
|
---|
81 | #define PyObject_DEL(op) PyMem_DEL((op))
|
---|
82 | #endif
|
---|
83 |
|
---|
84 | /* -------------------------------------------------------------------- */
|
---|
85 |
|
---|
86 | #if defined(_MSC_VER)
|
---|
87 | #pragma optimize("agtw", on) /* doesn't seem to make much difference... */
|
---|
88 | #pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
|
---|
89 | /* fastest possible local call under MSVC */
|
---|
90 | #define LOCAL(type) static __inline type __fastcall
|
---|
91 | #elif defined(USE_INLINE)
|
---|
92 | #define LOCAL(type) static inline type
|
---|
93 | #else
|
---|
94 | #define LOCAL(type) static type
|
---|
95 | #endif
|
---|
96 |
|
---|
97 | /* error codes */
|
---|
98 | #define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
|
---|
99 | #define SRE_ERROR_STATE -2 /* illegal state */
|
---|
100 | #define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
|
---|
101 | #define SRE_ERROR_MEMORY -9 /* out of memory */
|
---|
102 |
|
---|
103 | #if defined(VERBOSE)
|
---|
104 | #define TRACE(v) printf v
|
---|
105 | #else
|
---|
106 | #define TRACE(v)
|
---|
107 | #endif
|
---|
108 |
|
---|
109 | /* -------------------------------------------------------------------- */
|
---|
110 | /* search engine state */
|
---|
111 |
|
---|
112 | /* default character predicates (run sre_chars.py to regenerate tables) */
|
---|
113 |
|
---|
/* Bit flags stored for every character code in sre_char_info[]. */
#define SRE_DIGIT_MASK     0x01
#define SRE_SPACE_MASK     0x02
#define SRE_LINEBREAK_MASK 0x04
#define SRE_ALNUM_MASK     0x08
#define SRE_WORD_MASK      0x10

/* FIXME: this assumes ASCII. create tables in init_sre() instead */

/* Flag set for each 7-bit character code, sixteen codes per row. */
static char sre_char_info[128] = {
    /* 0x00 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  6,  2,  2,  2,  0,  0,
    /* 0x10 */  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* 0x20 */  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* 0x30 */ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,  0,  0,  0,  0,  0,  0,
    /* 0x40 */  0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /* 0x50 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,  0,  0,  0,  0, 16,
    /* 0x60 */  0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
    /* 0x70 */ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,  0,  0,  0,  0,  0
};

/* Lower-case mapping for 7-bit codes: identity except that 'A'..'Z'
   (65..90) map to 'a'..'z' (97..122). */
static char sre_char_lower[128] = {
    /* 0x00 */   0,   1,   2,   3,   4,   5,   6,   7,
                 8,   9,  10,  11,  12,  13,  14,  15,
    /* 0x10 */  16,  17,  18,  19,  20,  21,  22,  23,
                24,  25,  26,  27,  28,  29,  30,  31,
    /* 0x20 */  32,  33,  34,  35,  36,  37,  38,  39,
                40,  41,  42,  43,  44,  45,  46,  47,
    /* 0x30 */  48,  49,  50,  51,  52,  53,  54,  55,
                56,  57,  58,  59,  60,  61,  62,  63,
    /* 0x40 */  64,  97,  98,  99, 100, 101, 102, 103,
               104, 105, 106, 107, 108, 109, 110, 111,
    /* 0x50 */ 112, 113, 114, 115, 116, 117, 118, 119,
               120, 121, 122,  91,  92,  93,  94,  95,
    /* 0x60 */  96,  97,  98,  99, 100, 101, 102, 103,
               104, 105, 106, 107, 108, 109, 110, 111,
    /* 0x70 */ 112, 113, 114, 115, 116, 117, 118, 119,
               120, 121, 122, 123, 124, 125, 126, 127
};

/* Shared body of the predicates below: yields the masked flag bits
   (non-zero when the flag is set) for codes below 128, and 0 for
   everything else.  Note that `ch` is evaluated twice. */
#define SRE_CHAR_INFO_TEST(ch, mask)\
    ((ch) < 128 ? (sre_char_info[(ch)] & mask) : 0)

#define SRE_IS_DIGIT(ch)     SRE_CHAR_INFO_TEST(ch, SRE_DIGIT_MASK)
#define SRE_IS_SPACE(ch)     SRE_CHAR_INFO_TEST(ch, SRE_SPACE_MASK)
#define SRE_IS_LINEBREAK(ch) SRE_CHAR_INFO_TEST(ch, SRE_LINEBREAK_MASK)
#define SRE_IS_ALNUM(ch)     SRE_CHAR_INFO_TEST(ch, SRE_ALNUM_MASK)
#define SRE_IS_WORD(ch)      SRE_CHAR_INFO_TEST(ch, SRE_WORD_MASK)

/* Map an ASCII upper-case letter to lower case; every other code,
   including all codes >= 128, is returned unchanged. */
static unsigned int sre_lower(unsigned int ch)
{
    if (ch >= 128)
        return ch;
    return (unsigned int)sre_char_lower[ch];
}
|
---|
155 |
|
---|
156 | /* locale-specific character predicates */
|
---|
157 | /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
|
---|
158 | * warnings when c's type supports only numbers < N+1 */
|
---|
/* Codes with any bit above the low eight set never match; the rest are
   classified by the <ctype.h> routines. */
#define SRE_LOC_IS_DIGIT(ch)\
    (!((ch) & ~255) ? isdigit((ch)) : 0)
#define SRE_LOC_IS_SPACE(ch)\
    (!((ch) & ~255) ? isspace((ch)) : 0)
#define SRE_LOC_IS_LINEBREAK(ch)\
    ((ch) == '\n')
#define SRE_LOC_IS_ALNUM(ch)\
    (!((ch) & ~255) ? isalnum((ch)) : 0)
/* a word character is an alphanumeric or the underscore */
#define SRE_LOC_IS_WORD(ch)\
    (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')

/* Lower-case a code below 256 with tolower(); larger codes are
   returned unchanged. */
static unsigned int sre_lower_locale(unsigned int ch)
{
    if (ch >= 256)
        return ch;
    return (unsigned int)tolower((ch));
}
|
---|
169 |
|
---|
170 | /* unicode-specific character predicates */
|
---|
171 |
|
---|
172 | #if defined(HAVE_UNICODE)
|
---|
173 |
|
---|
174 | #define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDIGIT((Py_UNICODE)(ch))
|
---|
175 | #define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
|
---|
176 | #define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
|
---|
177 | #define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
|
---|
178 | #define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')
|
---|
179 |
|
---|
180 | static unsigned int sre_lower_unicode(unsigned int ch)
|
---|
181 | {
|
---|
182 | return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
|
---|
183 | }
|
---|
184 |
|
---|
185 | #endif
|
---|
186 |
|
---|
187 | LOCAL(int)
|
---|
188 | sre_category(SRE_CODE category, unsigned int ch)
|
---|
189 | {
|
---|
190 | switch (category) {
|
---|
191 |
|
---|
192 | case SRE_CATEGORY_DIGIT:
|
---|
193 | return SRE_IS_DIGIT(ch);
|
---|
194 | case SRE_CATEGORY_NOT_DIGIT:
|
---|
195 | return !SRE_IS_DIGIT(ch);
|
---|
196 | case SRE_CATEGORY_SPACE:
|
---|
197 | return SRE_IS_SPACE(ch);
|
---|
198 | case SRE_CATEGORY_NOT_SPACE:
|
---|
199 | return !SRE_IS_SPACE(ch);
|
---|
200 | case SRE_CATEGORY_WORD:
|
---|
201 | return SRE_IS_WORD(ch);
|
---|
202 | case SRE_CATEGORY_NOT_WORD:
|
---|
203 | return !SRE_IS_WORD(ch);
|
---|
204 | case SRE_CATEGORY_LINEBREAK:
|
---|
205 | return SRE_IS_LINEBREAK(ch);
|
---|
206 | case SRE_CATEGORY_NOT_LINEBREAK:
|
---|
207 | return !SRE_IS_LINEBREAK(ch);
|
---|
208 |
|
---|
209 | case SRE_CATEGORY_LOC_WORD:
|
---|
210 | return SRE_LOC_IS_WORD(ch);
|
---|
211 | case SRE_CATEGORY_LOC_NOT_WORD:
|
---|
212 | return !SRE_LOC_IS_WORD(ch);
|
---|
213 |
|
---|
214 | #if defined(HAVE_UNICODE)
|
---|
215 | case SRE_CATEGORY_UNI_DIGIT:
|
---|
216 | return SRE_UNI_IS_DIGIT(ch);
|
---|
217 | case SRE_CATEGORY_UNI_NOT_DIGIT:
|
---|
218 | return !SRE_UNI_IS_DIGIT(ch);
|
---|
219 | case SRE_CATEGORY_UNI_SPACE:
|
---|
220 | return SRE_UNI_IS_SPACE(ch);
|
---|
221 | case SRE_CATEGORY_UNI_NOT_SPACE:
|
---|
222 | return !SRE_UNI_IS_SPACE(ch);
|
---|
223 | case SRE_CATEGORY_UNI_WORD:
|
---|
224 | return SRE_UNI_IS_WORD(ch);
|
---|
225 | case SRE_CATEGORY_UNI_NOT_WORD:
|
---|
226 | return !SRE_UNI_IS_WORD(ch);
|
---|
227 | case SRE_CATEGORY_UNI_LINEBREAK:
|
---|
228 | return SRE_UNI_IS_LINEBREAK(ch);
|
---|
229 | case SRE_CATEGORY_UNI_NOT_LINEBREAK:
|
---|
230 | return !SRE_UNI_IS_LINEBREAK(ch);
|
---|
231 | #else
|
---|
232 | case SRE_CATEGORY_UNI_DIGIT:
|
---|
233 | return SRE_IS_DIGIT(ch);
|
---|
234 | case SRE_CATEGORY_UNI_NOT_DIGIT:
|
---|
235 | return !SRE_IS_DIGIT(ch);
|
---|
236 | case SRE_CATEGORY_UNI_SPACE:
|
---|
237 | return SRE_IS_SPACE(ch);
|
---|
238 | case SRE_CATEGORY_UNI_NOT_SPACE:
|
---|
239 | return !SRE_IS_SPACE(ch);
|
---|
240 | case SRE_CATEGORY_UNI_WORD:
|
---|
241 | return SRE_LOC_IS_WORD(ch);
|
---|
242 | case SRE_CATEGORY_UNI_NOT_WORD:
|
---|
243 | return !SRE_LOC_IS_WORD(ch);
|
---|
244 | case SRE_CATEGORY_UNI_LINEBREAK:
|
---|
245 | return SRE_IS_LINEBREAK(ch);
|
---|
246 | case SRE_CATEGORY_UNI_NOT_LINEBREAK:
|
---|
247 | return !SRE_IS_LINEBREAK(ch);
|
---|
248 | #endif
|
---|
249 | }
|
---|
250 | return 0;
|
---|
251 | }
|
---|
252 |
|
---|
253 | /* helpers */
|
---|
254 |
|
---|
255 | static void
|
---|
256 | data_stack_dealloc(SRE_STATE* state)
|
---|
257 | {
|
---|
258 | if (state->data_stack) {
|
---|
259 | PyMem_FREE(state->data_stack);
|
---|
260 | state->data_stack = NULL;
|
---|
261 | }
|
---|
262 | state->data_stack_size = state->data_stack_base = 0;
|
---|
263 | }
|
---|
264 |
|
---|
265 | static int
|
---|
266 | data_stack_grow(SRE_STATE* state, Py_ssize_t size)
|
---|
267 | {
|
---|
268 | Py_ssize_t minsize, cursize;
|
---|
269 | minsize = state->data_stack_base+size;
|
---|
270 | cursize = state->data_stack_size;
|
---|
271 | if (cursize < minsize) {
|
---|
272 | void* stack;
|
---|
273 | cursize = minsize+minsize/4+1024;
|
---|
274 | TRACE(("allocate/grow stack %d\n", cursize));
|
---|
275 | stack = PyMem_REALLOC(state->data_stack, cursize);
|
---|
276 | if (!stack) {
|
---|
277 | data_stack_dealloc(state);
|
---|
278 | return SRE_ERROR_MEMORY;
|
---|
279 | }
|
---|
280 | state->data_stack = (char *)stack;
|
---|
281 | state->data_stack_size = cursize;
|
---|
282 | }
|
---|
283 | return 0;
|
---|
284 | }
|
---|
285 |
|
---|
/* generate 8-bit version */

/* The engine section further down is written against the SRE_CHAR type
   and the SRE_AT / SRE_COUNT / ... function-name macros.  This first
   group selects `unsigned char` and the plain sre_* names. */
#define SRE_CHAR unsigned char
#define SRE_AT sre_at
#define SRE_COUNT sre_count
#define SRE_CHARSET sre_charset
#define SRE_INFO sre_info
#define SRE_MATCH sre_match
#define SRE_MATCH_CONTEXT sre_match_context
#define SRE_SEARCH sre_search
#define SRE_LITERAL_TEMPLATE sre_literal_template

#if defined(HAVE_UNICODE)

/* Pull in "_sre.c" (presumably this very file -- confirm against the
   build) with SRE_RECURSIVE defined, so that everything guarded by
   #ifndef SRE_RECURSIVE is skipped in the nested pass and only the
   engine section is compiled, using the 8-bit definitions above. */
#define SRE_RECURSIVE
#include "_sre.c"
#undef SRE_RECURSIVE

/* drop the 8-bit names before redefining them for the unicode pass */
#undef SRE_LITERAL_TEMPLATE
#undef SRE_SEARCH
#undef SRE_MATCH
#undef SRE_MATCH_CONTEXT
#undef SRE_INFO
#undef SRE_CHARSET
#undef SRE_COUNT
#undef SRE_AT
#undef SRE_CHAR

/* generate 16-bit unicode version */

/* These definitions stay active for the engine section that follows
   the #endif below; the function names carry a `u` prefix so they do
   not clash with the 8-bit set. */
#define SRE_CHAR Py_UNICODE
#define SRE_AT sre_uat
#define SRE_COUNT sre_ucount
#define SRE_CHARSET sre_ucharset
#define SRE_INFO sre_uinfo
#define SRE_MATCH sre_umatch
#define SRE_MATCH_CONTEXT sre_umatch_context
#define SRE_SEARCH sre_usearch
#define SRE_LITERAL_TEMPLATE sre_uliteral_template
#endif

#endif /* SRE_RECURSIVE */
|
---|
328 |
|
---|
329 | /* -------------------------------------------------------------------- */
|
---|
330 | /* String matching engine */
|
---|
331 |
|
---|
332 | /* the following section is compiled twice, with different character
|
---|
333 | settings */
|
---|
334 |
|
---|
335 | LOCAL(int)
|
---|
336 | SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
|
---|
337 | {
|
---|
338 | /* check if pointer is at given position */
|
---|
339 |
|
---|
340 | Py_ssize_t thisp, thatp;
|
---|
341 |
|
---|
342 | switch (at) {
|
---|
343 |
|
---|
344 | case SRE_AT_BEGINNING:
|
---|
345 | case SRE_AT_BEGINNING_STRING:
|
---|
346 | return ((void*) ptr == state->beginning);
|
---|
347 |
|
---|
348 | case SRE_AT_BEGINNING_LINE:
|
---|
349 | return ((void*) ptr == state->beginning ||
|
---|
350 | SRE_IS_LINEBREAK((int) ptr[-1]));
|
---|
351 |
|
---|
352 | case SRE_AT_END:
|
---|
353 | return (((void*) (ptr+1) == state->end &&
|
---|
354 | SRE_IS_LINEBREAK((int) ptr[0])) ||
|
---|
355 | ((void*) ptr == state->end));
|
---|
356 |
|
---|
357 | case SRE_AT_END_LINE:
|
---|
358 | return ((void*) ptr == state->end ||
|
---|
359 | SRE_IS_LINEBREAK((int) ptr[0]));
|
---|
360 |
|
---|
361 | case SRE_AT_END_STRING:
|
---|
362 | return ((void*) ptr == state->end);
|
---|
363 |
|
---|
364 | case SRE_AT_BOUNDARY:
|
---|
365 | if (state->beginning == state->end)
|
---|
366 | return 0;
|
---|
367 | thatp = ((void*) ptr > state->beginning) ?
|
---|
368 | SRE_IS_WORD((int) ptr[-1]) : 0;
|
---|
369 | thisp = ((void*) ptr < state->end) ?
|
---|
370 | SRE_IS_WORD((int) ptr[0]) : 0;
|
---|
371 | return thisp != thatp;
|
---|
372 |
|
---|
373 | case SRE_AT_NON_BOUNDARY:
|
---|
374 | if (state->beginning == state->end)
|
---|
375 | return 0;
|
---|
376 | thatp = ((void*) ptr > state->beginning) ?
|
---|
377 | SRE_IS_WORD((int) ptr[-1]) : 0;
|
---|
378 | thisp = ((void*) ptr < state->end) ?
|
---|
379 | SRE_IS_WORD((int) ptr[0]) : 0;
|
---|
380 | return thisp == thatp;
|
---|
381 |
|
---|
382 | case SRE_AT_LOC_BOUNDARY:
|
---|
383 | if (state->beginning == state->end)
|
---|
384 | return 0;
|
---|
385 | thatp = ((void*) ptr > state->beginning) ?
|
---|
386 | SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
|
---|
387 | thisp = ((void*) ptr < state->end) ?
|
---|
388 | SRE_LOC_IS_WORD((int) ptr[0]) : 0;
|
---|
389 | return thisp != thatp;
|
---|
390 |
|
---|
391 | case SRE_AT_LOC_NON_BOUNDARY:
|
---|
392 | if (state->beginning == state->end)
|
---|
393 | return 0;
|
---|
394 | thatp = ((void*) ptr > state->beginning) ?
|
---|
395 | SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
|
---|
396 | thisp = ((void*) ptr < state->end) ?
|
---|
397 | SRE_LOC_IS_WORD((int) ptr[0]) : 0;
|
---|
398 | return thisp == thatp;
|
---|
399 |
|
---|
400 | #if defined(HAVE_UNICODE)
|
---|
401 | case SRE_AT_UNI_BOUNDARY:
|
---|
402 | if (state->beginning == state->end)
|
---|
403 | return 0;
|
---|
404 | thatp = ((void*) ptr > state->beginning) ?
|
---|
405 | SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
|
---|
406 | thisp = ((void*) ptr < state->end) ?
|
---|
407 | SRE_UNI_IS_WORD((int) ptr[0]) : 0;
|
---|
408 | return thisp != thatp;
|
---|
409 |
|
---|
410 | case SRE_AT_UNI_NON_BOUNDARY:
|
---|
411 | if (state->beginning == state->end)
|
---|
412 | return 0;
|
---|
413 | thatp = ((void*) ptr > state->beginning) ?
|
---|
414 | SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
|
---|
415 | thisp = ((void*) ptr < state->end) ?
|
---|
416 | SRE_UNI_IS_WORD((int) ptr[0]) : 0;
|
---|
417 | return thisp == thatp;
|
---|
418 | #endif
|
---|
419 |
|
---|
420 | }
|
---|
421 |
|
---|
422 | return 0;
|
---|
423 | }
|
---|
424 |
|
---|
425 | LOCAL(int)
|
---|
426 | SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
|
---|
427 | {
|
---|
428 | /* check if character is a member of the given set */
|
---|
429 |
|
---|
430 | int ok = 1;
|
---|
431 |
|
---|
432 | for (;;) {
|
---|
433 | switch (*set++) {
|
---|
434 |
|
---|
435 | case SRE_OP_FAILURE:
|
---|
436 | return !ok;
|
---|
437 |
|
---|
438 | case SRE_OP_LITERAL:
|
---|
439 | /* <LITERAL> <code> */
|
---|
440 | if (ch == set[0])
|
---|
441 | return ok;
|
---|
442 | set++;
|
---|
443 | break;
|
---|
444 |
|
---|
445 | case SRE_OP_CATEGORY:
|
---|
446 | /* <CATEGORY> <code> */
|
---|
447 | if (sre_category(set[0], (int) ch))
|
---|
448 | return ok;
|
---|
449 | set += 1;
|
---|
450 | break;
|
---|
451 |
|
---|
452 | case SRE_OP_CHARSET:
|
---|
453 | if (sizeof(SRE_CODE) == 2) {
|
---|
454 | /* <CHARSET> <bitmap> (16 bits per code word) */
|
---|
455 | if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
|
---|
456 | return ok;
|
---|
457 | set += 16;
|
---|
458 | }
|
---|
459 | else {
|
---|
460 | /* <CHARSET> <bitmap> (32 bits per code word) */
|
---|
461 | if (ch < 256 && (set[ch >> 5] & (1 << (ch & 31))))
|
---|
462 | return ok;
|
---|
463 | set += 8;
|
---|
464 | }
|
---|
465 | break;
|
---|
466 |
|
---|
467 | case SRE_OP_RANGE:
|
---|
468 | /* <RANGE> <lower> <upper> */
|
---|
469 | if (set[0] <= ch && ch <= set[1])
|
---|
470 | return ok;
|
---|
471 | set += 2;
|
---|
472 | break;
|
---|
473 |
|
---|
474 | case SRE_OP_NEGATE:
|
---|
475 | ok = !ok;
|
---|
476 | break;
|
---|
477 |
|
---|
478 | case SRE_OP_BIGCHARSET:
|
---|
479 | /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
|
---|
480 | {
|
---|
481 | Py_ssize_t count, block;
|
---|
482 | count = *(set++);
|
---|
483 |
|
---|
484 | if (sizeof(SRE_CODE) == 2) {
|
---|
485 | block = ((unsigned char*)set)[ch >> 8];
|
---|
486 | set += 128;
|
---|
487 | if (set[block*16 + ((ch & 255)>>4)] & (1 << (ch & 15)))
|
---|
488 | return ok;
|
---|
489 | set += count*16;
|
---|
490 | }
|
---|
491 | else {
|
---|
492 | /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
|
---|
493 | * warnings when c's type supports only numbers < N+1 */
|
---|
494 | if (!(ch & ~65535))
|
---|
495 | block = ((unsigned char*)set)[ch >> 8];
|
---|
496 | else
|
---|
497 | block = -1;
|
---|
498 | set += 64;
|
---|
499 | if (block >=0 &&
|
---|
500 | (set[block*8 + ((ch & 255)>>5)] & (1 << (ch & 31))))
|
---|
501 | return ok;
|
---|
502 | set += count*8;
|
---|
503 | }
|
---|
504 | break;
|
---|
505 | }
|
---|
506 |
|
---|
507 | default:
|
---|
508 | /* internal error -- there's not much we can do about it
|
---|
509 | here, so let's just pretend it didn't match... */
|
---|
510 | return 0;
|
---|
511 | }
|
---|
512 | }
|
---|
513 | }
|
---|
514 |
|
---|
515 | LOCAL(Py_ssize_t) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern);
|
---|
516 |
|
---|
517 | LOCAL(Py_ssize_t)
|
---|
518 | SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)
|
---|
519 | {
|
---|
520 | SRE_CODE chr;
|
---|
521 | SRE_CHAR* ptr = (SRE_CHAR *)state->ptr;
|
---|
522 | SRE_CHAR* end = (SRE_CHAR *)state->end;
|
---|
523 | Py_ssize_t i;
|
---|
524 |
|
---|
525 | /* adjust end */
|
---|
526 | if (maxcount < end - ptr && maxcount != 65535)
|
---|
527 | end = ptr + maxcount;
|
---|
528 |
|
---|
529 | switch (pattern[0]) {
|
---|
530 |
|
---|
531 | case SRE_OP_IN:
|
---|
532 | /* repeated set */
|
---|
533 | TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
|
---|
534 | while (ptr < end && SRE_CHARSET(pattern + 2, *ptr))
|
---|
535 | ptr++;
|
---|
536 | break;
|
---|
537 |
|
---|
538 | case SRE_OP_ANY:
|
---|
539 | /* repeated dot wildcard. */
|
---|
540 | TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
|
---|
541 | while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
|
---|
542 | ptr++;
|
---|
543 | break;
|
---|
544 |
|
---|
545 | case SRE_OP_ANY_ALL:
|
---|
546 | /* repeated dot wildcard. skip to the end of the target
|
---|
547 | string, and backtrack from there */
|
---|
548 | TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
|
---|
549 | ptr = end;
|
---|
550 | break;
|
---|
551 |
|
---|
552 | case SRE_OP_LITERAL:
|
---|
553 | /* repeated literal */
|
---|
554 | chr = pattern[1];
|
---|
555 | TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
|
---|
556 | while (ptr < end && (SRE_CODE) *ptr == chr)
|
---|
557 | ptr++;
|
---|
558 | break;
|
---|
559 |
|
---|
560 | case SRE_OP_LITERAL_IGNORE:
|
---|
561 | /* repeated literal */
|
---|
562 | chr = pattern[1];
|
---|
563 | TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
|
---|
564 | while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
|
---|
565 | ptr++;
|
---|
566 | break;
|
---|
567 |
|
---|
568 | case SRE_OP_NOT_LITERAL:
|
---|
569 | /* repeated non-literal */
|
---|
570 | chr = pattern[1];
|
---|
571 | TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
|
---|
572 | while (ptr < end && (SRE_CODE) *ptr != chr)
|
---|
573 | ptr++;
|
---|
574 | break;
|
---|
575 |
|
---|
576 | case SRE_OP_NOT_LITERAL_IGNORE:
|
---|
577 | /* repeated non-literal */
|
---|
578 | chr = pattern[1];
|
---|
579 | TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
|
---|
580 | while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
|
---|
581 | ptr++;
|
---|
582 | break;
|
---|
583 |
|
---|
584 | default:
|
---|
585 | /* repeated single character pattern */
|
---|
586 | TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
|
---|
587 | while ((SRE_CHAR*) state->ptr < end) {
|
---|
588 | i = SRE_MATCH(state, pattern);
|
---|
589 | if (i < 0)
|
---|
590 | return i;
|
---|
591 | if (!i)
|
---|
592 | break;
|
---|
593 | }
|
---|
594 | TRACE(("|%p|%p|COUNT %d\n", pattern, ptr,
|
---|
595 | (SRE_CHAR*) state->ptr - ptr));
|
---|
596 | return (SRE_CHAR*) state->ptr - ptr;
|
---|
597 | }
|
---|
598 |
|
---|
599 | TRACE(("|%p|%p|COUNT %d\n", pattern, ptr, ptr - (SRE_CHAR*) state->ptr));
|
---|
600 | return ptr - (SRE_CHAR*) state->ptr;
|
---|
601 | }
|
---|
602 |
|
---|
#if 0 /* not used in this release */
LOCAL(int)
SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
{
    /* Test an SRE_OP_INFO block against the current position.  On
       success the result is the number of SRE_CODE objects to skip;
       0 means the block rules out a match here. */

    SRE_CHAR* end = state->end;
    SRE_CHAR* ptr = state->ptr;
    Py_ssize_t k;

    /* check minimal length */
    if (pattern[3] && (end - ptr) < pattern[3])
        return 0;

    /* without a usable known prefix there is nothing more to check */
    if (!(pattern[2] & SRE_INFO_PREFIX) || !(pattern[5] > 1))
        return pattern[0];

    /* check known prefix */
    /* <length> <skip> <prefix data> <overlap data> */
    for (k = 0; k < pattern[5]; k++) {
        if ((SRE_CODE) ptr[k] != pattern[7 + k])
            return 0;
    }
    return pattern[0] + 2 * pattern[6];
}
#endif
|
---|
630 |
|
---|
631 | /* The macros below should be used to protect recursive SRE_MATCH()
|
---|
632 | * calls that *failed* and do *not* return immediately (IOW, those
|
---|
633 | * that will backtrack). Explaining:
|
---|
634 | *
|
---|
635 | * - Recursive SRE_MATCH() returned true: that's usually a success
|
---|
636 | * (besides atypical cases like ASSERT_NOT), therefore there's no
|
---|
637 | * reason to restore lastmark;
|
---|
638 | *
|
---|
639 | * - Recursive SRE_MATCH() returned false but the current SRE_MATCH()
|
---|
640 | * is returning to the caller: If the current SRE_MATCH() is the
|
---|
641 | * top function of the recursion, returning false will be a matching
|
---|
642 | * failure, and it doesn't matter where lastmark is pointing to.
|
---|
643 | * If it's *not* the top function, it will be a recursive SRE_MATCH()
|
---|
644 | * failure by itself, and the calling SRE_MATCH() will have to deal
|
---|
645 | * with the failure by the same rules explained here (it will restore
|
---|
646 | * lastmark by itself if necessary);
|
---|
647 | *
|
---|
648 | * - Recursive SRE_MATCH() returned false, and will continue the
|
---|
649 | * outside 'for' loop: must be protected when breaking, since the next
|
---|
650 | * OP could potentially depend on lastmark;
|
---|
651 | *
|
---|
652 | * - Recursive SRE_MATCH() returned false, and will be called again
|
---|
653 | * inside a local for/while loop: must be protected between each
|
---|
654 | * loop iteration, since the recursive SRE_MATCH() could do anything,
|
---|
655 | * and could potentially depend on lastmark.
|
---|
656 | *
|
---|
657 | * For more information, check the discussion at SF patch #712900.
|
---|
658 | */
|
---|
/* Copy the state's lastmark/lastindex into the current match context.
   Expects `ctx` and `state` to be in scope where the macro expands. */
#define LASTMARK_SAVE() \
    do { \
        ctx->lastmark = state->lastmark; \
        ctx->lastindex = state->lastindex; \
    } while (0)
/* Inverse of LASTMARK_SAVE(): write the values remembered in `ctx`
   back into `state`. */
#define LASTMARK_RESTORE() \
    do { \
        state->lastmark = ctx->lastmark; \
        state->lastindex = ctx->lastindex; \
    } while (0)

/* RETURN_ERROR leaves the enclosing function immediately with the
   value i.  RETURN_FAILURE / RETURN_SUCCESS instead store 0 / 1 in the
   local `ret` and jump to the `exit` label, both of which the
   enclosing function must provide. */
#define RETURN_ERROR(i) do { return i; } while(0)
#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)

/* Conditional forms keyed on a result value i: negative is an error,
   positive a success, zero a failure.  The argument is expanded more
   than once and is not parenthesised, so pass a plain variable. */
#define RETURN_ON_ERROR(i) \
    do { if (i < 0) RETURN_ERROR(i); } while (0)
#define RETURN_ON_SUCCESS(i) \
    do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
#define RETURN_ON_FAILURE(i) \
    do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
|
---|
680 |
|
---|
/* stringify the argument (used to print type names in TRACE output) */
#define SFY(x) #x

/* Reserve sizeof(type) bytes on top of the data stack and make `ptr`
   point at them.  Besides its arguments the macro uses the locals
   `alloc_pos` (receives the offset of the new object), `ctx_pos` and
   `ctx` of the expanding function.  If the stack has to grow and
   growing fails, the error code is returned straight from the
   enclosing function.  After a successful grow, `ctx` is looked up
   again at `ctx_pos` (unless ctx_pos is -1), since growing goes
   through a realloc that may move the buffer. */
#define DATA_STACK_ALLOC(state, type, ptr) \
do { \
    alloc_pos = state->data_stack_base; \
    TRACE(("allocating %s in %d (%d)\n", \
           SFY(type), alloc_pos, sizeof(type))); \
    if (state->data_stack_size < alloc_pos+sizeof(type)) { \
        int j = data_stack_grow(state, sizeof(type)); \
        if (j < 0) return j; \
        if (ctx_pos != -1) \
            DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \
    } \
    ptr = (type*)(state->data_stack+alloc_pos); \
    state->data_stack_base += sizeof(type); \
} while (0)

/* Point `ptr` at the object of the given type stored at byte offset
   `pos` of the data stack. */
#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
do { \
    TRACE(("looking up %s at %d\n", SFY(type), pos)); \
    ptr = (type*)(state->data_stack+pos); \
} while (0)

/* Copy `size` bytes from `data` onto the top of the data stack,
   growing it first if necessary.  Error handling and the `ctx`
   refresh work as in DATA_STACK_ALLOC. */
#define DATA_STACK_PUSH(state, data, size) \
do { \
    TRACE(("copy data in %p to %d (%d)\n", \
           data, state->data_stack_base, size)); \
    if (state->data_stack_size < state->data_stack_base+size) { \
        int j = data_stack_grow(state, size); \
        if (j < 0) return j; \
        if (ctx_pos != -1) \
            DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \
    } \
    memcpy(state->data_stack+state->data_stack_base, data, size); \
    state->data_stack_base += size; \
} while (0)

/* Copy the topmost `size` bytes of the data stack into `data`; the
   bytes are removed from the stack only when `discard` is non-zero. */
#define DATA_STACK_POP(state, data, size, discard) \
do { \
    TRACE(("copy data to %p from %d (%d)\n", \
           data, state->data_stack_base-size, size)); \
    memcpy(data, state->data_stack+state->data_stack_base-size, size); \
    if (discard) \
        state->data_stack_base -= size; \
} while (0)

/* Drop the topmost `size` bytes of the data stack without copying
   them anywhere. */
#define DATA_STACK_POP_DISCARD(state, size) \
do { \
    TRACE(("discard data from %d (%d)\n", \
           state->data_stack_base-size, size)); \
    state->data_stack_base -= size; \
} while(0)

/* Shorthands that operate on the local `state`; for the push/pop
   forms x is a pointer and the size is taken from what it points to. */
#define DATA_PUSH(x) \
    DATA_STACK_PUSH(state, (x), sizeof(*(x)))
#define DATA_POP(x) \
    DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
#define DATA_POP_DISCARD(x) \
    DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
#define DATA_ALLOC(t,p) \
    DATA_STACK_ALLOC(state, t, p)
#define DATA_LOOKUP_AT(t,p,pos) \
    DATA_STACK_LOOKUP_AT(state,t,p,pos)
|
---|
744 |
|
---|
/* Save / restore the first lastmark+1 entries of state->mark on the
   data stack.  All four macros do nothing unless lastmark > 0.
   MARK_PUSH also overwrites the local `i` of the expanding function
   with the value of lastmark before pushing. */
#define MARK_PUSH(lastmark) \
    do if (lastmark > 0) { \
        i = lastmark; /* ctx->lastmark may change if reallocated */ \
        DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \
    } while (0)
/* copy the saved marks back into state->mark and remove them from
   the stack */
#define MARK_POP(lastmark) \
    do if (lastmark > 0) { \
        DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 1); \
    } while (0)
/* copy the saved marks back into state->mark but leave them on the
   stack */
#define MARK_POP_KEEP(lastmark) \
    do if (lastmark > 0) { \
        DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 0); \
    } while (0)
/* remove the saved marks from the stack without touching state->mark */
#define MARK_POP_DISCARD(lastmark) \
    do if (lastmark > 0) { \
        DATA_STACK_POP_DISCARD(state, (lastmark+1)*sizeof(void*)); \
    } while (0)
|
---|
762 |
|
---|
/* Values stored in SRE_MATCH_CONTEXT.jump.  DO_JUMP records one of
   them in the context it creates; JUMP_NONE marks the initial context.
   NOTE(review): presumably each value identifies the DO_JUMP site to
   resume at once the nested match finishes -- the code that consumes
   them is not part of this section; confirm there. */
#define JUMP_NONE 0
#define JUMP_MAX_UNTIL_1 1
#define JUMP_MAX_UNTIL_2 2
#define JUMP_MAX_UNTIL_3 3
#define JUMP_MIN_UNTIL_1 4
#define JUMP_MIN_UNTIL_2 5
#define JUMP_MIN_UNTIL_3 6
#define JUMP_REPEAT 7
#define JUMP_REPEAT_ONE_1 8
#define JUMP_REPEAT_ONE_2 9
#define JUMP_MIN_REPEAT_ONE 10
#define JUMP_BRANCH 11
#define JUMP_ASSERT 12
#define JUMP_ASSERT_NOT 13

/* Start matching `nextpattern` in a fresh context: allocate the
   context on the data stack, remember the position of the current
   context and the jump value in it, make it the current context and
   branch to the `entrance` label.  The macro also defines the label
   `jumplabel` directly after the goto.  It relies on the locals
   `ctx`, `nextctx`, `ctx_pos` and `alloc_pos` and expands to several
   statements, the last of which (`while (0)`) is completed by the
   semicolon at the call site.  The definition ends in a line
   continuation, so the blank line after it is part of the macro. */
#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
    DATA_ALLOC(SRE_MATCH_CONTEXT, nextctx); \
    nextctx->last_ctx_pos = ctx_pos; \
    nextctx->jump = jumpvalue; \
    nextctx->pattern = nextpattern; \
    ctx_pos = alloc_pos; \
    ctx = nextctx; \
    goto entrance; \
    jumplabel: \
    while (0) /* gcc doesn't like labels at end of scopes */ \

/* One frame of matcher state, kept on the data stack. */
typedef struct {
    /* data stack offset of the context that created this one,
       -1 for the initial context */
    Py_ssize_t last_ctx_pos;
    /* one of the JUMP_* values */
    Py_ssize_t jump;
    /* text position; loaded from state->ptr on entry */
    SRE_CHAR* ptr;
    /* current position in the pattern code */
    SRE_CODE* pattern;
    /* NOTE(review): use of `count` is not visible in this section */
    Py_ssize_t count;
    /* values stored by LASTMARK_SAVE() / read by LASTMARK_RESTORE() */
    Py_ssize_t lastmark;
    Py_ssize_t lastindex;
    /* NOTE(review): per-opcode scratch data, presumably; confirm
       against the opcode handlers */
    union {
        SRE_CODE chr;
        SRE_REPEAT* rep;
    } u;
} SRE_MATCH_CONTEXT;
|
---|
802 |
|
---|
803 | /* check if string matches the given pattern. returns <0 for
|
---|
804 | error, 0 for failure, and 1 for success */
|
---|
805 | LOCAL(Py_ssize_t)
|
---|
806 | SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
|
---|
807 | {
|
---|
808 | SRE_CHAR* end = (SRE_CHAR *)state->end;
|
---|
809 | Py_ssize_t alloc_pos, ctx_pos = -1;
|
---|
810 | Py_ssize_t i, ret = 0;
|
---|
811 | Py_ssize_t jump;
|
---|
812 |
|
---|
813 | SRE_MATCH_CONTEXT* ctx;
|
---|
814 | SRE_MATCH_CONTEXT* nextctx;
|
---|
815 |
|
---|
816 | TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
|
---|
817 |
|
---|
818 | DATA_ALLOC(SRE_MATCH_CONTEXT, ctx);
|
---|
819 | ctx->last_ctx_pos = -1;
|
---|
820 | ctx->jump = JUMP_NONE;
|
---|
821 | ctx->pattern = pattern;
|
---|
822 | ctx_pos = alloc_pos;
|
---|
823 |
|
---|
824 | entrance:
|
---|
825 |
|
---|
826 | ctx->ptr = (SRE_CHAR *)state->ptr;
|
---|
827 |
|
---|
828 | if (ctx->pattern[0] == SRE_OP_INFO) {
|
---|
829 | /* optimization info block */
|
---|
830 | /* <INFO> <1=skip> <2=flags> <3=min> ... */
|
---|
831 | if (ctx->pattern[3] && (end - ctx->ptr) < ctx->pattern[3]) {
|
---|
832 | TRACE(("reject (got %d chars, need %d)\n",
|
---|
833 | (end - ctx->ptr), ctx->pattern[3]));
|
---|
834 | RETURN_FAILURE;
|
---|
835 | }
|
---|
836 | ctx->pattern += ctx->pattern[1] + 1;
|
---|
837 | }
|
---|
838 |
|
---|
839 | for (;;) {
|
---|
840 |
|
---|
841 | switch (*ctx->pattern++) {
|
---|
842 |
|
---|
843 | case SRE_OP_MARK:
|
---|
844 | /* set mark */
|
---|
845 | /* <MARK> <gid> */
|
---|
846 | TRACE(("|%p|%p|MARK %d\n", ctx->pattern,
|
---|
847 | ctx->ptr, ctx->pattern[0]));
|
---|
848 | i = ctx->pattern[0];
|
---|
849 | if (i & 1)
|
---|
850 | state->lastindex = i/2 + 1;
|
---|
851 | if (i > state->lastmark) {
|
---|
852 | /* state->lastmark is the highest valid index in the
|
---|
853 | state->mark array. If it is increased by more than 1,
|
---|
854 | the intervening marks must be set to NULL to signal
|
---|
855 | that these marks have not been encountered. */
|
---|
856 | Py_ssize_t j = state->lastmark + 1;
|
---|
857 | while (j < i)
|
---|
858 | state->mark[j++] = NULL;
|
---|
859 | state->lastmark = i;
|
---|
860 | }
|
---|
861 | state->mark[i] = ctx->ptr;
|
---|
862 | ctx->pattern++;
|
---|
863 | break;
|
---|
864 |
|
---|
865 | case SRE_OP_LITERAL:
|
---|
866 | /* match literal string */
|
---|
867 | /* <LITERAL> <code> */
|
---|
868 | TRACE(("|%p|%p|LITERAL %d\n", ctx->pattern,
|
---|
869 | ctx->ptr, *ctx->pattern));
|
---|
870 | if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] != ctx->pattern[0])
|
---|
871 | RETURN_FAILURE;
|
---|
872 | ctx->pattern++;
|
---|
873 | ctx->ptr++;
|
---|
874 | break;
|
---|
875 |
|
---|
876 | case SRE_OP_NOT_LITERAL:
|
---|
877 | /* match anything that is not literal character */
|
---|
878 | /* <NOT_LITERAL> <code> */
|
---|
879 | TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx->pattern,
|
---|
880 | ctx->ptr, *ctx->pattern));
|
---|
881 | if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] == ctx->pattern[0])
|
---|
882 | RETURN_FAILURE;
|
---|
883 | ctx->pattern++;
|
---|
884 | ctx->ptr++;
|
---|
885 | break;
|
---|
886 |
|
---|
887 | case SRE_OP_SUCCESS:
|
---|
888 | /* end of pattern */
|
---|
889 | TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr));
|
---|
890 | state->ptr = ctx->ptr;
|
---|
891 | RETURN_SUCCESS;
|
---|
892 |
|
---|
893 | case SRE_OP_AT:
|
---|
894 | /* match at given position */
|
---|
895 | /* <AT> <code> */
|
---|
896 | TRACE(("|%p|%p|AT %d\n", ctx->pattern, ctx->ptr, *ctx->pattern));
|
---|
897 | if (!SRE_AT(state, ctx->ptr, *ctx->pattern))
|
---|
898 | RETURN_FAILURE;
|
---|
899 | ctx->pattern++;
|
---|
900 | break;
|
---|
901 |
|
---|
902 | case SRE_OP_CATEGORY:
|
---|
903 | /* match at given category */
|
---|
904 | /* <CATEGORY> <code> */
|
---|
905 | TRACE(("|%p|%p|CATEGORY %d\n", ctx->pattern,
|
---|
906 | ctx->ptr, *ctx->pattern));
|
---|
907 | if (ctx->ptr >= end || !sre_category(ctx->pattern[0], ctx->ptr[0]))
|
---|
908 | RETURN_FAILURE;
|
---|
909 | ctx->pattern++;
|
---|
910 | ctx->ptr++;
|
---|
911 | break;
|
---|
912 |
|
---|
913 | case SRE_OP_ANY:
|
---|
914 | /* match anything (except a newline) */
|
---|
915 | /* <ANY> */
|
---|
916 | TRACE(("|%p|%p|ANY\n", ctx->pattern, ctx->ptr));
|
---|
917 | if (ctx->ptr >= end || SRE_IS_LINEBREAK(ctx->ptr[0]))
|
---|
918 | RETURN_FAILURE;
|
---|
919 | ctx->ptr++;
|
---|
920 | break;
|
---|
921 |
|
---|
922 | case SRE_OP_ANY_ALL:
|
---|
923 | /* match anything */
|
---|
924 | /* <ANY_ALL> */
|
---|
925 | TRACE(("|%p|%p|ANY_ALL\n", ctx->pattern, ctx->ptr));
|
---|
926 | if (ctx->ptr >= end)
|
---|
927 | RETURN_FAILURE;
|
---|
928 | ctx->ptr++;
|
---|
929 | break;
|
---|
930 |
|
---|
931 | case SRE_OP_IN:
|
---|
932 | /* match set member (or non_member) */
|
---|
933 | /* <IN> <skip> <set> */
|
---|
934 | TRACE(("|%p|%p|IN\n", ctx->pattern, ctx->ptr));
|
---|
935 | if (ctx->ptr >= end || !SRE_CHARSET(ctx->pattern + 1, *ctx->ptr))
|
---|
936 | RETURN_FAILURE;
|
---|
937 | ctx->pattern += ctx->pattern[0];
|
---|
938 | ctx->ptr++;
|
---|
939 | break;
|
---|
940 |
|
---|
941 | case SRE_OP_LITERAL_IGNORE:
|
---|
942 | TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
|
---|
943 | ctx->pattern, ctx->ptr, ctx->pattern[0]));
|
---|
944 | if (ctx->ptr >= end ||
|
---|
945 | state->lower(*ctx->ptr) != state->lower(*ctx->pattern))
|
---|
946 | RETURN_FAILURE;
|
---|
947 | ctx->pattern++;
|
---|
948 | ctx->ptr++;
|
---|
949 | break;
|
---|
950 |
|
---|
951 | case SRE_OP_NOT_LITERAL_IGNORE:
|
---|
952 | TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
|
---|
953 | ctx->pattern, ctx->ptr, *ctx->pattern));
|
---|
954 | if (ctx->ptr >= end ||
|
---|
955 | state->lower(*ctx->ptr) == state->lower(*ctx->pattern))
|
---|
956 | RETURN_FAILURE;
|
---|
957 | ctx->pattern++;
|
---|
958 | ctx->ptr++;
|
---|
959 | break;
|
---|
960 |
|
---|
961 | case SRE_OP_IN_IGNORE:
|
---|
962 | TRACE(("|%p|%p|IN_IGNORE\n", ctx->pattern, ctx->ptr));
|
---|
963 | if (ctx->ptr >= end
|
---|
964 | || !SRE_CHARSET(ctx->pattern+1,
|
---|
965 | (SRE_CODE)state->lower(*ctx->ptr)))
|
---|
966 | RETURN_FAILURE;
|
---|
967 | ctx->pattern += ctx->pattern[0];
|
---|
968 | ctx->ptr++;
|
---|
969 | break;
|
---|
970 |
|
---|
971 | case SRE_OP_JUMP:
|
---|
972 | case SRE_OP_INFO:
|
---|
973 | /* jump forward */
|
---|
974 | /* <JUMP> <offset> */
|
---|
975 | TRACE(("|%p|%p|JUMP %d\n", ctx->pattern,
|
---|
976 | ctx->ptr, ctx->pattern[0]));
|
---|
977 | ctx->pattern += ctx->pattern[0];
|
---|
978 | break;
|
---|
979 |
|
---|
980 | case SRE_OP_BRANCH:
|
---|
981 | /* alternation */
|
---|
982 | /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
|
---|
983 | TRACE(("|%p|%p|BRANCH\n", ctx->pattern, ctx->ptr));
|
---|
984 | LASTMARK_SAVE();
|
---|
985 | ctx->u.rep = state->repeat;
|
---|
986 | if (ctx->u.rep)
|
---|
987 | MARK_PUSH(ctx->lastmark);
|
---|
988 | for (; ctx->pattern[0]; ctx->pattern += ctx->pattern[0]) {
|
---|
989 | if (ctx->pattern[1] == SRE_OP_LITERAL &&
|
---|
990 | (ctx->ptr >= end ||
|
---|
991 | (SRE_CODE) *ctx->ptr != ctx->pattern[2]))
|
---|
992 | continue;
|
---|
993 | if (ctx->pattern[1] == SRE_OP_IN &&
|
---|
994 | (ctx->ptr >= end ||
|
---|
995 | !SRE_CHARSET(ctx->pattern + 3, (SRE_CODE) *ctx->ptr)))
|
---|
996 | continue;
|
---|
997 | state->ptr = ctx->ptr;
|
---|
998 | DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1);
|
---|
999 | if (ret) {
|
---|
1000 | if (ctx->u.rep)
|
---|
1001 | MARK_POP_DISCARD(ctx->lastmark);
|
---|
1002 | RETURN_ON_ERROR(ret);
|
---|
1003 | RETURN_SUCCESS;
|
---|
1004 | }
|
---|
1005 | if (ctx->u.rep)
|
---|
1006 | MARK_POP_KEEP(ctx->lastmark);
|
---|
1007 | LASTMARK_RESTORE();
|
---|
1008 | }
|
---|
1009 | if (ctx->u.rep)
|
---|
1010 | MARK_POP_DISCARD(ctx->lastmark);
|
---|
1011 | RETURN_FAILURE;
|
---|
1012 |
|
---|
1013 | case SRE_OP_REPEAT_ONE:
|
---|
1014 | /* match repeated sequence (maximizing regexp) */
|
---|
1015 |
|
---|
1016 | /* this operator only works if the repeated item is
|
---|
1017 | exactly one character wide, and we're not already
|
---|
1018 | collecting backtracking points. for other cases,
|
---|
1019 | use the MAX_REPEAT operator */
|
---|
1020 |
|
---|
1021 | /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
|
---|
1022 |
|
---|
1023 | TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
|
---|
1024 | ctx->pattern[1], ctx->pattern[2]));
|
---|
1025 |
|
---|
1026 | if (ctx->ptr + ctx->pattern[1] > end)
|
---|
1027 | RETURN_FAILURE; /* cannot match */
|
---|
1028 |
|
---|
1029 | state->ptr = ctx->ptr;
|
---|
1030 |
|
---|
1031 | ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[2]);
|
---|
1032 | RETURN_ON_ERROR(ret);
|
---|
1033 | DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
|
---|
1034 | ctx->count = ret;
|
---|
1035 | ctx->ptr += ctx->count;
|
---|
1036 |
|
---|
1037 | /* when we arrive here, count contains the number of
|
---|
1038 | matches, and ctx->ptr points to the tail of the target
|
---|
1039 | string. check if the rest of the pattern matches,
|
---|
1040 | and backtrack if not. */
|
---|
1041 |
|
---|
1042 | if (ctx->count < (Py_ssize_t) ctx->pattern[1])
|
---|
1043 | RETURN_FAILURE;
|
---|
1044 |
|
---|
1045 | if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {
|
---|
1046 | /* tail is empty. we're finished */
|
---|
1047 | state->ptr = ctx->ptr;
|
---|
1048 | RETURN_SUCCESS;
|
---|
1049 | }
|
---|
1050 |
|
---|
1051 | LASTMARK_SAVE();
|
---|
1052 |
|
---|
1053 | if (ctx->pattern[ctx->pattern[0]] == SRE_OP_LITERAL) {
|
---|
1054 | /* tail starts with a literal. skip positions where
|
---|
1055 | the rest of the pattern cannot possibly match */
|
---|
1056 | ctx->u.chr = ctx->pattern[ctx->pattern[0]+1];
|
---|
1057 | for (;;) {
|
---|
1058 | while (ctx->count >= (Py_ssize_t) ctx->pattern[1] &&
|
---|
1059 | (ctx->ptr >= end || *ctx->ptr != ctx->u.chr)) {
|
---|
1060 | ctx->ptr--;
|
---|
1061 | ctx->count--;
|
---|
1062 | }
|
---|
1063 | if (ctx->count < (Py_ssize_t) ctx->pattern[1])
|
---|
1064 | break;
|
---|
1065 | state->ptr = ctx->ptr;
|
---|
1066 | DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
|
---|
1067 | ctx->pattern+ctx->pattern[0]);
|
---|
1068 | if (ret) {
|
---|
1069 | RETURN_ON_ERROR(ret);
|
---|
1070 | RETURN_SUCCESS;
|
---|
1071 | }
|
---|
1072 |
|
---|
1073 | LASTMARK_RESTORE();
|
---|
1074 |
|
---|
1075 | ctx->ptr--;
|
---|
1076 | ctx->count--;
|
---|
1077 | }
|
---|
1078 |
|
---|
1079 | } else {
|
---|
1080 | /* general case */
|
---|
1081 | while (ctx->count >= (Py_ssize_t) ctx->pattern[1]) {
|
---|
1082 | state->ptr = ctx->ptr;
|
---|
1083 | DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
|
---|
1084 | ctx->pattern+ctx->pattern[0]);
|
---|
1085 | if (ret) {
|
---|
1086 | RETURN_ON_ERROR(ret);
|
---|
1087 | RETURN_SUCCESS;
|
---|
1088 | }
|
---|
1089 | ctx->ptr--;
|
---|
1090 | ctx->count--;
|
---|
1091 | LASTMARK_RESTORE();
|
---|
1092 | }
|
---|
1093 | }
|
---|
1094 | RETURN_FAILURE;
|
---|
1095 |
|
---|
1096 | case SRE_OP_MIN_REPEAT_ONE:
|
---|
1097 | /* match repeated sequence (minimizing regexp) */
|
---|
1098 |
|
---|
1099 | /* this operator only works if the repeated item is
|
---|
1100 | exactly one character wide, and we're not already
|
---|
1101 | collecting backtracking points. for other cases,
|
---|
1102 | use the MIN_REPEAT operator */
|
---|
1103 |
|
---|
1104 | /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
|
---|
1105 |
|
---|
1106 | TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
|
---|
1107 | ctx->pattern[1], ctx->pattern[2]));
|
---|
1108 |
|
---|
1109 | if (ctx->ptr + ctx->pattern[1] > end)
|
---|
1110 | RETURN_FAILURE; /* cannot match */
|
---|
1111 |
|
---|
1112 | state->ptr = ctx->ptr;
|
---|
1113 |
|
---|
1114 | if (ctx->pattern[1] == 0)
|
---|
1115 | ctx->count = 0;
|
---|
1116 | else {
|
---|
1117 | /* count using pattern min as the maximum */
|
---|
1118 | ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[1]);
|
---|
1119 | RETURN_ON_ERROR(ret);
|
---|
1120 | DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
|
---|
1121 | if (ret < (Py_ssize_t) ctx->pattern[1])
|
---|
1122 | /* didn't match minimum number of times */
|
---|
1123 | RETURN_FAILURE;
|
---|
1124 | /* advance past minimum matches of repeat */
|
---|
1125 | ctx->count = ret;
|
---|
1126 | ctx->ptr += ctx->count;
|
---|
1127 | }
|
---|
1128 |
|
---|
1129 | if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {
|
---|
1130 | /* tail is empty. we're finished */
|
---|
1131 | state->ptr = ctx->ptr;
|
---|
1132 | RETURN_SUCCESS;
|
---|
1133 |
|
---|
1134 | } else {
|
---|
1135 | /* general case */
|
---|
1136 | LASTMARK_SAVE();
|
---|
1137 | while ((Py_ssize_t)ctx->pattern[2] == 65535
|
---|
1138 | || ctx->count <= (Py_ssize_t)ctx->pattern[2]) {
|
---|
1139 | state->ptr = ctx->ptr;
|
---|
1140 | DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
|
---|
1141 | ctx->pattern+ctx->pattern[0]);
|
---|
1142 | if (ret) {
|
---|
1143 | RETURN_ON_ERROR(ret);
|
---|
1144 | RETURN_SUCCESS;
|
---|
1145 | }
|
---|
1146 | state->ptr = ctx->ptr;
|
---|
1147 | ret = SRE_COUNT(state, ctx->pattern+3, 1);
|
---|
1148 | RETURN_ON_ERROR(ret);
|
---|
1149 | DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
|
---|
1150 | if (ret == 0)
|
---|
1151 | break;
|
---|
1152 | assert(ret == 1);
|
---|
1153 | ctx->ptr++;
|
---|
1154 | ctx->count++;
|
---|
1155 | LASTMARK_RESTORE();
|
---|
1156 | }
|
---|
1157 | }
|
---|
1158 | RETURN_FAILURE;
|
---|
1159 |
|
---|
1160 | case SRE_OP_REPEAT:
|
---|
1161 | /* create repeat context. all the hard work is done
|
---|
1162 | by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
|
---|
1163 | /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
|
---|
1164 | TRACE(("|%p|%p|REPEAT %d %d\n", ctx->pattern, ctx->ptr,
|
---|
1165 | ctx->pattern[1], ctx->pattern[2]));
|
---|
1166 |
|
---|
1167 | /* install new repeat context */
|
---|
1168 | ctx->u.rep = (SRE_REPEAT*) PyObject_MALLOC(sizeof(*ctx->u.rep));
|
---|
1169 | /* XXX(nnorwitz): anything else we need to do on error? */
|
---|
1170 | if (!ctx->u.rep)
|
---|
1171 | RETURN_FAILURE;
|
---|
1172 | ctx->u.rep->count = -1;
|
---|
1173 | ctx->u.rep->pattern = ctx->pattern;
|
---|
1174 | ctx->u.rep->prev = state->repeat;
|
---|
1175 | ctx->u.rep->last_ptr = NULL;
|
---|
1176 | state->repeat = ctx->u.rep;
|
---|
1177 |
|
---|
1178 | state->ptr = ctx->ptr;
|
---|
1179 | DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0]);
|
---|
1180 | state->repeat = ctx->u.rep->prev;
|
---|
1181 | PyObject_FREE(ctx->u.rep);
|
---|
1182 |
|
---|
1183 | if (ret) {
|
---|
1184 | RETURN_ON_ERROR(ret);
|
---|
1185 | RETURN_SUCCESS;
|
---|
1186 | }
|
---|
1187 | RETURN_FAILURE;
|
---|
1188 |
|
---|
1189 | case SRE_OP_MAX_UNTIL:
|
---|
1190 | /* maximizing repeat */
|
---|
1191 | /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
|
---|
1192 |
|
---|
1193 | /* FIXME: we probably need to deal with zero-width
|
---|
1194 | matches in here... */
|
---|
1195 |
|
---|
1196 | ctx->u.rep = state->repeat;
|
---|
1197 | if (!ctx->u.rep)
|
---|
1198 | RETURN_ERROR(SRE_ERROR_STATE);
|
---|
1199 |
|
---|
1200 | state->ptr = ctx->ptr;
|
---|
1201 |
|
---|
1202 | ctx->count = ctx->u.rep->count+1;
|
---|
1203 |
|
---|
1204 | TRACE(("|%p|%p|MAX_UNTIL %d\n", ctx->pattern,
|
---|
1205 | ctx->ptr, ctx->count));
|
---|
1206 |
|
---|
1207 | if (ctx->count < ctx->u.rep->pattern[1]) {
|
---|
1208 | /* not enough matches */
|
---|
1209 | ctx->u.rep->count = ctx->count;
|
---|
1210 | DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
|
---|
1211 | ctx->u.rep->pattern+3);
|
---|
1212 | if (ret) {
|
---|
1213 | RETURN_ON_ERROR(ret);
|
---|
1214 | RETURN_SUCCESS;
|
---|
1215 | }
|
---|
1216 | ctx->u.rep->count = ctx->count-1;
|
---|
1217 | state->ptr = ctx->ptr;
|
---|
1218 | RETURN_FAILURE;
|
---|
1219 | }
|
---|
1220 |
|
---|
1221 | if ((ctx->count < ctx->u.rep->pattern[2] ||
|
---|
1222 | ctx->u.rep->pattern[2] == 65535) &&
|
---|
1223 | state->ptr != ctx->u.rep->last_ptr) {
|
---|
1224 | /* we may have enough matches, but if we can
|
---|
1225 | match another item, do so */
|
---|
1226 | ctx->u.rep->count = ctx->count;
|
---|
1227 | LASTMARK_SAVE();
|
---|
1228 | MARK_PUSH(ctx->lastmark);
|
---|
1229 | /* zero-width match protection */
|
---|
1230 | DATA_PUSH(&ctx->u.rep->last_ptr);
|
---|
1231 | ctx->u.rep->last_ptr = state->ptr;
|
---|
1232 | DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
|
---|
1233 | ctx->u.rep->pattern+3);
|
---|
1234 | DATA_POP(&ctx->u.rep->last_ptr);
|
---|
1235 | if (ret) {
|
---|
1236 | MARK_POP_DISCARD(ctx->lastmark);
|
---|
1237 | RETURN_ON_ERROR(ret);
|
---|
1238 | RETURN_SUCCESS;
|
---|
1239 | }
|
---|
1240 | MARK_POP(ctx->lastmark);
|
---|
1241 | LASTMARK_RESTORE();
|
---|
1242 | ctx->u.rep->count = ctx->count-1;
|
---|
1243 | state->ptr = ctx->ptr;
|
---|
1244 | }
|
---|
1245 |
|
---|
1246 | /* cannot match more repeated items here. make sure the
|
---|
1247 | tail matches */
|
---|
1248 | state->repeat = ctx->u.rep->prev;
|
---|
1249 | DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern);
|
---|
1250 | RETURN_ON_SUCCESS(ret);
|
---|
1251 | state->repeat = ctx->u.rep;
|
---|
1252 | state->ptr = ctx->ptr;
|
---|
1253 | RETURN_FAILURE;
|
---|
1254 |
|
---|
1255 | case SRE_OP_MIN_UNTIL:
|
---|
1256 | /* minimizing repeat */
|
---|
1257 | /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
|
---|
1258 |
|
---|
1259 | ctx->u.rep = state->repeat;
|
---|
1260 | if (!ctx->u.rep)
|
---|
1261 | RETURN_ERROR(SRE_ERROR_STATE);
|
---|
1262 |
|
---|
1263 | state->ptr = ctx->ptr;
|
---|
1264 |
|
---|
1265 | ctx->count = ctx->u.rep->count+1;
|
---|
1266 |
|
---|
1267 | TRACE(("|%p|%p|MIN_UNTIL %d %p\n", ctx->pattern,
|
---|
1268 | ctx->ptr, ctx->count, ctx->u.rep->pattern));
|
---|
1269 |
|
---|
1270 | if (ctx->count < ctx->u.rep->pattern[1]) {
|
---|
1271 | /* not enough matches */
|
---|
1272 | ctx->u.rep->count = ctx->count;
|
---|
1273 | DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
|
---|
1274 | ctx->u.rep->pattern+3);
|
---|
1275 | if (ret) {
|
---|
1276 | RETURN_ON_ERROR(ret);
|
---|
1277 | RETURN_SUCCESS;
|
---|
1278 | }
|
---|
1279 | ctx->u.rep->count = ctx->count-1;
|
---|
1280 | state->ptr = ctx->ptr;
|
---|
1281 | RETURN_FAILURE;
|
---|
1282 | }
|
---|
1283 |
|
---|
1284 | LASTMARK_SAVE();
|
---|
1285 |
|
---|
1286 | /* see if the tail matches */
|
---|
1287 | state->repeat = ctx->u.rep->prev;
|
---|
1288 | DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern);
|
---|
1289 | if (ret) {
|
---|
1290 | RETURN_ON_ERROR(ret);
|
---|
1291 | RETURN_SUCCESS;
|
---|
1292 | }
|
---|
1293 |
|
---|
1294 | state->repeat = ctx->u.rep;
|
---|
1295 | state->ptr = ctx->ptr;
|
---|
1296 |
|
---|
1297 | LASTMARK_RESTORE();
|
---|
1298 |
|
---|
1299 | if (ctx->count >= ctx->u.rep->pattern[2]
|
---|
1300 | && ctx->u.rep->pattern[2] != 65535)
|
---|
1301 | RETURN_FAILURE;
|
---|
1302 |
|
---|
1303 | ctx->u.rep->count = ctx->count;
|
---|
1304 | DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
|
---|
1305 | ctx->u.rep->pattern+3);
|
---|
1306 | if (ret) {
|
---|
1307 | RETURN_ON_ERROR(ret);
|
---|
1308 | RETURN_SUCCESS;
|
---|
1309 | }
|
---|
1310 | ctx->u.rep->count = ctx->count-1;
|
---|
1311 | state->ptr = ctx->ptr;
|
---|
1312 | RETURN_FAILURE;
|
---|
1313 |
|
---|
1314 | case SRE_OP_GROUPREF:
|
---|
1315 | /* match backreference */
|
---|
1316 | TRACE(("|%p|%p|GROUPREF %d\n", ctx->pattern,
|
---|
1317 | ctx->ptr, ctx->pattern[0]));
|
---|
1318 | i = ctx->pattern[0];
|
---|
1319 | {
|
---|
1320 | Py_ssize_t groupref = i+i;
|
---|
1321 | if (groupref >= state->lastmark) {
|
---|
1322 | RETURN_FAILURE;
|
---|
1323 | } else {
|
---|
1324 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
|
---|
1325 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
|
---|
1326 | if (!p || !e || e < p)
|
---|
1327 | RETURN_FAILURE;
|
---|
1328 | while (p < e) {
|
---|
1329 | if (ctx->ptr >= end || *ctx->ptr != *p)
|
---|
1330 | RETURN_FAILURE;
|
---|
1331 | p++; ctx->ptr++;
|
---|
1332 | }
|
---|
1333 | }
|
---|
1334 | }
|
---|
1335 | ctx->pattern++;
|
---|
1336 | break;
|
---|
1337 |
|
---|
1338 | case SRE_OP_GROUPREF_IGNORE:
|
---|
1339 | /* match backreference */
|
---|
1340 | TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx->pattern,
|
---|
1341 | ctx->ptr, ctx->pattern[0]));
|
---|
1342 | i = ctx->pattern[0];
|
---|
1343 | {
|
---|
1344 | Py_ssize_t groupref = i+i;
|
---|
1345 | if (groupref >= state->lastmark) {
|
---|
1346 | RETURN_FAILURE;
|
---|
1347 | } else {
|
---|
1348 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
|
---|
1349 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
|
---|
1350 | if (!p || !e || e < p)
|
---|
1351 | RETURN_FAILURE;
|
---|
1352 | while (p < e) {
|
---|
1353 | if (ctx->ptr >= end ||
|
---|
1354 | state->lower(*ctx->ptr) != state->lower(*p))
|
---|
1355 | RETURN_FAILURE;
|
---|
1356 | p++; ctx->ptr++;
|
---|
1357 | }
|
---|
1358 | }
|
---|
1359 | }
|
---|
1360 | ctx->pattern++;
|
---|
1361 | break;
|
---|
1362 |
|
---|
1363 | case SRE_OP_GROUPREF_EXISTS:
|
---|
1364 | TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx->pattern,
|
---|
1365 | ctx->ptr, ctx->pattern[0]));
|
---|
1366 | /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
|
---|
1367 | i = ctx->pattern[0];
|
---|
1368 | {
|
---|
1369 | Py_ssize_t groupref = i+i;
|
---|
1370 | if (groupref >= state->lastmark) {
|
---|
1371 | ctx->pattern += ctx->pattern[1];
|
---|
1372 | break;
|
---|
1373 | } else {
|
---|
1374 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
|
---|
1375 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
|
---|
1376 | if (!p || !e || e < p) {
|
---|
1377 | ctx->pattern += ctx->pattern[1];
|
---|
1378 | break;
|
---|
1379 | }
|
---|
1380 | }
|
---|
1381 | }
|
---|
1382 | ctx->pattern += 2;
|
---|
1383 | break;
|
---|
1384 |
|
---|
1385 | case SRE_OP_ASSERT:
|
---|
1386 | /* assert subpattern */
|
---|
1387 | /* <ASSERT> <skip> <back> <pattern> */
|
---|
1388 | TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern,
|
---|
1389 | ctx->ptr, ctx->pattern[1]));
|
---|
1390 | state->ptr = ctx->ptr - ctx->pattern[1];
|
---|
1391 | if (state->ptr < state->beginning)
|
---|
1392 | RETURN_FAILURE;
|
---|
1393 | DO_JUMP(JUMP_ASSERT, jump_assert, ctx->pattern+2);
|
---|
1394 | RETURN_ON_FAILURE(ret);
|
---|
1395 | ctx->pattern += ctx->pattern[0];
|
---|
1396 | break;
|
---|
1397 |
|
---|
1398 | case SRE_OP_ASSERT_NOT:
|
---|
1399 | /* assert not subpattern */
|
---|
1400 | /* <ASSERT_NOT> <skip> <back> <pattern> */
|
---|
1401 | TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern,
|
---|
1402 | ctx->ptr, ctx->pattern[1]));
|
---|
1403 | state->ptr = ctx->ptr - ctx->pattern[1];
|
---|
1404 | if (state->ptr >= state->beginning) {
|
---|
1405 | DO_JUMP(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2);
|
---|
1406 | if (ret) {
|
---|
1407 | RETURN_ON_ERROR(ret);
|
---|
1408 | RETURN_FAILURE;
|
---|
1409 | }
|
---|
1410 | }
|
---|
1411 | ctx->pattern += ctx->pattern[0];
|
---|
1412 | break;
|
---|
1413 |
|
---|
1414 | case SRE_OP_FAILURE:
|
---|
1415 | /* immediate failure */
|
---|
1416 | TRACE(("|%p|%p|FAILURE\n", ctx->pattern, ctx->ptr));
|
---|
1417 | RETURN_FAILURE;
|
---|
1418 |
|
---|
1419 | default:
|
---|
1420 | TRACE(("|%p|%p|UNKNOWN %d\n", ctx->pattern, ctx->ptr,
|
---|
1421 | ctx->pattern[-1]));
|
---|
1422 | RETURN_ERROR(SRE_ERROR_ILLEGAL);
|
---|
1423 | }
|
---|
1424 | }
|
---|
1425 |
|
---|
1426 | exit:
|
---|
1427 | ctx_pos = ctx->last_ctx_pos;
|
---|
1428 | jump = ctx->jump;
|
---|
1429 | DATA_POP_DISCARD(ctx);
|
---|
1430 | if (ctx_pos == -1)
|
---|
1431 | return ret;
|
---|
1432 | DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
|
---|
1433 |
|
---|
1434 | switch (jump) {
|
---|
1435 | case JUMP_MAX_UNTIL_2:
|
---|
1436 | TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx->pattern, ctx->ptr));
|
---|
1437 | goto jump_max_until_2;
|
---|
1438 | case JUMP_MAX_UNTIL_3:
|
---|
1439 | TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx->pattern, ctx->ptr));
|
---|
1440 | goto jump_max_until_3;
|
---|
1441 | case JUMP_MIN_UNTIL_2:
|
---|
1442 | TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx->pattern, ctx->ptr));
|
---|
1443 | goto jump_min_until_2;
|
---|
1444 | case JUMP_MIN_UNTIL_3:
|
---|
1445 | TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx->pattern, ctx->ptr));
|
---|
1446 | goto jump_min_until_3;
|
---|
1447 | case JUMP_BRANCH:
|
---|
1448 | TRACE(("|%p|%p|JUMP_BRANCH\n", ctx->pattern, ctx->ptr));
|
---|
1449 | goto jump_branch;
|
---|
1450 | case JUMP_MAX_UNTIL_1:
|
---|
1451 | TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx->pattern, ctx->ptr));
|
---|
1452 | goto jump_max_until_1;
|
---|
1453 | case JUMP_MIN_UNTIL_1:
|
---|
1454 | TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx->pattern, ctx->ptr));
|
---|
1455 | goto jump_min_until_1;
|
---|
1456 | case JUMP_REPEAT:
|
---|
1457 | TRACE(("|%p|%p|JUMP_REPEAT\n", ctx->pattern, ctx->ptr));
|
---|
1458 | goto jump_repeat;
|
---|
1459 | case JUMP_REPEAT_ONE_1:
|
---|
1460 | TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx->pattern, ctx->ptr));
|
---|
1461 | goto jump_repeat_one_1;
|
---|
1462 | case JUMP_REPEAT_ONE_2:
|
---|
1463 | TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx->pattern, ctx->ptr));
|
---|
1464 | goto jump_repeat_one_2;
|
---|
1465 | case JUMP_MIN_REPEAT_ONE:
|
---|
1466 | TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx->pattern, ctx->ptr));
|
---|
1467 | goto jump_min_repeat_one;
|
---|
1468 | case JUMP_ASSERT:
|
---|
1469 | TRACE(("|%p|%p|JUMP_ASSERT\n", ctx->pattern, ctx->ptr));
|
---|
1470 | goto jump_assert;
|
---|
1471 | case JUMP_ASSERT_NOT:
|
---|
1472 | TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx->pattern, ctx->ptr));
|
---|
1473 | goto jump_assert_not;
|
---|
1474 | case JUMP_NONE:
|
---|
1475 | TRACE(("|%p|%p|RETURN %d\n", ctx->pattern, ctx->ptr, ret));
|
---|
1476 | break;
|
---|
1477 | }
|
---|
1478 |
|
---|
1479 | return ret; /* should never get here */
|
---|
1480 | }
|
---|
1481 |
|
---|
1482 | LOCAL(Py_ssize_t)
|
---|
1483 | SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
|
---|
1484 | {
|
---|
1485 | SRE_CHAR* ptr = (SRE_CHAR *)state->start;
|
---|
1486 | SRE_CHAR* end = (SRE_CHAR *)state->end;
|
---|
1487 | Py_ssize_t status = 0;
|
---|
1488 | Py_ssize_t prefix_len = 0;
|
---|
1489 | Py_ssize_t prefix_skip = 0;
|
---|
1490 | SRE_CODE* prefix = NULL;
|
---|
1491 | SRE_CODE* charset = NULL;
|
---|
1492 | SRE_CODE* overlap = NULL;
|
---|
1493 | int flags = 0;
|
---|
1494 |
|
---|
1495 | if (pattern[0] == SRE_OP_INFO) {
|
---|
1496 | /* optimization info block */
|
---|
1497 | /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
|
---|
1498 |
|
---|
1499 | flags = pattern[2];
|
---|
1500 |
|
---|
1501 | if (pattern[3] > 1) {
|
---|
1502 | /* adjust end point (but make sure we leave at least one
|
---|
1503 | character in there, so literal search will work) */
|
---|
1504 | end -= pattern[3]-1;
|
---|
1505 | if (end <= ptr)
|
---|
1506 | end = ptr+1;
|
---|
1507 | }
|
---|
1508 |
|
---|
1509 | if (flags & SRE_INFO_PREFIX) {
|
---|
1510 | /* pattern starts with a known prefix */
|
---|
1511 | /* <length> <skip> <prefix data> <overlap data> */
|
---|
1512 | prefix_len = pattern[5];
|
---|
1513 | prefix_skip = pattern[6];
|
---|
1514 | prefix = pattern + 7;
|
---|
1515 | overlap = prefix + prefix_len - 1;
|
---|
1516 | } else if (flags & SRE_INFO_CHARSET)
|
---|
1517 | /* pattern starts with a character from a known set */
|
---|
1518 | /* <charset> */
|
---|
1519 | charset = pattern + 5;
|
---|
1520 |
|
---|
1521 | pattern += 1 + pattern[1];
|
---|
1522 | }
|
---|
1523 |
|
---|
1524 | TRACE(("prefix = %p %d %d\n", prefix, prefix_len, prefix_skip));
|
---|
1525 | TRACE(("charset = %p\n", charset));
|
---|
1526 |
|
---|
1527 | #if defined(USE_FAST_SEARCH)
|
---|
1528 | if (prefix_len > 1) {
|
---|
1529 | /* pattern starts with a known prefix. use the overlap
|
---|
1530 | table to skip forward as fast as we possibly can */
|
---|
1531 | Py_ssize_t i = 0;
|
---|
1532 | end = (SRE_CHAR *)state->end;
|
---|
1533 | while (ptr < end) {
|
---|
1534 | for (;;) {
|
---|
1535 | if ((SRE_CODE) ptr[0] != prefix[i]) {
|
---|
1536 | if (!i)
|
---|
1537 | break;
|
---|
1538 | else
|
---|
1539 | i = overlap[i];
|
---|
1540 | } else {
|
---|
1541 | if (++i == prefix_len) {
|
---|
1542 | /* found a potential match */
|
---|
1543 | TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
|
---|
1544 | state->start = ptr + 1 - prefix_len;
|
---|
1545 | state->ptr = ptr + 1 - prefix_len + prefix_skip;
|
---|
1546 | if (flags & SRE_INFO_LITERAL)
|
---|
1547 | return 1; /* we got all of it */
|
---|
1548 | status = SRE_MATCH(state, pattern + 2*prefix_skip);
|
---|
1549 | if (status != 0)
|
---|
1550 | return status;
|
---|
1551 | /* close but no cigar -- try again */
|
---|
1552 | i = overlap[i];
|
---|
1553 | }
|
---|
1554 | break;
|
---|
1555 | }
|
---|
1556 | }
|
---|
1557 | ptr++;
|
---|
1558 | }
|
---|
1559 | return 0;
|
---|
1560 | }
|
---|
1561 | #endif
|
---|
1562 |
|
---|
1563 | if (pattern[0] == SRE_OP_LITERAL) {
|
---|
1564 | /* pattern starts with a literal character. this is used
|
---|
1565 | for short prefixes, and if fast search is disabled */
|
---|
1566 | SRE_CODE chr = pattern[1];
|
---|
1567 | end = (SRE_CHAR *)state->end;
|
---|
1568 | for (;;) {
|
---|
1569 | while (ptr < end && (SRE_CODE) ptr[0] != chr)
|
---|
1570 | ptr++;
|
---|
1571 | if (ptr >= end)
|
---|
1572 | return 0;
|
---|
1573 | TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
|
---|
1574 | state->start = ptr;
|
---|
1575 | state->ptr = ++ptr;
|
---|
1576 | if (flags & SRE_INFO_LITERAL)
|
---|
1577 | return 1; /* we got all of it */
|
---|
1578 | status = SRE_MATCH(state, pattern + 2);
|
---|
1579 | if (status != 0)
|
---|
1580 | break;
|
---|
1581 | }
|
---|
1582 | } else if (charset) {
|
---|
1583 | /* pattern starts with a character from a known set */
|
---|
1584 | end = (SRE_CHAR *)state->end;
|
---|
1585 | for (;;) {
|
---|
1586 | while (ptr < end && !SRE_CHARSET(charset, ptr[0]))
|
---|
1587 | ptr++;
|
---|
1588 | if (ptr >= end)
|
---|
1589 | return 0;
|
---|
1590 | TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
|
---|
1591 | state->start = ptr;
|
---|
1592 | state->ptr = ptr;
|
---|
1593 | status = SRE_MATCH(state, pattern);
|
---|
1594 | if (status != 0)
|
---|
1595 | break;
|
---|
1596 | ptr++;
|
---|
1597 | }
|
---|
1598 | } else
|
---|
1599 | /* general case */
|
---|
1600 | while (ptr <= end) {
|
---|
1601 | TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
|
---|
1602 | state->start = state->ptr = ptr++;
|
---|
1603 | status = SRE_MATCH(state, pattern);
|
---|
1604 | if (status != 0)
|
---|
1605 | break;
|
---|
1606 | }
|
---|
1607 |
|
---|
1608 | return status;
|
---|
1609 | }
|
---|
1610 |
|
---|
1611 | LOCAL(int)
|
---|
1612 | SRE_LITERAL_TEMPLATE(SRE_CHAR* ptr, Py_ssize_t len)
|
---|
1613 | {
|
---|
1614 | /* check if given string is a literal template (i.e. no escapes) */
|
---|
1615 | while (len-- > 0)
|
---|
1616 | if (*ptr++ == '\\')
|
---|
1617 | return 0;
|
---|
1618 | return 1;
|
---|
1619 | }
|
---|
1620 |
|
---|
1621 | #if !defined(SRE_RECURSIVE)
|
---|
1622 |
|
---|
1623 | /* -------------------------------------------------------------------- */
|
---|
1624 | /* factories and destructors */
|
---|
1625 |
|
---|
1626 | /* see sre.h for object declarations */
|
---|
1627 | static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, int);
|
---|
1628 | static PyObject*pattern_scanner(PatternObject*, PyObject*);
|
---|
1629 |
|
---|
1630 | static PyObject *
|
---|
1631 | sre_codesize(PyObject* self, PyObject *unused)
|
---|
1632 | {
|
---|
1633 | return Py_BuildValue("l", sizeof(SRE_CODE));
|
---|
1634 | }
|
---|
1635 |
|
---|
1636 | static PyObject *
|
---|
1637 | sre_getlower(PyObject* self, PyObject* args)
|
---|
1638 | {
|
---|
1639 | int character, flags;
|
---|
1640 | if (!PyArg_ParseTuple(args, "ii", &character, &flags))
|
---|
1641 | return NULL;
|
---|
1642 | if (flags & SRE_FLAG_LOCALE)
|
---|
1643 | return Py_BuildValue("i", sre_lower_locale(character));
|
---|
1644 | if (flags & SRE_FLAG_UNICODE)
|
---|
1645 | #if defined(HAVE_UNICODE)
|
---|
1646 | return Py_BuildValue("i", sre_lower_unicode(character));
|
---|
1647 | #else
|
---|
1648 | return Py_BuildValue("i", sre_lower_locale(character));
|
---|
1649 | #endif
|
---|
1650 | return Py_BuildValue("i", sre_lower(character));
|
---|
1651 | }
|
---|
1652 |
|
---|
1653 | LOCAL(void)
|
---|
1654 | state_reset(SRE_STATE* state)
|
---|
1655 | {
|
---|
1656 | /* FIXME: dynamic! */
|
---|
1657 | /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
|
---|
1658 |
|
---|
1659 | state->lastmark = -1;
|
---|
1660 | state->lastindex = -1;
|
---|
1661 |
|
---|
1662 | state->repeat = NULL;
|
---|
1663 |
|
---|
1664 | data_stack_dealloc(state);
|
---|
1665 | }
|
---|
1666 |
|
---|
1667 | static void*
|
---|
1668 | getstring(PyObject* string, Py_ssize_t* p_length, int* p_charsize)
|
---|
1669 | {
|
---|
1670 | /* given a python object, return a data pointer, a length (in
|
---|
1671 | characters), and a character size. return NULL if the object
|
---|
1672 | is not a string (or not compatible) */
|
---|
1673 |
|
---|
1674 | PyBufferProcs *buffer;
|
---|
1675 | Py_ssize_t size, bytes;
|
---|
1676 | int charsize;
|
---|
1677 | void* ptr;
|
---|
1678 |
|
---|
1679 | #if defined(HAVE_UNICODE)
|
---|
1680 | if (PyUnicode_Check(string)) {
|
---|
1681 | /* unicode strings doesn't always support the buffer interface */
|
---|
1682 | ptr = (void*) PyUnicode_AS_DATA(string);
|
---|
1683 | bytes = PyUnicode_GET_DATA_SIZE(string);
|
---|
1684 | size = PyUnicode_GET_SIZE(string);
|
---|
1685 | charsize = sizeof(Py_UNICODE);
|
---|
1686 |
|
---|
1687 | } else {
|
---|
1688 | #endif
|
---|
1689 |
|
---|
1690 | /* get pointer to string buffer */
|
---|
1691 | buffer = string->ob_type->tp_as_buffer;
|
---|
1692 | if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||
|
---|
1693 | buffer->bf_getsegcount(string, NULL) != 1) {
|
---|
1694 | PyErr_SetString(PyExc_TypeError, "expected string or buffer");
|
---|
1695 | return NULL;
|
---|
1696 | }
|
---|
1697 |
|
---|
1698 | /* determine buffer size */
|
---|
1699 | bytes = buffer->bf_getreadbuffer(string, 0, &ptr);
|
---|
1700 | if (bytes < 0) {
|
---|
1701 | PyErr_SetString(PyExc_TypeError, "buffer has negative size");
|
---|
1702 | return NULL;
|
---|
1703 | }
|
---|
1704 |
|
---|
1705 | /* determine character size */
|
---|
1706 | #if PY_VERSION_HEX >= 0x01060000
|
---|
1707 | size = PyObject_Size(string);
|
---|
1708 | #else
|
---|
1709 | size = PyObject_Length(string);
|
---|
1710 | #endif
|
---|
1711 |
|
---|
1712 | if (PyString_Check(string) || bytes == size)
|
---|
1713 | charsize = 1;
|
---|
1714 | #if defined(HAVE_UNICODE)
|
---|
1715 | else if (bytes == (Py_ssize_t) (size * sizeof(Py_UNICODE)))
|
---|
1716 | charsize = sizeof(Py_UNICODE);
|
---|
1717 | #endif
|
---|
1718 | else {
|
---|
1719 | PyErr_SetString(PyExc_TypeError, "buffer size mismatch");
|
---|
1720 | return NULL;
|
---|
1721 | }
|
---|
1722 |
|
---|
1723 | #if defined(HAVE_UNICODE)
|
---|
1724 | }
|
---|
1725 | #endif
|
---|
1726 |
|
---|
1727 | *p_length = size;
|
---|
1728 | *p_charsize = charsize;
|
---|
1729 |
|
---|
1730 | return ptr;
|
---|
1731 | }
|
---|
1732 |
|
---|
1733 | LOCAL(PyObject*)
|
---|
1734 | state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
|
---|
1735 | Py_ssize_t start, Py_ssize_t end)
|
---|
1736 | {
|
---|
1737 | /* prepare state object */
|
---|
1738 |
|
---|
1739 | Py_ssize_t length;
|
---|
1740 | int charsize;
|
---|
1741 | void* ptr;
|
---|
1742 |
|
---|
1743 | memset(state, 0, sizeof(SRE_STATE));
|
---|
1744 |
|
---|
1745 | state->lastmark = -1;
|
---|
1746 | state->lastindex = -1;
|
---|
1747 |
|
---|
1748 | ptr = getstring(string, &length, &charsize);
|
---|
1749 | if (!ptr)
|
---|
1750 | return NULL;
|
---|
1751 |
|
---|
1752 | /* adjust boundaries */
|
---|
1753 | if (start < 0)
|
---|
1754 | start = 0;
|
---|
1755 | else if (start > length)
|
---|
1756 | start = length;
|
---|
1757 |
|
---|
1758 | if (end < 0)
|
---|
1759 | end = 0;
|
---|
1760 | else if (end > length)
|
---|
1761 | end = length;
|
---|
1762 |
|
---|
1763 | state->charsize = charsize;
|
---|
1764 |
|
---|
1765 | state->beginning = ptr;
|
---|
1766 |
|
---|
1767 | state->start = (void*) ((char*) ptr + start * state->charsize);
|
---|
1768 | state->end = (void*) ((char*) ptr + end * state->charsize);
|
---|
1769 |
|
---|
1770 | Py_INCREF(string);
|
---|
1771 | state->string = string;
|
---|
1772 | state->pos = start;
|
---|
1773 | state->endpos = end;
|
---|
1774 |
|
---|
1775 | if (pattern->flags & SRE_FLAG_LOCALE)
|
---|
1776 | state->lower = sre_lower_locale;
|
---|
1777 | else if (pattern->flags & SRE_FLAG_UNICODE)
|
---|
1778 | #if defined(HAVE_UNICODE)
|
---|
1779 | state->lower = sre_lower_unicode;
|
---|
1780 | #else
|
---|
1781 | state->lower = sre_lower_locale;
|
---|
1782 | #endif
|
---|
1783 | else
|
---|
1784 | state->lower = sre_lower;
|
---|
1785 |
|
---|
1786 | return string;
|
---|
1787 | }
|
---|
1788 |
|
---|
1789 | LOCAL(void)
|
---|
1790 | state_fini(SRE_STATE* state)
|
---|
1791 | {
|
---|
1792 | Py_XDECREF(state->string);
|
---|
1793 | data_stack_dealloc(state);
|
---|
1794 | }
|
---|
1795 |
|
---|
/* turn a pointer into the string data into a character index: the
   byte distance from (state)->beginning, divided by the character
   size (state)->charsize.  note that state is evaluated twice */
#define STATE_OFFSET(state, member) \
    (((char*) (member) - (char*) (state)->beginning) / (state)->charsize)
|
---|
1799 |
|
---|
1800 | LOCAL(PyObject*)
|
---|
1801 | state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
|
---|
1802 | {
|
---|
1803 | Py_ssize_t i, j;
|
---|
1804 |
|
---|
1805 | index = (index - 1) * 2;
|
---|
1806 |
|
---|
1807 | if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
|
---|
1808 | if (empty)
|
---|
1809 | /* want empty string */
|
---|
1810 | i = j = 0;
|
---|
1811 | else {
|
---|
1812 | Py_INCREF(Py_None);
|
---|
1813 | return Py_None;
|
---|
1814 | }
|
---|
1815 | } else {
|
---|
1816 | i = STATE_OFFSET(state, state->mark[index]);
|
---|
1817 | j = STATE_OFFSET(state, state->mark[index+1]);
|
---|
1818 | }
|
---|
1819 |
|
---|
1820 | return PySequence_GetSlice(string, i, j);
|
---|
1821 | }
|
---|
1822 |
|
---|
1823 | static void
|
---|
1824 | pattern_error(int status)
|
---|
1825 | {
|
---|
1826 | switch (status) {
|
---|
1827 | case SRE_ERROR_RECURSION_LIMIT:
|
---|
1828 | PyErr_SetString(
|
---|
1829 | PyExc_RuntimeError,
|
---|
1830 | "maximum recursion limit exceeded"
|
---|
1831 | );
|
---|
1832 | break;
|
---|
1833 | case SRE_ERROR_MEMORY:
|
---|
1834 | PyErr_NoMemory();
|
---|
1835 | break;
|
---|
1836 | default:
|
---|
1837 | /* other error codes indicate compiler/engine bugs */
|
---|
1838 | PyErr_SetString(
|
---|
1839 | PyExc_RuntimeError,
|
---|
1840 | "internal error in regular expression engine"
|
---|
1841 | );
|
---|
1842 | }
|
---|
1843 | }
|
---|
1844 |
|
---|
1845 | static void
|
---|
1846 | pattern_dealloc(PatternObject* self)
|
---|
1847 | {
|
---|
1848 | if (self->weakreflist != NULL)
|
---|
1849 | PyObject_ClearWeakRefs((PyObject *) self);
|
---|
1850 | Py_XDECREF(self->pattern);
|
---|
1851 | Py_XDECREF(self->groupindex);
|
---|
1852 | Py_XDECREF(self->indexgroup);
|
---|
1853 | PyObject_DEL(self);
|
---|
1854 | }
|
---|
1855 |
|
---|
1856 | static PyObject*
|
---|
1857 | pattern_match(PatternObject* self, PyObject* args, PyObject* kw)
|
---|
1858 | {
|
---|
1859 | SRE_STATE state;
|
---|
1860 | int status;
|
---|
1861 |
|
---|
1862 | PyObject* string;
|
---|
1863 | Py_ssize_t start = 0;
|
---|
1864 | Py_ssize_t end = PY_SSIZE_T_MAX;
|
---|
1865 | static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
|
---|
1866 | if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:match", kwlist,
|
---|
1867 | &string, &start, &end))
|
---|
1868 | return NULL;
|
---|
1869 |
|
---|
1870 | string = state_init(&state, self, string, start, end);
|
---|
1871 | if (!string)
|
---|
1872 | return NULL;
|
---|
1873 |
|
---|
1874 | state.ptr = state.start;
|
---|
1875 |
|
---|
1876 | TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
|
---|
1877 |
|
---|
1878 | if (state.charsize == 1) {
|
---|
1879 | status = sre_match(&state, PatternObject_GetCode(self));
|
---|
1880 | } else {
|
---|
1881 | #if defined(HAVE_UNICODE)
|
---|
1882 | status = sre_umatch(&state, PatternObject_GetCode(self));
|
---|
1883 | #endif
|
---|
1884 | }
|
---|
1885 |
|
---|
1886 | TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
|
---|
1887 |
|
---|
1888 | state_fini(&state);
|
---|
1889 |
|
---|
1890 | return pattern_new_match(self, &state, status);
|
---|
1891 | }
|
---|
1892 |
|
---|
1893 | static PyObject*
|
---|
1894 | pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
|
---|
1895 | {
|
---|
1896 | SRE_STATE state;
|
---|
1897 | int status;
|
---|
1898 |
|
---|
1899 | PyObject* string;
|
---|
1900 | Py_ssize_t start = 0;
|
---|
1901 | Py_ssize_t end = PY_SSIZE_T_MAX;
|
---|
1902 | static char* kwlist[] = { "pattern", "pos", "endpos", NULL };
|
---|
1903 | if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:search", kwlist,
|
---|
1904 | &string, &start, &end))
|
---|
1905 | return NULL;
|
---|
1906 |
|
---|
1907 | string = state_init(&state, self, string, start, end);
|
---|
1908 | if (!string)
|
---|
1909 | return NULL;
|
---|
1910 |
|
---|
1911 | TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
|
---|
1912 |
|
---|
1913 | if (state.charsize == 1) {
|
---|
1914 | status = sre_search(&state, PatternObject_GetCode(self));
|
---|
1915 | } else {
|
---|
1916 | #if defined(HAVE_UNICODE)
|
---|
1917 | status = sre_usearch(&state, PatternObject_GetCode(self));
|
---|
1918 | #endif
|
---|
1919 | }
|
---|
1920 |
|
---|
1921 | TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
|
---|
1922 |
|
---|
1923 | state_fini(&state);
|
---|
1924 |
|
---|
1925 | return pattern_new_match(self, &state, status);
|
---|
1926 | }
|
---|
1927 |
|
---|
1928 | static PyObject*
|
---|
1929 | call(char* module, char* function, PyObject* args)
|
---|
1930 | {
|
---|
1931 | PyObject* name;
|
---|
1932 | PyObject* mod;
|
---|
1933 | PyObject* func;
|
---|
1934 | PyObject* result;
|
---|
1935 |
|
---|
1936 | if (!args)
|
---|
1937 | return NULL;
|
---|
1938 | name = PyString_FromString(module);
|
---|
1939 | if (!name)
|
---|
1940 | return NULL;
|
---|
1941 | mod = PyImport_Import(name);
|
---|
1942 | Py_DECREF(name);
|
---|
1943 | if (!mod)
|
---|
1944 | return NULL;
|
---|
1945 | func = PyObject_GetAttrString(mod, function);
|
---|
1946 | Py_DECREF(mod);
|
---|
1947 | if (!func)
|
---|
1948 | return NULL;
|
---|
1949 | result = PyObject_CallObject(func, args);
|
---|
1950 | Py_DECREF(func);
|
---|
1951 | Py_DECREF(args);
|
---|
1952 | return result;
|
---|
1953 | }
|
---|
1954 |
|
---|
#ifdef USE_BUILTIN_COPY
static int
deepcopy(PyObject** object, PyObject* memo)
{
    /* replace *object by the result of copy.deepcopy(*object, memo).
       returns 1 on success; on failure returns 0 and leaves *object
       as it was */

    PyObject* pair;
    PyObject* replacement;

    pair = PyTuple_Pack(2, *object, memo);
    replacement = call("copy", "deepcopy", pair);
    if (replacement == NULL)
        return 0;

    /* swap the new object in for the old one */
    Py_DECREF(*object);
    *object = replacement;

    return 1; /* success */
}
#endif
|
---|
1974 |
|
---|
1975 | static PyObject*
|
---|
1976 | join_list(PyObject* list, PyObject* pattern)
|
---|
1977 | {
|
---|
1978 | /* join list elements */
|
---|
1979 |
|
---|
1980 | PyObject* joiner;
|
---|
1981 | #if PY_VERSION_HEX >= 0x01060000
|
---|
1982 | PyObject* function;
|
---|
1983 | PyObject* args;
|
---|
1984 | #endif
|
---|
1985 | PyObject* result;
|
---|
1986 |
|
---|
1987 | switch (PyList_GET_SIZE(list)) {
|
---|
1988 | case 0:
|
---|
1989 | Py_DECREF(list);
|
---|
1990 | return PySequence_GetSlice(pattern, 0, 0);
|
---|
1991 | case 1:
|
---|
1992 | result = PyList_GET_ITEM(list, 0);
|
---|
1993 | Py_INCREF(result);
|
---|
1994 | Py_DECREF(list);
|
---|
1995 | return result;
|
---|
1996 | }
|
---|
1997 |
|
---|
1998 | /* two or more elements: slice out a suitable separator from the
|
---|
1999 | first member, and use that to join the entire list */
|
---|
2000 |
|
---|
2001 | joiner = PySequence_GetSlice(pattern, 0, 0);
|
---|
2002 | if (!joiner)
|
---|
2003 | return NULL;
|
---|
2004 |
|
---|
2005 | #if PY_VERSION_HEX >= 0x01060000
|
---|
2006 | function = PyObject_GetAttrString(joiner, "join");
|
---|
2007 | if (!function) {
|
---|
2008 | Py_DECREF(joiner);
|
---|
2009 | return NULL;
|
---|
2010 | }
|
---|
2011 | args = PyTuple_New(1);
|
---|
2012 | if (!args) {
|
---|
2013 | Py_DECREF(function);
|
---|
2014 | Py_DECREF(joiner);
|
---|
2015 | return NULL;
|
---|
2016 | }
|
---|
2017 | PyTuple_SET_ITEM(args, 0, list);
|
---|
2018 | result = PyObject_CallObject(function, args);
|
---|
2019 | Py_DECREF(args); /* also removes list */
|
---|
2020 | Py_DECREF(function);
|
---|
2021 | #else
|
---|
2022 | result = call(
|
---|
2023 | "string", "join",
|
---|
2024 | PyTuple_Pack(2, list, joiner)
|
---|
2025 | );
|
---|
2026 | #endif
|
---|
2027 | Py_DECREF(joiner);
|
---|
2028 |
|
---|
2029 | return result;
|
---|
2030 | }
|
---|
2031 |
|
---|
2032 | static PyObject*
|
---|
2033 | pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
|
---|
2034 | {
|
---|
2035 | SRE_STATE state;
|
---|
2036 | PyObject* list;
|
---|
2037 | int status;
|
---|
2038 | Py_ssize_t i, b, e;
|
---|
2039 |
|
---|
2040 | PyObject* string;
|
---|
2041 | Py_ssize_t start = 0;
|
---|
2042 | Py_ssize_t end = PY_SSIZE_T_MAX;
|
---|
2043 | static char* kwlist[] = { "source", "pos", "endpos", NULL };
|
---|
2044 | if (!PyArg_ParseTupleAndKeywords(args, kw, "O|nn:findall", kwlist,
|
---|
2045 | &string, &start, &end))
|
---|
2046 | return NULL;
|
---|
2047 |
|
---|
2048 | string = state_init(&state, self, string, start, end);
|
---|
2049 | if (!string)
|
---|
2050 | return NULL;
|
---|
2051 |
|
---|
2052 | list = PyList_New(0);
|
---|
2053 | if (!list) {
|
---|
2054 | state_fini(&state);
|
---|
2055 | return NULL;
|
---|
2056 | }
|
---|
2057 |
|
---|
2058 | while (state.start <= state.end) {
|
---|
2059 |
|
---|
2060 | PyObject* item;
|
---|
2061 |
|
---|
2062 | state_reset(&state);
|
---|
2063 |
|
---|
2064 | state.ptr = state.start;
|
---|
2065 |
|
---|
2066 | if (state.charsize == 1) {
|
---|
2067 | status = sre_search(&state, PatternObject_GetCode(self));
|
---|
2068 | } else {
|
---|
2069 | #if defined(HAVE_UNICODE)
|
---|
2070 | status = sre_usearch(&state, PatternObject_GetCode(self));
|
---|
2071 | #endif
|
---|
2072 | }
|
---|
2073 |
|
---|
2074 | if (status <= 0) {
|
---|
2075 | if (status == 0)
|
---|
2076 | break;
|
---|
2077 | pattern_error(status);
|
---|
2078 | goto error;
|
---|
2079 | }
|
---|
2080 |
|
---|
2081 | /* don't bother to build a match object */
|
---|
2082 | switch (self->groups) {
|
---|
2083 | case 0:
|
---|
2084 | b = STATE_OFFSET(&state, state.start);
|
---|
2085 | e = STATE_OFFSET(&state, state.ptr);
|
---|
2086 | item = PySequence_GetSlice(string, b, e);
|
---|
2087 | if (!item)
|
---|
2088 | goto error;
|
---|
2089 | break;
|
---|
2090 | case 1:
|
---|
2091 | item = state_getslice(&state, 1, string, 1);
|
---|
2092 | if (!item)
|
---|
2093 | goto error;
|
---|
2094 | break;
|
---|
2095 | default:
|
---|
2096 | item = PyTuple_New(self->groups);
|
---|
2097 | if (!item)
|
---|
2098 | goto error;
|
---|
2099 | for (i = 0; i < self->groups; i++) {
|
---|
2100 | PyObject* o = state_getslice(&state, i+1, string, 1);
|
---|
2101 | if (!o) {
|
---|
2102 | Py_DECREF(item);
|
---|
2103 | goto error;
|
---|
2104 | }
|
---|
2105 | PyTuple_SET_ITEM(item, i, o);
|
---|
2106 | }
|
---|
2107 | break;
|
---|
2108 | }
|
---|
2109 |
|
---|
2110 | status = PyList_Append(list, item);
|
---|
2111 | Py_DECREF(item);
|
---|
2112 | if (status < 0)
|
---|
2113 | goto error;
|
---|
2114 |
|
---|
2115 | if (state.ptr == state.start)
|
---|
2116 | state.start = (void*) ((char*) state.ptr + state.charsize);
|
---|
2117 | else
|
---|
2118 | state.start = state.ptr;
|
---|
2119 |
|
---|
2120 | }
|
---|
2121 |
|
---|
2122 | state_fini(&state);
|
---|
2123 | return list;
|
---|
2124 |
|
---|
2125 | error:
|
---|
2126 | Py_DECREF(list);
|
---|
2127 | state_fini(&state);
|
---|
2128 | return NULL;
|
---|
2129 |
|
---|
2130 | }
|
---|
2131 |
|
---|
#if PY_VERSION_HEX >= 0x02020000
static PyObject*
pattern_finditer(PatternObject* pattern, PyObject* args)
{
    /* finditer(...): create a scanner from the arguments and return
       a callable-iterator that calls the scanner's "search" method
       until it returns None */

    PyObject* scanner;
    PyObject* search;
    PyObject* iterator = NULL;

    scanner = pattern_scanner(pattern, args);
    if (scanner != NULL) {
        search = PyObject_GetAttrString(scanner, "search");
        Py_DECREF(scanner);
        if (search != NULL) {
            iterator = PyCallIter_New(search, Py_None);
            Py_DECREF(search);
        }
    }

    return iterator;
}
#endif
|
---|
2155 |
|
---|
2156 | static PyObject*
|
---|
2157 | pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
|
---|
2158 | {
|
---|
2159 | SRE_STATE state;
|
---|
2160 | PyObject* list;
|
---|
2161 | PyObject* item;
|
---|
2162 | int status;
|
---|
2163 | Py_ssize_t n;
|
---|
2164 | Py_ssize_t i;
|
---|
2165 | void* last;
|
---|
2166 |
|
---|
2167 | PyObject* string;
|
---|
2168 | Py_ssize_t maxsplit = 0;
|
---|
2169 | static char* kwlist[] = { "source", "maxsplit", NULL };
|
---|
2170 | if (!PyArg_ParseTupleAndKeywords(args, kw, "O|n:split", kwlist,
|
---|
2171 | &string, &maxsplit))
|
---|
2172 | return NULL;
|
---|
2173 |
|
---|
2174 | string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
|
---|
2175 | if (!string)
|
---|
2176 | return NULL;
|
---|
2177 |
|
---|
2178 | list = PyList_New(0);
|
---|
2179 | if (!list) {
|
---|
2180 | state_fini(&state);
|
---|
2181 | return NULL;
|
---|
2182 | }
|
---|
2183 |
|
---|
2184 | n = 0;
|
---|
2185 | last = state.start;
|
---|
2186 |
|
---|
2187 | while (!maxsplit || n < maxsplit) {
|
---|
2188 |
|
---|
2189 | state_reset(&state);
|
---|
2190 |
|
---|
2191 | state.ptr = state.start;
|
---|
2192 |
|
---|
2193 | if (state.charsize == 1) {
|
---|
2194 | status = sre_search(&state, PatternObject_GetCode(self));
|
---|
2195 | } else {
|
---|
2196 | #if defined(HAVE_UNICODE)
|
---|
2197 | status = sre_usearch(&state, PatternObject_GetCode(self));
|
---|
2198 | #endif
|
---|
2199 | }
|
---|
2200 |
|
---|
2201 | if (status <= 0) {
|
---|
2202 | if (status == 0)
|
---|
2203 | break;
|
---|
2204 | pattern_error(status);
|
---|
2205 | goto error;
|
---|
2206 | }
|
---|
2207 |
|
---|
2208 | if (state.start == state.ptr) {
|
---|
2209 | if (last == state.end)
|
---|
2210 | break;
|
---|
2211 | /* skip one character */
|
---|
2212 | state.start = (void*) ((char*) state.ptr + state.charsize);
|
---|
2213 | continue;
|
---|
2214 | }
|
---|
2215 |
|
---|
2216 | /* get segment before this match */
|
---|
2217 | item = PySequence_GetSlice(
|
---|
2218 | string, STATE_OFFSET(&state, last),
|
---|
2219 | STATE_OFFSET(&state, state.start)
|
---|
2220 | );
|
---|
2221 | if (!item)
|
---|
2222 | goto error;
|
---|
2223 | status = PyList_Append(list, item);
|
---|
2224 | Py_DECREF(item);
|
---|
2225 | if (status < 0)
|
---|
2226 | goto error;
|
---|
2227 |
|
---|
2228 | /* add groups (if any) */
|
---|
2229 | for (i = 0; i < self->groups; i++) {
|
---|
2230 | item = state_getslice(&state, i+1, string, 0);
|
---|
2231 | if (!item)
|
---|
2232 | goto error;
|
---|
2233 | status = PyList_Append(list, item);
|
---|
2234 | Py_DECREF(item);
|
---|
2235 | if (status < 0)
|
---|
2236 | goto error;
|
---|
2237 | }
|
---|
2238 |
|
---|
2239 | n = n + 1;
|
---|
2240 |
|
---|
2241 | last = state.start = state.ptr;
|
---|
2242 |
|
---|
2243 | }
|
---|
2244 |
|
---|
2245 | /* get segment following last match (even if empty) */
|
---|
2246 | item = PySequence_GetSlice(
|
---|
2247 | string, STATE_OFFSET(&state, last), state.endpos
|
---|
2248 | );
|
---|
2249 | if (!item)
|
---|
2250 | goto error;
|
---|
2251 | status = PyList_Append(list, item);
|
---|
2252 | Py_DECREF(item);
|
---|
2253 | if (status < 0)
|
---|
2254 | goto error;
|
---|
2255 |
|
---|
2256 | state_fini(&state);
|
---|
2257 | return list;
|
---|
2258 |
|
---|
2259 | error:
|
---|
2260 | Py_DECREF(list);
|
---|
2261 | state_fini(&state);
|
---|
2262 | return NULL;
|
---|
2263 |
|
---|
2264 | }
|
---|
2265 |
|
---|
2266 | static PyObject*
|
---|
2267 | pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
|
---|
2268 | Py_ssize_t count, Py_ssize_t subn)
|
---|
2269 | {
|
---|
2270 | SRE_STATE state;
|
---|
2271 | PyObject* list;
|
---|
2272 | PyObject* item;
|
---|
2273 | PyObject* filter;
|
---|
2274 | PyObject* args;
|
---|
2275 | PyObject* match;
|
---|
2276 | void* ptr;
|
---|
2277 | int status;
|
---|
2278 | Py_ssize_t n;
|
---|
2279 | Py_ssize_t i, b, e;
|
---|
2280 | int bint;
|
---|
2281 | int filter_is_callable;
|
---|
2282 |
|
---|
2283 | if (PyCallable_Check(ptemplate)) {
|
---|
2284 | /* sub/subn takes either a function or a template */
|
---|
2285 | filter = ptemplate;
|
---|
2286 | Py_INCREF(filter);
|
---|
2287 | filter_is_callable = 1;
|
---|
2288 | } else {
|
---|
2289 | /* if not callable, check if it's a literal string */
|
---|
2290 | int literal;
|
---|
2291 | ptr = getstring(ptemplate, &n, &bint);
|
---|
2292 | b = bint;
|
---|
2293 | if (ptr) {
|
---|
2294 | if (b == 1) {
|
---|
2295 | literal = sre_literal_template((unsigned char *)ptr, n);
|
---|
2296 | } else {
|
---|
2297 | #if defined(HAVE_UNICODE)
|
---|
2298 | literal = sre_uliteral_template((Py_UNICODE *)ptr, n);
|
---|
2299 | #endif
|
---|
2300 | }
|
---|
2301 | } else {
|
---|
2302 | PyErr_Clear();
|
---|
2303 | literal = 0;
|
---|
2304 | }
|
---|
2305 | if (literal) {
|
---|
2306 | filter = ptemplate;
|
---|
2307 | Py_INCREF(filter);
|
---|
2308 | filter_is_callable = 0;
|
---|
2309 | } else {
|
---|
2310 | /* not a literal; hand it over to the template compiler */
|
---|
2311 | filter = call(
|
---|
2312 | SRE_PY_MODULE, "_subx",
|
---|
2313 | PyTuple_Pack(2, self, ptemplate)
|
---|
2314 | );
|
---|
2315 | if (!filter)
|
---|
2316 | return NULL;
|
---|
2317 | filter_is_callable = PyCallable_Check(filter);
|
---|
2318 | }
|
---|
2319 | }
|
---|
2320 |
|
---|
2321 | string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
|
---|
2322 | if (!string) {
|
---|
2323 | Py_DECREF(filter);
|
---|
2324 | return NULL;
|
---|
2325 | }
|
---|
2326 |
|
---|
2327 | list = PyList_New(0);
|
---|
2328 | if (!list) {
|
---|
2329 | Py_DECREF(filter);
|
---|
2330 | state_fini(&state);
|
---|
2331 | return NULL;
|
---|
2332 | }
|
---|
2333 |
|
---|
2334 | n = i = 0;
|
---|
2335 |
|
---|
2336 | while (!count || n < count) {
|
---|
2337 |
|
---|
2338 | state_reset(&state);
|
---|
2339 |
|
---|
2340 | state.ptr = state.start;
|
---|
2341 |
|
---|
2342 | if (state.charsize == 1) {
|
---|
2343 | status = sre_search(&state, PatternObject_GetCode(self));
|
---|
2344 | } else {
|
---|
2345 | #if defined(HAVE_UNICODE)
|
---|
2346 | status = sre_usearch(&state, PatternObject_GetCode(self));
|
---|
2347 | #endif
|
---|
2348 | }
|
---|
2349 |
|
---|
2350 | if (status <= 0) {
|
---|
2351 | if (status == 0)
|
---|
2352 | break;
|
---|
2353 | pattern_error(status);
|
---|
2354 | goto error;
|
---|
2355 | }
|
---|
2356 |
|
---|
2357 | b = STATE_OFFSET(&state, state.start);
|
---|
2358 | e = STATE_OFFSET(&state, state.ptr);
|
---|
2359 |
|
---|
2360 | if (i < b) {
|
---|
2361 | /* get segment before this match */
|
---|
2362 | item = PySequence_GetSlice(string, i, b);
|
---|
2363 | if (!item)
|
---|
2364 | goto error;
|
---|
2365 | status = PyList_Append(list, item);
|
---|
2366 | Py_DECREF(item);
|
---|
2367 | if (status < 0)
|
---|
2368 | goto error;
|
---|
2369 |
|
---|
2370 | } else if (i == b && i == e && n > 0)
|
---|
2371 | /* ignore empty match on latest position */
|
---|
2372 | goto next;
|
---|
2373 |
|
---|
2374 | if (filter_is_callable) {
|
---|
2375 | /* pass match object through filter */
|
---|
2376 | match = pattern_new_match(self, &state, 1);
|
---|
2377 | if (!match)
|
---|
2378 | goto error;
|
---|
2379 | args = PyTuple_Pack(1, match);
|
---|
2380 | if (!args) {
|
---|
2381 | Py_DECREF(match);
|
---|
2382 | goto error;
|
---|
2383 | }
|
---|
2384 | item = PyObject_CallObject(filter, args);
|
---|
2385 | Py_DECREF(args);
|
---|
2386 | Py_DECREF(match);
|
---|
2387 | if (!item)
|
---|
2388 | goto error;
|
---|
2389 | } else {
|
---|
2390 | /* filter is literal string */
|
---|
2391 | item = filter;
|
---|
2392 | Py_INCREF(item);
|
---|
2393 | }
|
---|
2394 |
|
---|
2395 | /* add to list */
|
---|
2396 | if (item != Py_None) {
|
---|
2397 | status = PyList_Append(list, item);
|
---|
2398 | Py_DECREF(item);
|
---|
2399 | if (status < 0)
|
---|
2400 | goto error;
|
---|
2401 | }
|
---|
2402 |
|
---|
2403 | i = e;
|
---|
2404 | n = n + 1;
|
---|
2405 |
|
---|
2406 | next:
|
---|
2407 | /* move on */
|
---|
2408 | if (state.ptr == state.start)
|
---|
2409 | state.start = (void*) ((char*) state.ptr + state.charsize);
|
---|
2410 | else
|
---|
2411 | state.start = state.ptr;
|
---|
2412 |
|
---|
2413 | }
|
---|
2414 |
|
---|
2415 | /* get segment following last match */
|
---|
2416 | if (i < state.endpos) {
|
---|
2417 | item = PySequence_GetSlice(string, i, state.endpos);
|
---|
2418 | if (!item)
|
---|
2419 | goto error;
|
---|
2420 | status = PyList_Append(list, item);
|
---|
2421 | Py_DECREF(item);
|
---|
2422 | if (status < 0)
|
---|
2423 | goto error;
|
---|
2424 | }
|
---|
2425 |
|
---|
2426 | state_fini(&state);
|
---|
2427 |
|
---|
2428 | Py_DECREF(filter);
|
---|
2429 |
|
---|
2430 | /* convert list to single string (also removes list) */
|
---|
2431 | item = join_list(list, self->pattern);
|
---|
2432 |
|
---|
2433 | if (!item)
|
---|
2434 | return NULL;
|
---|
2435 |
|
---|
2436 | if (subn)
|
---|
2437 | return Py_BuildValue("Ni", item, n);
|
---|
2438 |
|
---|
2439 | return item;
|
---|
2440 |
|
---|
2441 | error:
|
---|
2442 | Py_DECREF(list);
|
---|
2443 | state_fini(&state);
|
---|
2444 | Py_DECREF(filter);
|
---|
2445 | return NULL;
|
---|
2446 |
|
---|
2447 | }
|
---|
2448 |
|
---|
2449 | static PyObject*
|
---|
2450 | pattern_sub(PatternObject* self, PyObject* args, PyObject* kw)
|
---|
2451 | {
|
---|
2452 | PyObject* ptemplate;
|
---|
2453 | PyObject* string;
|
---|
2454 | Py_ssize_t count = 0;
|
---|
2455 | static char* kwlist[] = { "repl", "string", "count", NULL };
|
---|
2456 | if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:sub", kwlist,
|
---|
2457 | &ptemplate, &string, &count))
|
---|
2458 | return NULL;
|
---|
2459 |
|
---|
2460 | return pattern_subx(self, ptemplate, string, count, 0);
|
---|
2461 | }
|
---|
2462 |
|
---|
2463 | static PyObject*
|
---|
2464 | pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
|
---|
2465 | {
|
---|
2466 | PyObject* ptemplate;
|
---|
2467 | PyObject* string;
|
---|
2468 | Py_ssize_t count = 0;
|
---|
2469 | static char* kwlist[] = { "repl", "string", "count", NULL };
|
---|
2470 | if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:subn", kwlist,
|
---|
2471 | &ptemplate, &string, &count))
|
---|
2472 | return NULL;
|
---|
2473 |
|
---|
2474 | return pattern_subx(self, ptemplate, string, count, 1);
|
---|
2475 | }
|
---|
2476 |
|
---|
2477 | static PyObject*
|
---|
2478 | pattern_copy(PatternObject* self, PyObject *unused)
|
---|
2479 | {
|
---|
2480 | #ifdef USE_BUILTIN_COPY
|
---|
2481 | PatternObject* copy;
|
---|
2482 | int offset;
|
---|
2483 |
|
---|
2484 | copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize);
|
---|
2485 | if (!copy)
|
---|
2486 | return NULL;
|
---|
2487 |
|
---|
2488 | offset = offsetof(PatternObject, groups);
|
---|
2489 |
|
---|
2490 | Py_XINCREF(self->groupindex);
|
---|
2491 | Py_XINCREF(self->indexgroup);
|
---|
2492 | Py_XINCREF(self->pattern);
|
---|
2493 |
|
---|
2494 | memcpy((char*) copy + offset, (char*) self + offset,
|
---|
2495 | sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset);
|
---|
2496 | copy->weakreflist = NULL;
|
---|
2497 |
|
---|
2498 | return (PyObject*) copy;
|
---|
2499 | #else
|
---|
2500 | PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object");
|
---|
2501 | return NULL;
|
---|
2502 | #endif
|
---|
2503 | }
|
---|
2504 |
|
---|
2505 | static PyObject*
|
---|
2506 | pattern_deepcopy(PatternObject* self, PyObject* memo)
|
---|
2507 | {
|
---|
2508 | #ifdef USE_BUILTIN_COPY
|
---|
2509 | PatternObject* copy;
|
---|
2510 |
|
---|
2511 | copy = (PatternObject*) pattern_copy(self);
|
---|
2512 | if (!copy)
|
---|
2513 | return NULL;
|
---|
2514 |
|
---|
2515 | if (!deepcopy(©->groupindex, memo) ||
|
---|
2516 | !deepcopy(©->indexgroup, memo) ||
|
---|
2517 | !deepcopy(©->pattern, memo)) {
|
---|
2518 | Py_DECREF(copy);
|
---|
2519 | return NULL;
|
---|
2520 | }
|
---|
2521 |
|
---|
2522 | #else
|
---|
2523 | PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object");
|
---|
2524 | return NULL;
|
---|
2525 | #endif
|
---|
2526 | }
|
---|
2527 |
|
---|
2528 | PyDoc_STRVAR(pattern_match_doc,
|
---|
2529 | "match(string[, pos[, endpos]]) --> match object or None.\n\
|
---|
2530 | Matches zero or more characters at the beginning of the string");
|
---|
2531 |
|
---|
2532 | PyDoc_STRVAR(pattern_search_doc,
|
---|
2533 | "search(string[, pos[, endpos]]) --> match object or None.\n\
|
---|
2534 | Scan through string looking for a match, and return a corresponding\n\
|
---|
2535 | MatchObject instance. Return None if no position in the string matches.");
|
---|
2536 |
|
---|
2537 | PyDoc_STRVAR(pattern_split_doc,
|
---|
2538 | "split(string[, maxsplit = 0]) --> list.\n\
|
---|
2539 | Split string by the occurrences of pattern.");
|
---|
2540 |
|
---|
2541 | PyDoc_STRVAR(pattern_findall_doc,
|
---|
2542 | "findall(string[, pos[, endpos]]) --> list.\n\
|
---|
2543 | Return a list of all non-overlapping matches of pattern in string.");
|
---|
2544 |
|
---|
2545 | PyDoc_STRVAR(pattern_finditer_doc,
|
---|
2546 | "finditer(string[, pos[, endpos]]) --> iterator.\n\
|
---|
2547 | Return an iterator over all non-overlapping matches for the \n\
|
---|
2548 | RE pattern in string. For each match, the iterator returns a\n\
|
---|
2549 | match object.");
|
---|
2550 |
|
---|
2551 | PyDoc_STRVAR(pattern_sub_doc,
|
---|
2552 | "sub(repl, string[, count = 0]) --> newstring\n\
|
---|
2553 | Return the string obtained by replacing the leftmost non-overlapping\n\
|
---|
2554 | occurrences of pattern in string by the replacement repl.");
|
---|
2555 |
|
---|
2556 | PyDoc_STRVAR(pattern_subn_doc,
|
---|
2557 | "subn(repl, string[, count = 0]) --> (newstring, number of subs)\n\
|
---|
2558 | Return the tuple (new_string, number_of_subs_made) found by replacing\n\
|
---|
2559 | the leftmost non-overlapping occurrences of pattern with the\n\
|
---|
2560 | replacement repl.");
|
---|
2561 |
|
---|
2562 | PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects");
|
---|
2563 |
|
---|
2564 | static PyMethodDef pattern_methods[] = {
|
---|
2565 | {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS,
|
---|
2566 | pattern_match_doc},
|
---|
2567 | {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS,
|
---|
2568 | pattern_search_doc},
|
---|
2569 | {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS,
|
---|
2570 | pattern_sub_doc},
|
---|
2571 | {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS,
|
---|
2572 | pattern_subn_doc},
|
---|
2573 | {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS,
|
---|
2574 | pattern_split_doc},
|
---|
2575 | {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS,
|
---|
2576 | pattern_findall_doc},
|
---|
2577 | #if PY_VERSION_HEX >= 0x02020000
|
---|
2578 | {"finditer", (PyCFunction) pattern_finditer, METH_VARARGS,
|
---|
2579 | pattern_finditer_doc},
|
---|
2580 | #endif
|
---|
2581 | {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS},
|
---|
2582 | {"__copy__", (PyCFunction) pattern_copy, METH_NOARGS},
|
---|
2583 | {"__deepcopy__", (PyCFunction) pattern_deepcopy, METH_O},
|
---|
2584 | {NULL, NULL}
|
---|
2585 | };
|
---|
2586 |
|
---|
2587 | static PyObject*
|
---|
2588 | pattern_getattr(PatternObject* self, char* name)
|
---|
2589 | {
|
---|
2590 | PyObject* res;
|
---|
2591 |
|
---|
2592 | res = Py_FindMethod(pattern_methods, (PyObject*) self, name);
|
---|
2593 |
|
---|
2594 | if (res)
|
---|
2595 | return res;
|
---|
2596 |
|
---|
2597 | PyErr_Clear();
|
---|
2598 |
|
---|
2599 | /* attributes */
|
---|
2600 | if (!strcmp(name, "pattern")) {
|
---|
2601 | Py_INCREF(self->pattern);
|
---|
2602 | return self->pattern;
|
---|
2603 | }
|
---|
2604 |
|
---|
2605 | if (!strcmp(name, "flags"))
|
---|
2606 | return Py_BuildValue("i", self->flags);
|
---|
2607 |
|
---|
2608 | if (!strcmp(name, "groups"))
|
---|
2609 | return Py_BuildValue("i", self->groups);
|
---|
2610 |
|
---|
2611 | if (!strcmp(name, "groupindex") && self->groupindex) {
|
---|
2612 | Py_INCREF(self->groupindex);
|
---|
2613 | return self->groupindex;
|
---|
2614 | }
|
---|
2615 |
|
---|
2616 | PyErr_SetString(PyExc_AttributeError, name);
|
---|
2617 | return NULL;
|
---|
2618 | }
|
---|
2619 |
|
---|
2620 | statichere PyTypeObject Pattern_Type = {
|
---|
2621 | PyObject_HEAD_INIT(NULL)
|
---|
2622 | 0, "_" SRE_MODULE ".SRE_Pattern",
|
---|
2623 | sizeof(PatternObject), sizeof(SRE_CODE),
|
---|
2624 | (destructor)pattern_dealloc, /*tp_dealloc*/
|
---|
2625 | 0, /*tp_print*/
|
---|
2626 | (getattrfunc)pattern_getattr, /*tp_getattr*/
|
---|
2627 | 0, /* tp_setattr */
|
---|
2628 | 0, /* tp_compare */
|
---|
2629 | 0, /* tp_repr */
|
---|
2630 | 0, /* tp_as_number */
|
---|
2631 | 0, /* tp_as_sequence */
|
---|
2632 | 0, /* tp_as_mapping */
|
---|
2633 | 0, /* tp_hash */
|
---|
2634 | 0, /* tp_call */
|
---|
2635 | 0, /* tp_str */
|
---|
2636 | 0, /* tp_getattro */
|
---|
2637 | 0, /* tp_setattro */
|
---|
2638 | 0, /* tp_as_buffer */
|
---|
2639 | Py_TPFLAGS_HAVE_WEAKREFS, /* tp_flags */
|
---|
2640 | pattern_doc, /* tp_doc */
|
---|
2641 | 0, /* tp_traverse */
|
---|
2642 | 0, /* tp_clear */
|
---|
2643 | 0, /* tp_richcompare */
|
---|
2644 | offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */
|
---|
2645 | };
|
---|
2646 |
|
---|
2647 | static PyObject *
|
---|
2648 | _compile(PyObject* self_, PyObject* args)
|
---|
2649 | {
|
---|
2650 | /* "compile" pattern descriptor to pattern object */
|
---|
2651 |
|
---|
2652 | PatternObject* self;
|
---|
2653 | Py_ssize_t i, n;
|
---|
2654 |
|
---|
2655 | PyObject* pattern;
|
---|
2656 | int flags = 0;
|
---|
2657 | PyObject* code;
|
---|
2658 | Py_ssize_t groups = 0;
|
---|
2659 | PyObject* groupindex = NULL;
|
---|
2660 | PyObject* indexgroup = NULL;
|
---|
2661 | if (!PyArg_ParseTuple(args, "OiO!|nOO", &pattern, &flags,
|
---|
2662 | &PyList_Type, &code, &groups,
|
---|
2663 | &groupindex, &indexgroup))
|
---|
2664 | return NULL;
|
---|
2665 |
|
---|
2666 | n = PyList_GET_SIZE(code);
|
---|
2667 |
|
---|
2668 | self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
|
---|
2669 | if (!self)
|
---|
2670 | return NULL;
|
---|
2671 |
|
---|
2672 | self->codesize = n;
|
---|
2673 |
|
---|
2674 | for (i = 0; i < n; i++) {
|
---|
2675 | PyObject *o = PyList_GET_ITEM(code, i);
|
---|
2676 | unsigned long value = PyInt_Check(o) ? (unsigned long)PyInt_AsLong(o)
|
---|
2677 | : PyLong_AsUnsignedLong(o);
|
---|
2678 | self->code[i] = (SRE_CODE) value;
|
---|
2679 | if ((unsigned long) self->code[i] != value) {
|
---|
2680 | PyErr_SetString(PyExc_OverflowError,
|
---|
2681 | "regular expression code size limit exceeded");
|
---|
2682 | break;
|
---|
2683 | }
|
---|
2684 | }
|
---|
2685 |
|
---|
2686 | if (PyErr_Occurred()) {
|
---|
2687 | PyObject_DEL(self);
|
---|
2688 | return NULL;
|
---|
2689 | }
|
---|
2690 |
|
---|
2691 | Py_INCREF(pattern);
|
---|
2692 | self->pattern = pattern;
|
---|
2693 |
|
---|
2694 | self->flags = flags;
|
---|
2695 |
|
---|
2696 | self->groups = groups;
|
---|
2697 |
|
---|
2698 | Py_XINCREF(groupindex);
|
---|
2699 | self->groupindex = groupindex;
|
---|
2700 |
|
---|
2701 | Py_XINCREF(indexgroup);
|
---|
2702 | self->indexgroup = indexgroup;
|
---|
2703 |
|
---|
2704 | self->weakreflist = NULL;
|
---|
2705 |
|
---|
2706 | return (PyObject*) self;
|
---|
2707 | }
|
---|
2708 |
|
---|
2709 | /* -------------------------------------------------------------------- */
|
---|
2710 | /* match methods */
|
---|
2711 |
|
---|
2712 | static void
|
---|
2713 | match_dealloc(MatchObject* self)
|
---|
2714 | {
|
---|
2715 | Py_XDECREF(self->regs);
|
---|
2716 | Py_XDECREF(self->string);
|
---|
2717 | Py_DECREF(self->pattern);
|
---|
2718 | PyObject_DEL(self);
|
---|
2719 | }
|
---|
2720 |
|
---|
2721 | static PyObject*
|
---|
2722 | match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
|
---|
2723 | {
|
---|
2724 | if (index < 0 || index >= self->groups) {
|
---|
2725 | /* raise IndexError if we were given a bad group number */
|
---|
2726 | PyErr_SetString(
|
---|
2727 | PyExc_IndexError,
|
---|
2728 | "no such group"
|
---|
2729 | );
|
---|
2730 | return NULL;
|
---|
2731 | }
|
---|
2732 |
|
---|
2733 | index *= 2;
|
---|
2734 |
|
---|
2735 | if (self->string == Py_None || self->mark[index] < 0) {
|
---|
2736 | /* return default value if the string or group is undefined */
|
---|
2737 | Py_INCREF(def);
|
---|
2738 | return def;
|
---|
2739 | }
|
---|
2740 |
|
---|
2741 | return PySequence_GetSlice(
|
---|
2742 | self->string, self->mark[index], self->mark[index+1]
|
---|
2743 | );
|
---|
2744 | }
|
---|
2745 |
|
---|
2746 | static Py_ssize_t
|
---|
2747 | match_getindex(MatchObject* self, PyObject* index)
|
---|
2748 | {
|
---|
2749 | Py_ssize_t i;
|
---|
2750 |
|
---|
2751 | if (PyInt_Check(index))
|
---|
2752 | return PyInt_AsSsize_t(index);
|
---|
2753 |
|
---|
2754 | i = -1;
|
---|
2755 |
|
---|
2756 | if (self->pattern->groupindex) {
|
---|
2757 | index = PyObject_GetItem(self->pattern->groupindex, index);
|
---|
2758 | if (index) {
|
---|
2759 | if (PyInt_Check(index) || PyLong_Check(index))
|
---|
2760 | i = PyInt_AsSsize_t(index);
|
---|
2761 | Py_DECREF(index);
|
---|
2762 | } else
|
---|
2763 | PyErr_Clear();
|
---|
2764 | }
|
---|
2765 |
|
---|
2766 | return i;
|
---|
2767 | }
|
---|
2768 |
|
---|
2769 | static PyObject*
|
---|
2770 | match_getslice(MatchObject* self, PyObject* index, PyObject* def)
|
---|
2771 | {
|
---|
2772 | return match_getslice_by_index(self, match_getindex(self, index), def);
|
---|
2773 | }
|
---|
2774 |
|
---|
2775 | static PyObject*
|
---|
2776 | match_expand(MatchObject* self, PyObject* ptemplate)
|
---|
2777 | {
|
---|
2778 | /* delegate to Python code */
|
---|
2779 | return call(
|
---|
2780 | SRE_PY_MODULE, "_expand",
|
---|
2781 | PyTuple_Pack(3, self->pattern, self, ptemplate)
|
---|
2782 | );
|
---|
2783 | }
|
---|
2784 |
|
---|
2785 | static PyObject*
|
---|
2786 | match_group(MatchObject* self, PyObject* args)
|
---|
2787 | {
|
---|
2788 | PyObject* result;
|
---|
2789 | Py_ssize_t i, size;
|
---|
2790 |
|
---|
2791 | size = PyTuple_GET_SIZE(args);
|
---|
2792 |
|
---|
2793 | switch (size) {
|
---|
2794 | case 0:
|
---|
2795 | result = match_getslice(self, Py_False, Py_None);
|
---|
2796 | break;
|
---|
2797 | case 1:
|
---|
2798 | result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
|
---|
2799 | break;
|
---|
2800 | default:
|
---|
2801 | /* fetch multiple items */
|
---|
2802 | result = PyTuple_New(size);
|
---|
2803 | if (!result)
|
---|
2804 | return NULL;
|
---|
2805 | for (i = 0; i < size; i++) {
|
---|
2806 | PyObject* item = match_getslice(
|
---|
2807 | self, PyTuple_GET_ITEM(args, i), Py_None
|
---|
2808 | );
|
---|
2809 | if (!item) {
|
---|
2810 | Py_DECREF(result);
|
---|
2811 | return NULL;
|
---|
2812 | }
|
---|
2813 | PyTuple_SET_ITEM(result, i, item);
|
---|
2814 | }
|
---|
2815 | break;
|
---|
2816 | }
|
---|
2817 | return result;
|
---|
2818 | }
|
---|
2819 |
|
---|
2820 | static PyObject*
|
---|
2821 | match_groups(MatchObject* self, PyObject* args, PyObject* kw)
|
---|
2822 | {
|
---|
2823 | PyObject* result;
|
---|
2824 | Py_ssize_t index;
|
---|
2825 |
|
---|
2826 | PyObject* def = Py_None;
|
---|
2827 | static char* kwlist[] = { "default", NULL };
|
---|
2828 | if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))
|
---|
2829 | return NULL;
|
---|
2830 |
|
---|
2831 | result = PyTuple_New(self->groups-1);
|
---|
2832 | if (!result)
|
---|
2833 | return NULL;
|
---|
2834 |
|
---|
2835 | for (index = 1; index < self->groups; index++) {
|
---|
2836 | PyObject* item;
|
---|
2837 | item = match_getslice_by_index(self, index, def);
|
---|
2838 | if (!item) {
|
---|
2839 | Py_DECREF(result);
|
---|
2840 | return NULL;
|
---|
2841 | }
|
---|
2842 | PyTuple_SET_ITEM(result, index-1, item);
|
---|
2843 | }
|
---|
2844 |
|
---|
2845 | return result;
|
---|
2846 | }
|
---|
2847 |
|
---|
2848 | static PyObject*
|
---|
2849 | match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)
|
---|
2850 | {
|
---|
2851 | PyObject* result;
|
---|
2852 | PyObject* keys;
|
---|
2853 | Py_ssize_t index;
|
---|
2854 |
|
---|
2855 | PyObject* def = Py_None;
|
---|
2856 | static char* kwlist[] = { "default", NULL };
|
---|
2857 | if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groupdict", kwlist, &def))
|
---|
2858 | return NULL;
|
---|
2859 |
|
---|
2860 | result = PyDict_New();
|
---|
2861 | if (!result || !self->pattern->groupindex)
|
---|
2862 | return result;
|
---|
2863 |
|
---|
2864 | keys = PyMapping_Keys(self->pattern->groupindex);
|
---|
2865 | if (!keys)
|
---|
2866 | goto failed;
|
---|
2867 |
|
---|
2868 | for (index = 0; index < PyList_GET_SIZE(keys); index++) {
|
---|
2869 | int status;
|
---|
2870 | PyObject* key;
|
---|
2871 | PyObject* value;
|
---|
2872 | key = PyList_GET_ITEM(keys, index);
|
---|
2873 | if (!key)
|
---|
2874 | goto failed;
|
---|
2875 | value = match_getslice(self, key, def);
|
---|
2876 | if (!value) {
|
---|
2877 | Py_DECREF(key);
|
---|
2878 | goto failed;
|
---|
2879 | }
|
---|
2880 | status = PyDict_SetItem(result, key, value);
|
---|
2881 | Py_DECREF(value);
|
---|
2882 | if (status < 0)
|
---|
2883 | goto failed;
|
---|
2884 | }
|
---|
2885 |
|
---|
2886 | Py_DECREF(keys);
|
---|
2887 |
|
---|
2888 | return result;
|
---|
2889 |
|
---|
2890 | failed:
|
---|
2891 | Py_XDECREF(keys);
|
---|
2892 | Py_DECREF(result);
|
---|
2893 | return NULL;
|
---|
2894 | }
|
---|
2895 |
|
---|
2896 | static PyObject*
|
---|
2897 | match_start(MatchObject* self, PyObject* args)
|
---|
2898 | {
|
---|
2899 | Py_ssize_t index;
|
---|
2900 |
|
---|
2901 | PyObject* index_ = Py_False; /* zero */
|
---|
2902 | if (!PyArg_UnpackTuple(args, "start", 0, 1, &index_))
|
---|
2903 | return NULL;
|
---|
2904 |
|
---|
2905 | index = match_getindex(self, index_);
|
---|
2906 |
|
---|
2907 | if (index < 0 || index >= self->groups) {
|
---|
2908 | PyErr_SetString(
|
---|
2909 | PyExc_IndexError,
|
---|
2910 | "no such group"
|
---|
2911 | );
|
---|
2912 | return NULL;
|
---|
2913 | }
|
---|
2914 |
|
---|
2915 | /* mark is -1 if group is undefined */
|
---|
2916 | return Py_BuildValue("i", self->mark[index*2]);
|
---|
2917 | }
|
---|
2918 |
|
---|
2919 | static PyObject*
|
---|
2920 | match_end(MatchObject* self, PyObject* args)
|
---|
2921 | {
|
---|
2922 | Py_ssize_t index;
|
---|
2923 |
|
---|
2924 | PyObject* index_ = Py_False; /* zero */
|
---|
2925 | if (!PyArg_UnpackTuple(args, "end", 0, 1, &index_))
|
---|
2926 | return NULL;
|
---|
2927 |
|
---|
2928 | index = match_getindex(self, index_);
|
---|
2929 |
|
---|
2930 | if (index < 0 || index >= self->groups) {
|
---|
2931 | PyErr_SetString(
|
---|
2932 | PyExc_IndexError,
|
---|
2933 | "no such group"
|
---|
2934 | );
|
---|
2935 | return NULL;
|
---|
2936 | }
|
---|
2937 |
|
---|
2938 | /* mark is -1 if group is undefined */
|
---|
2939 | return Py_BuildValue("i", self->mark[index*2+1]);
|
---|
2940 | }
|
---|
2941 |
|
---|
2942 | LOCAL(PyObject*)
|
---|
2943 | _pair(Py_ssize_t i1, Py_ssize_t i2)
|
---|
2944 | {
|
---|
2945 | PyObject* pair;
|
---|
2946 | PyObject* item;
|
---|
2947 |
|
---|
2948 | pair = PyTuple_New(2);
|
---|
2949 | if (!pair)
|
---|
2950 | return NULL;
|
---|
2951 |
|
---|
2952 | item = PyInt_FromSsize_t(i1);
|
---|
2953 | if (!item)
|
---|
2954 | goto error;
|
---|
2955 | PyTuple_SET_ITEM(pair, 0, item);
|
---|
2956 |
|
---|
2957 | item = PyInt_FromSsize_t(i2);
|
---|
2958 | if (!item)
|
---|
2959 | goto error;
|
---|
2960 | PyTuple_SET_ITEM(pair, 1, item);
|
---|
2961 |
|
---|
2962 | return pair;
|
---|
2963 |
|
---|
2964 | error:
|
---|
2965 | Py_DECREF(pair);
|
---|
2966 | return NULL;
|
---|
2967 | }
|
---|
2968 |
|
---|
2969 | static PyObject*
|
---|
2970 | match_span(MatchObject* self, PyObject* args)
|
---|
2971 | {
|
---|
2972 | Py_ssize_t index;
|
---|
2973 |
|
---|
2974 | PyObject* index_ = Py_False; /* zero */
|
---|
2975 | if (!PyArg_UnpackTuple(args, "span", 0, 1, &index_))
|
---|
2976 | return NULL;
|
---|
2977 |
|
---|
2978 | index = match_getindex(self, index_);
|
---|
2979 |
|
---|
2980 | if (index < 0 || index >= self->groups) {
|
---|
2981 | PyErr_SetString(
|
---|
2982 | PyExc_IndexError,
|
---|
2983 | "no such group"
|
---|
2984 | );
|
---|
2985 | return NULL;
|
---|
2986 | }
|
---|
2987 |
|
---|
2988 | /* marks are -1 if group is undefined */
|
---|
2989 | return _pair(self->mark[index*2], self->mark[index*2+1]);
|
---|
2990 | }
|
---|
2991 |
|
---|
2992 | static PyObject*
|
---|
2993 | match_regs(MatchObject* self)
|
---|
2994 | {
|
---|
2995 | PyObject* regs;
|
---|
2996 | PyObject* item;
|
---|
2997 | Py_ssize_t index;
|
---|
2998 |
|
---|
2999 | regs = PyTuple_New(self->groups);
|
---|
3000 | if (!regs)
|
---|
3001 | return NULL;
|
---|
3002 |
|
---|
3003 | for (index = 0; index < self->groups; index++) {
|
---|
3004 | item = _pair(self->mark[index*2], self->mark[index*2+1]);
|
---|
3005 | if (!item) {
|
---|
3006 | Py_DECREF(regs);
|
---|
3007 | return NULL;
|
---|
3008 | }
|
---|
3009 | PyTuple_SET_ITEM(regs, index, item);
|
---|
3010 | }
|
---|
3011 |
|
---|
3012 | Py_INCREF(regs);
|
---|
3013 | self->regs = regs;
|
---|
3014 |
|
---|
3015 | return regs;
|
---|
3016 | }
|
---|
3017 |
|
---|
3018 | static PyObject*
|
---|
3019 | match_copy(MatchObject* self, PyObject *unused)
|
---|
3020 | {
|
---|
3021 | #ifdef USE_BUILTIN_COPY
|
---|
3022 | MatchObject* copy;
|
---|
3023 | Py_ssize_t slots, offset;
|
---|
3024 |
|
---|
3025 | slots = 2 * (self->pattern->groups+1);
|
---|
3026 |
|
---|
3027 | copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots);
|
---|
3028 | if (!copy)
|
---|
3029 | return NULL;
|
---|
3030 |
|
---|
3031 | /* this value a constant, but any compiler should be able to
|
---|
3032 | figure that out all by itself */
|
---|
3033 | offset = offsetof(MatchObject, string);
|
---|
3034 |
|
---|
3035 | Py_XINCREF(self->pattern);
|
---|
3036 | Py_XINCREF(self->string);
|
---|
3037 | Py_XINCREF(self->regs);
|
---|
3038 |
|
---|
3039 | memcpy((char*) copy + offset, (char*) self + offset,
|
---|
3040 | sizeof(MatchObject) + slots * sizeof(Py_ssize_t) - offset);
|
---|
3041 |
|
---|
3042 | return (PyObject*) copy;
|
---|
3043 | #else
|
---|
3044 | PyErr_SetString(PyExc_TypeError, "cannot copy this match object");
|
---|
3045 | return NULL;
|
---|
3046 | #endif
|
---|
3047 | }
|
---|
3048 |
|
---|
3049 | static PyObject*
|
---|
3050 | match_deepcopy(MatchObject* self, PyObject* memo)
|
---|
3051 | {
|
---|
3052 | #ifdef USE_BUILTIN_COPY
|
---|
3053 | MatchObject* copy;
|
---|
3054 |
|
---|
3055 | copy = (MatchObject*) match_copy(self);
|
---|
3056 | if (!copy)
|
---|
3057 | return NULL;
|
---|
3058 |
|
---|
3059 | if (!deepcopy((PyObject**) ©->pattern, memo) ||
|
---|
3060 | !deepcopy(©->string, memo) ||
|
---|
3061 | !deepcopy(©->regs, memo)) {
|
---|
3062 | Py_DECREF(copy);
|
---|
3063 | return NULL;
|
---|
3064 | }
|
---|
3065 |
|
---|
3066 | #else
|
---|
3067 | PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object");
|
---|
3068 | return NULL;
|
---|
3069 | #endif
|
---|
3070 | }
|
---|
3071 |
|
---|
3072 | static PyMethodDef match_methods[] = {
|
---|
3073 | {"group", (PyCFunction) match_group, METH_VARARGS},
|
---|
3074 | {"start", (PyCFunction) match_start, METH_VARARGS},
|
---|
3075 | {"end", (PyCFunction) match_end, METH_VARARGS},
|
---|
3076 | {"span", (PyCFunction) match_span, METH_VARARGS},
|
---|
3077 | {"groups", (PyCFunction) match_groups, METH_VARARGS|METH_KEYWORDS},
|
---|
3078 | {"groupdict", (PyCFunction) match_groupdict, METH_VARARGS|METH_KEYWORDS},
|
---|
3079 | {"expand", (PyCFunction) match_expand, METH_O},
|
---|
3080 | {"__copy__", (PyCFunction) match_copy, METH_NOARGS},
|
---|
3081 | {"__deepcopy__", (PyCFunction) match_deepcopy, METH_O},
|
---|
3082 | {NULL, NULL}
|
---|
3083 | };
|
---|
3084 |
|
---|
3085 | static PyObject*
|
---|
3086 | match_getattr(MatchObject* self, char* name)
|
---|
3087 | {
|
---|
3088 | PyObject* res;
|
---|
3089 |
|
---|
3090 | res = Py_FindMethod(match_methods, (PyObject*) self, name);
|
---|
3091 | if (res)
|
---|
3092 | return res;
|
---|
3093 |
|
---|
3094 | PyErr_Clear();
|
---|
3095 |
|
---|
3096 | if (!strcmp(name, "lastindex")) {
|
---|
3097 | if (self->lastindex >= 0)
|
---|
3098 | return Py_BuildValue("i", self->lastindex);
|
---|
3099 | Py_INCREF(Py_None);
|
---|
3100 | return Py_None;
|
---|
3101 | }
|
---|
3102 |
|
---|
3103 | if (!strcmp(name, "lastgroup")) {
|
---|
3104 | if (self->pattern->indexgroup && self->lastindex >= 0) {
|
---|
3105 | PyObject* result = PySequence_GetItem(
|
---|
3106 | self->pattern->indexgroup, self->lastindex
|
---|
3107 | );
|
---|
3108 | if (result)
|
---|
3109 | return result;
|
---|
3110 | PyErr_Clear();
|
---|
3111 | }
|
---|
3112 | Py_INCREF(Py_None);
|
---|
3113 | return Py_None;
|
---|
3114 | }
|
---|
3115 |
|
---|
3116 | if (!strcmp(name, "string")) {
|
---|
3117 | if (self->string) {
|
---|
3118 | Py_INCREF(self->string);
|
---|
3119 | return self->string;
|
---|
3120 | } else {
|
---|
3121 | Py_INCREF(Py_None);
|
---|
3122 | return Py_None;
|
---|
3123 | }
|
---|
3124 | }
|
---|
3125 |
|
---|
3126 | if (!strcmp(name, "regs")) {
|
---|
3127 | if (self->regs) {
|
---|
3128 | Py_INCREF(self->regs);
|
---|
3129 | return self->regs;
|
---|
3130 | } else
|
---|
3131 | return match_regs(self);
|
---|
3132 | }
|
---|
3133 |
|
---|
3134 | if (!strcmp(name, "re")) {
|
---|
3135 | Py_INCREF(self->pattern);
|
---|
3136 | return (PyObject*) self->pattern;
|
---|
3137 | }
|
---|
3138 |
|
---|
3139 | if (!strcmp(name, "pos"))
|
---|
3140 | return Py_BuildValue("i", self->pos);
|
---|
3141 |
|
---|
3142 | if (!strcmp(name, "endpos"))
|
---|
3143 | return Py_BuildValue("i", self->endpos);
|
---|
3144 |
|
---|
3145 | PyErr_SetString(PyExc_AttributeError, name);
|
---|
3146 | return NULL;
|
---|
3147 | }
|
---|
3148 |
|
---|
/* FIXME: implement setattr("string", None) as a special case (to
   detach the associated string, if any) */
|
---|
3151 |
|
---|
3152 | statichere PyTypeObject Match_Type = {
|
---|
3153 | PyObject_HEAD_INIT(NULL)
|
---|
3154 | 0, "_" SRE_MODULE ".SRE_Match",
|
---|
3155 | sizeof(MatchObject), sizeof(Py_ssize_t),
|
---|
3156 | (destructor)match_dealloc, /*tp_dealloc*/
|
---|
3157 | 0, /*tp_print*/
|
---|
3158 | (getattrfunc)match_getattr /*tp_getattr*/
|
---|
3159 | };
|
---|
3160 |
|
---|
3161 | static PyObject*
|
---|
3162 | pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status)
|
---|
3163 | {
|
---|
3164 | /* create match object (from state object) */
|
---|
3165 |
|
---|
3166 | MatchObject* match;
|
---|
3167 | Py_ssize_t i, j;
|
---|
3168 | char* base;
|
---|
3169 | int n;
|
---|
3170 |
|
---|
3171 | if (status > 0) {
|
---|
3172 |
|
---|
3173 | /* create match object (with room for extra group marks) */
|
---|
3174 | match = PyObject_NEW_VAR(MatchObject, &Match_Type,
|
---|
3175 | 2*(pattern->groups+1));
|
---|
3176 | if (!match)
|
---|
3177 | return NULL;
|
---|
3178 |
|
---|
3179 | Py_INCREF(pattern);
|
---|
3180 | match->pattern = pattern;
|
---|
3181 |
|
---|
3182 | Py_INCREF(state->string);
|
---|
3183 | match->string = state->string;
|
---|
3184 |
|
---|
3185 | match->regs = NULL;
|
---|
3186 | match->groups = pattern->groups+1;
|
---|
3187 |
|
---|
3188 | /* fill in group slices */
|
---|
3189 |
|
---|
3190 | base = (char*) state->beginning;
|
---|
3191 | n = state->charsize;
|
---|
3192 |
|
---|
3193 | match->mark[0] = ((char*) state->start - base) / n;
|
---|
3194 | match->mark[1] = ((char*) state->ptr - base) / n;
|
---|
3195 |
|
---|
3196 | for (i = j = 0; i < pattern->groups; i++, j+=2)
|
---|
3197 | if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
|
---|
3198 | match->mark[j+2] = ((char*) state->mark[j] - base) / n;
|
---|
3199 | match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
|
---|
3200 | } else
|
---|
3201 | match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
|
---|
3202 |
|
---|
3203 | match->pos = state->pos;
|
---|
3204 | match->endpos = state->endpos;
|
---|
3205 |
|
---|
3206 | match->lastindex = state->lastindex;
|
---|
3207 |
|
---|
3208 | return (PyObject*) match;
|
---|
3209 |
|
---|
3210 | } else if (status == 0) {
|
---|
3211 |
|
---|
3212 | /* no match */
|
---|
3213 | Py_INCREF(Py_None);
|
---|
3214 | return Py_None;
|
---|
3215 |
|
---|
3216 | }
|
---|
3217 |
|
---|
3218 | /* internal error */
|
---|
3219 | pattern_error(status);
|
---|
3220 | return NULL;
|
---|
3221 | }
|
---|
3222 |
|
---|
3223 |
|
---|
3224 | /* -------------------------------------------------------------------- */
|
---|
3225 | /* scanner methods (experimental) */
|
---|
3226 |
|
---|
3227 | static void
|
---|
3228 | scanner_dealloc(ScannerObject* self)
|
---|
3229 | {
|
---|
3230 | state_fini(&self->state);
|
---|
3231 | Py_DECREF(self->pattern);
|
---|
3232 | PyObject_DEL(self);
|
---|
3233 | }
|
---|
3234 |
|
---|
3235 | static PyObject*
|
---|
3236 | scanner_match(ScannerObject* self, PyObject *unused)
|
---|
3237 | {
|
---|
3238 | SRE_STATE* state = &self->state;
|
---|
3239 | PyObject* match;
|
---|
3240 | int status;
|
---|
3241 |
|
---|
3242 | state_reset(state);
|
---|
3243 |
|
---|
3244 | state->ptr = state->start;
|
---|
3245 |
|
---|
3246 | if (state->charsize == 1) {
|
---|
3247 | status = sre_match(state, PatternObject_GetCode(self->pattern));
|
---|
3248 | } else {
|
---|
3249 | #if defined(HAVE_UNICODE)
|
---|
3250 | status = sre_umatch(state, PatternObject_GetCode(self->pattern));
|
---|
3251 | #endif
|
---|
3252 | }
|
---|
3253 |
|
---|
3254 | match = pattern_new_match((PatternObject*) self->pattern,
|
---|
3255 | state, status);
|
---|
3256 |
|
---|
3257 | if (status == 0 || state->ptr == state->start)
|
---|
3258 | state->start = (void*) ((char*) state->ptr + state->charsize);
|
---|
3259 | else
|
---|
3260 | state->start = state->ptr;
|
---|
3261 |
|
---|
3262 | return match;
|
---|
3263 | }
|
---|
3264 |
|
---|
3265 |
|
---|
3266 | static PyObject*
|
---|
3267 | scanner_search(ScannerObject* self, PyObject *unused)
|
---|
3268 | {
|
---|
3269 | SRE_STATE* state = &self->state;
|
---|
3270 | PyObject* match;
|
---|
3271 | int status;
|
---|
3272 |
|
---|
3273 | state_reset(state);
|
---|
3274 |
|
---|
3275 | state->ptr = state->start;
|
---|
3276 |
|
---|
3277 | if (state->charsize == 1) {
|
---|
3278 | status = sre_search(state, PatternObject_GetCode(self->pattern));
|
---|
3279 | } else {
|
---|
3280 | #if defined(HAVE_UNICODE)
|
---|
3281 | status = sre_usearch(state, PatternObject_GetCode(self->pattern));
|
---|
3282 | #endif
|
---|
3283 | }
|
---|
3284 |
|
---|
3285 | match = pattern_new_match((PatternObject*) self->pattern,
|
---|
3286 | state, status);
|
---|
3287 |
|
---|
3288 | if (status == 0 || state->ptr == state->start)
|
---|
3289 | state->start = (void*) ((char*) state->ptr + state->charsize);
|
---|
3290 | else
|
---|
3291 | state->start = state->ptr;
|
---|
3292 |
|
---|
3293 | return match;
|
---|
3294 | }
|
---|
3295 |
|
---|
3296 | static PyMethodDef scanner_methods[] = {
|
---|
3297 | {"match", (PyCFunction) scanner_match, METH_NOARGS},
|
---|
3298 | {"search", (PyCFunction) scanner_search, METH_NOARGS},
|
---|
3299 | {NULL, NULL}
|
---|
3300 | };
|
---|
3301 |
|
---|
3302 | static PyObject*
|
---|
3303 | scanner_getattr(ScannerObject* self, char* name)
|
---|
3304 | {
|
---|
3305 | PyObject* res;
|
---|
3306 |
|
---|
3307 | res = Py_FindMethod(scanner_methods, (PyObject*) self, name);
|
---|
3308 | if (res)
|
---|
3309 | return res;
|
---|
3310 |
|
---|
3311 | PyErr_Clear();
|
---|
3312 |
|
---|
3313 | /* attributes */
|
---|
3314 | if (!strcmp(name, "pattern")) {
|
---|
3315 | Py_INCREF(self->pattern);
|
---|
3316 | return self->pattern;
|
---|
3317 | }
|
---|
3318 |
|
---|
3319 | PyErr_SetString(PyExc_AttributeError, name);
|
---|
3320 | return NULL;
|
---|
3321 | }
|
---|
3322 |
|
---|
3323 | statichere PyTypeObject Scanner_Type = {
|
---|
3324 | PyObject_HEAD_INIT(NULL)
|
---|
3325 | 0, "_" SRE_MODULE ".SRE_Scanner",
|
---|
3326 | sizeof(ScannerObject), 0,
|
---|
3327 | (destructor)scanner_dealloc, /*tp_dealloc*/
|
---|
3328 | 0, /*tp_print*/
|
---|
3329 | (getattrfunc)scanner_getattr, /*tp_getattr*/
|
---|
3330 | };
|
---|
3331 |
|
---|
3332 | static PyObject*
|
---|
3333 | pattern_scanner(PatternObject* pattern, PyObject* args)
|
---|
3334 | {
|
---|
3335 | /* create search state object */
|
---|
3336 |
|
---|
3337 | ScannerObject* self;
|
---|
3338 |
|
---|
3339 | PyObject* string;
|
---|
3340 | Py_ssize_t start = 0;
|
---|
3341 | Py_ssize_t end = PY_SSIZE_T_MAX;
|
---|
3342 | if (!PyArg_ParseTuple(args, "O|nn:scanner", &string, &start, &end))
|
---|
3343 | return NULL;
|
---|
3344 |
|
---|
3345 | /* create scanner object */
|
---|
3346 | self = PyObject_NEW(ScannerObject, &Scanner_Type);
|
---|
3347 | if (!self)
|
---|
3348 | return NULL;
|
---|
3349 |
|
---|
3350 | string = state_init(&self->state, pattern, string, start, end);
|
---|
3351 | if (!string) {
|
---|
3352 | PyObject_DEL(self);
|
---|
3353 | return NULL;
|
---|
3354 | }
|
---|
3355 |
|
---|
3356 | Py_INCREF(pattern);
|
---|
3357 | self->pattern = (PyObject*) pattern;
|
---|
3358 |
|
---|
3359 | return (PyObject*) self;
|
---|
3360 | }
|
---|
3361 |
|
---|
3362 | static PyMethodDef _functions[] = {
|
---|
3363 | {"compile", _compile, METH_VARARGS},
|
---|
3364 | {"getcodesize", sre_codesize, METH_NOARGS},
|
---|
3365 | {"getlower", sre_getlower, METH_VARARGS},
|
---|
3366 | {NULL, NULL}
|
---|
3367 | };
|
---|
3368 |
|
---|
3369 | #if PY_VERSION_HEX < 0x02030000
|
---|
3370 | DL_EXPORT(void) init_sre(void)
|
---|
3371 | #else
|
---|
3372 | PyMODINIT_FUNC init_sre(void)
|
---|
3373 | #endif
|
---|
3374 | {
|
---|
3375 | PyObject* m;
|
---|
3376 | PyObject* d;
|
---|
3377 | PyObject* x;
|
---|
3378 |
|
---|
3379 | /* Patch object types */
|
---|
3380 | Pattern_Type.ob_type = Match_Type.ob_type =
|
---|
3381 | Scanner_Type.ob_type = &PyType_Type;
|
---|
3382 |
|
---|
3383 | m = Py_InitModule("_" SRE_MODULE, _functions);
|
---|
3384 | if (m == NULL)
|
---|
3385 | return;
|
---|
3386 | d = PyModule_GetDict(m);
|
---|
3387 |
|
---|
3388 | x = PyInt_FromLong(SRE_MAGIC);
|
---|
3389 | if (x) {
|
---|
3390 | PyDict_SetItemString(d, "MAGIC", x);
|
---|
3391 | Py_DECREF(x);
|
---|
3392 | }
|
---|
3393 |
|
---|
3394 | x = PyInt_FromLong(sizeof(SRE_CODE));
|
---|
3395 | if (x) {
|
---|
3396 | PyDict_SetItemString(d, "CODESIZE", x);
|
---|
3397 | Py_DECREF(x);
|
---|
3398 | }
|
---|
3399 |
|
---|
3400 | x = PyString_FromString(copyright);
|
---|
3401 | if (x) {
|
---|
3402 | PyDict_SetItemString(d, "copyright", x);
|
---|
3403 | Py_DECREF(x);
|
---|
3404 | }
|
---|
3405 | }
|
---|
3406 |
|
---|
3407 | #endif /* !defined(SRE_RECURSIVE) */
|
---|
3408 |
|
---|
3409 | /* vim:ts=4:sw=4:et
|
---|
3410 | */
|
---|