/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
 *
 * See the _sre.c file for information on usage and redistribution.
 */

11 | #ifndef SRE_INCLUDED
|
---|
12 | #define SRE_INCLUDED
|
---|
13 |
|
---|
14 | #include "sre_constants.h"
|
---|
15 |
|
---|
/* Size of a code word (must be unsigned short or larger, and
   large enough to hold a UCS4 character).

   SRE_CODE is Py_UCS4 when Py_USING_UNICODE is defined and
   unsigned int otherwise.

   SRE_MAXREPEAT is the all-ones SRE_CODE value when SIZEOF_SIZE_T
   exceeds 4 (Unicode build) or SIZEOF_INT (non-Unicode build);
   otherwise it is PY_SSIZE_T_MAX converted to SRE_CODE. */
#ifdef Py_USING_UNICODE
# define SRE_CODE Py_UCS4
# if SIZEOF_SIZE_T > 4
#  define SRE_MAXREPEAT (~(SRE_CODE)0)
# else
#  define SRE_MAXREPEAT ((SRE_CODE)PY_SSIZE_T_MAX)
# endif
#else
# define SRE_CODE unsigned int
# if SIZEOF_SIZE_T > SIZEOF_INT
#  define SRE_MAXREPEAT (~(SRE_CODE)0)
# else
#  define SRE_MAXREPEAT ((SRE_CODE)PY_SSIZE_T_MAX)
# endif
#endif

34 | typedef struct {
|
---|
35 | PyObject_VAR_HEAD
|
---|
36 | Py_ssize_t groups; /* must be first! */
|
---|
37 | PyObject* groupindex;
|
---|
38 | PyObject* indexgroup;
|
---|
39 | /* compatibility */
|
---|
40 | PyObject* pattern; /* pattern source (or None) */
|
---|
41 | int flags; /* flags used when compiling pattern source */
|
---|
42 | PyObject *weakreflist; /* List of weak references */
|
---|
43 | /* pattern code */
|
---|
44 | Py_ssize_t codesize;
|
---|
45 | SRE_CODE code[1];
|
---|
46 | } PatternObject;
|
---|
47 |
|
---|
/* Yield the code array of pattern object o.  The argument is cast to
   PatternObject* without any type check. */
#define PatternObject_GetCode(o) (((PatternObject*)(o))->code)

50 | typedef struct {
|
---|
51 | PyObject_VAR_HEAD
|
---|
52 | PyObject* string; /* link to the target string (must be first) */
|
---|
53 | PyObject* regs; /* cached list of matching spans */
|
---|
54 | PatternObject* pattern; /* link to the regex (pattern) object */
|
---|
55 | Py_ssize_t pos, endpos; /* current target slice */
|
---|
56 | Py_ssize_t lastindex; /* last index marker seen by the engine (-1 if none) */
|
---|
57 | Py_ssize_t groups; /* number of groups (start/end marks) */
|
---|
58 | Py_ssize_t mark[1];
|
---|
59 | } MatchObject;
|
---|
60 |
|
---|
/* Type of the hook stored in SRE_STATE.lower: takes a character code
   and returns an unsigned int (by its name, the lower-cased
   character -- confirm against the implementations). */
typedef unsigned int (*SRE_TOLOWER_HOOK)(unsigned int ch);

/* Number of slots in the SRE_STATE.mark array. */
/* FIXME: <fl> shouldn't be a constant, really... */
#define SRE_MARK_SIZE 200

66 | typedef struct SRE_REPEAT_T {
|
---|
67 | Py_ssize_t count;
|
---|
68 | SRE_CODE* pattern; /* points to REPEAT operator arguments */
|
---|
69 | void* last_ptr; /* helper to check for infinite loops */
|
---|
70 | struct SRE_REPEAT_T *prev; /* points to previous repeat context */
|
---|
71 | } SRE_REPEAT;
|
---|
72 |
|
---|
73 | typedef struct {
|
---|
74 | /* string pointers */
|
---|
75 | void* ptr; /* current position (also end of current slice) */
|
---|
76 | void* beginning; /* start of original string */
|
---|
77 | void* start; /* start of current slice */
|
---|
78 | void* end; /* end of original string */
|
---|
79 | /* attributes for the match object */
|
---|
80 | PyObject* string;
|
---|
81 | Py_ssize_t pos, endpos;
|
---|
82 | /* character size */
|
---|
83 | int charsize;
|
---|
84 | /* registers */
|
---|
85 | Py_ssize_t lastindex;
|
---|
86 | Py_ssize_t lastmark;
|
---|
87 | void* mark[SRE_MARK_SIZE];
|
---|
88 | /* dynamically allocated stuff */
|
---|
89 | char* data_stack;
|
---|
90 | size_t data_stack_size;
|
---|
91 | size_t data_stack_base;
|
---|
92 | /* current repeat context */
|
---|
93 | SRE_REPEAT *repeat;
|
---|
94 | /* hooks */
|
---|
95 | SRE_TOLOWER_HOOK lower;
|
---|
96 | } SRE_STATE;
|
---|
97 |
|
---|
98 | typedef struct {
|
---|
99 | PyObject_HEAD
|
---|
100 | PyObject* pattern;
|
---|
101 | SRE_STATE state;
|
---|
102 | } ScannerObject;
|
---|
103 |
|
---|
104 | #endif
|
---|