1 | /*
|
---|
2 | * Secret Labs' Regular Expression Engine
|
---|
3 | *
|
---|
4 | * regular expression matching engine
|
---|
5 | *
|
---|
6 | * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
|
---|
7 | *
|
---|
8 | * See the _sre.c file for information on usage and redistribution.
|
---|
9 | */
|
---|
10 |
|
---|
11 | #ifndef SRE_INCLUDED
|
---|
12 | #define SRE_INCLUDED
|
---|
13 |
|
---|
14 | #include "sre_constants.h"
|
---|
15 |
|
---|
/* SRE_CODE is the type of a single code word.  It has to be at least as
   wide as unsigned short, and wide enough to hold one Py_UNICODE
   character; the wide-Unicode configuration therefore selects Py_UCS4. */
#ifndef Py_UNICODE_WIDE
#define SRE_CODE unsigned short
#else
#define SRE_CODE Py_UCS4
#endif
|
---|
23 |
|
---|
24 | typedef struct {
|
---|
25 | PyObject_VAR_HEAD
|
---|
26 | Py_ssize_t groups; /* must be first! */
|
---|
27 | PyObject* groupindex;
|
---|
28 | PyObject* indexgroup;
|
---|
29 | /* compatibility */
|
---|
30 | PyObject* pattern; /* pattern source (or None) */
|
---|
31 | int flags; /* flags used when compiling pattern source */
|
---|
32 | PyObject *weakreflist; /* List of weak references */
|
---|
33 | /* pattern code */
|
---|
34 | Py_ssize_t codesize;
|
---|
35 | SRE_CODE code[1];
|
---|
36 | } PatternObject;
|
---|
37 |
|
---|
/* Yield the code[] member of `o`, after casting `o` to PatternObject*.
   The argument is evaluated exactly once. */
#define PatternObject_GetCode(o) ((*(PatternObject *)(o)).code)
|
---|
39 |
|
---|
40 | typedef struct {
|
---|
41 | PyObject_VAR_HEAD
|
---|
42 | PyObject* string; /* link to the target string (must be first) */
|
---|
43 | PyObject* regs; /* cached list of matching spans */
|
---|
44 | PatternObject* pattern; /* link to the regex (pattern) object */
|
---|
45 | Py_ssize_t pos, endpos; /* current target slice */
|
---|
46 | Py_ssize_t lastindex; /* last index marker seen by the engine (-1 if none) */
|
---|
47 | Py_ssize_t groups; /* number of groups (start/end marks) */
|
---|
48 | Py_ssize_t mark[1];
|
---|
49 | } MatchObject;
|
---|
50 |
|
---|
/* Pointer to a function that takes one unsigned int (`ch`) and returns
   an unsigned int; SRE_STATE stores one of these in its `lower` hook. */
typedef unsigned (*SRE_TOLOWER_HOOK)(unsigned ch);
|
---|
52 |
|
---|
/* Number of slots in the SRE_STATE mark[] array.
   FIXME: <fl> this should not really be a compile-time constant... */
#define SRE_MARK_SIZE 200
|
---|
55 |
|
---|
56 | typedef struct SRE_REPEAT_T {
|
---|
57 | Py_ssize_t count;
|
---|
58 | SRE_CODE* pattern; /* points to REPEAT operator arguments */
|
---|
59 | void* last_ptr; /* helper to check for infinite loops */
|
---|
60 | struct SRE_REPEAT_T *prev; /* points to previous repeat context */
|
---|
61 | } SRE_REPEAT;
|
---|
62 |
|
---|
63 | typedef struct {
|
---|
64 | /* string pointers */
|
---|
65 | void* ptr; /* current position (also end of current slice) */
|
---|
66 | void* beginning; /* start of original string */
|
---|
67 | void* start; /* start of current slice */
|
---|
68 | void* end; /* end of original string */
|
---|
69 | /* attributes for the match object */
|
---|
70 | PyObject* string;
|
---|
71 | Py_ssize_t pos, endpos;
|
---|
72 | /* character size */
|
---|
73 | int charsize;
|
---|
74 | /* registers */
|
---|
75 | Py_ssize_t lastindex;
|
---|
76 | Py_ssize_t lastmark;
|
---|
77 | void* mark[SRE_MARK_SIZE];
|
---|
78 | /* dynamically allocated stuff */
|
---|
79 | char* data_stack;
|
---|
80 | size_t data_stack_size;
|
---|
81 | size_t data_stack_base;
|
---|
82 | /* current repeat context */
|
---|
83 | SRE_REPEAT *repeat;
|
---|
84 | /* hooks */
|
---|
85 | SRE_TOLOWER_HOOK lower;
|
---|
86 | } SRE_STATE;
|
---|
87 |
|
---|
88 | typedef struct {
|
---|
89 | PyObject_HEAD
|
---|
90 | PyObject* pattern;
|
---|
91 | SRE_STATE state;
|
---|
92 | } ScannerObject;
|
---|
93 |
|
---|
94 | #endif
|
---|