/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
 *
 * See the _sre.c file for information on usage and redistribution.
 */
|
---|
| 10 |
|
---|
| 11 | #ifndef SRE_INCLUDED
|
---|
| 12 | #define SRE_INCLUDED
|
---|
| 13 |
|
---|
| 14 | #include "sre_constants.h"
|
---|
| 15 |
|
---|
/* SRE_CODE is the type of one code word.  It must be unsigned short or
   larger, and large enough to hold a UCS4 character.

   SRE_MAXREPEAT is the all-ones SRE_CODE value when SIZEOF_SIZE_T is
   greater than 4 (Unicode builds) or greater than SIZEOF_INT (non-Unicode
   builds); otherwise it is PY_SSIZE_T_MAX converted to SRE_CODE.

   Py_USING_UNICODE, Py_UCS4, SIZEOF_SIZE_T, SIZEOF_INT and PY_SSIZE_T_MAX
   are not defined in this file; they must be provided before this point. */
#ifdef Py_USING_UNICODE
# define SRE_CODE Py_UCS4
# if SIZEOF_SIZE_T > 4
#  define SRE_MAXREPEAT (~(SRE_CODE)0)
# else
#  define SRE_MAXREPEAT ((SRE_CODE)PY_SSIZE_T_MAX)
# endif
#else
# define SRE_CODE unsigned int
# if SIZEOF_SIZE_T > SIZEOF_INT
#  define SRE_MAXREPEAT (~(SRE_CODE)0)
# else
#  define SRE_MAXREPEAT ((SRE_CODE)PY_SSIZE_T_MAX)
# endif
#endif
|
---|
| 33 |
|
---|
| 34 | typedef struct {
|
---|
| 35 | PyObject_VAR_HEAD
|
---|
| 36 | Py_ssize_t groups; /* must be first! */
|
---|
| 37 | PyObject* groupindex;
|
---|
| 38 | PyObject* indexgroup;
|
---|
| 39 | /* compatibility */
|
---|
| 40 | PyObject* pattern; /* pattern source (or None) */
|
---|
| 41 | int flags; /* flags used when compiling pattern source */
|
---|
| 42 | PyObject *weakreflist; /* List of weak references */
|
---|
| 43 | /* pattern code */
|
---|
| 44 | Py_ssize_t codesize;
|
---|
| 45 | SRE_CODE code[1];
|
---|
| 46 | } PatternObject;
|
---|
| 47 |
|
---|
/* Yield the `code` array of `o`, after casting `o` to PatternObject*.
   The argument is not type-checked. */
#define PatternObject_GetCode(o) (((PatternObject*)(o))->code)
|
---|
| 49 |
|
---|
| 50 | typedef struct {
|
---|
| 51 | PyObject_VAR_HEAD
|
---|
| 52 | PyObject* string; /* link to the target string (must be first) */
|
---|
| 53 | PyObject* regs; /* cached list of matching spans */
|
---|
| 54 | PatternObject* pattern; /* link to the regex (pattern) object */
|
---|
| 55 | Py_ssize_t pos, endpos; /* current target slice */
|
---|
| 56 | Py_ssize_t lastindex; /* last index marker seen by the engine (-1 if none) */
|
---|
| 57 | Py_ssize_t groups; /* number of groups (start/end marks) */
|
---|
| 58 | Py_ssize_t mark[1];
|
---|
| 59 | } MatchObject;
|
---|
| 60 |
|
---|
/* Pointer to a function that takes a character value and returns an
   unsigned int; used as the type of the `lower` hook in SRE_STATE. */
typedef unsigned int (*SRE_TOLOWER_HOOK)(unsigned int ch);
|
---|
| 62 |
|
---|
/* Number of entries in the fixed-size `mark` array of SRE_STATE.
   FIXME: <fl> shouldn't be a constant, really... */
#define SRE_MARK_SIZE 200
|
---|
| 65 |
|
---|
| 66 | typedef struct SRE_REPEAT_T {
|
---|
| 67 | Py_ssize_t count;
|
---|
| 68 | SRE_CODE* pattern; /* points to REPEAT operator arguments */
|
---|
| 69 | void* last_ptr; /* helper to check for infinite loops */
|
---|
| 70 | struct SRE_REPEAT_T *prev; /* points to previous repeat context */
|
---|
| 71 | } SRE_REPEAT;
|
---|
| 72 |
|
---|
| 73 | typedef struct {
|
---|
| 74 | /* string pointers */
|
---|
| 75 | void* ptr; /* current position (also end of current slice) */
|
---|
| 76 | void* beginning; /* start of original string */
|
---|
| 77 | void* start; /* start of current slice */
|
---|
| 78 | void* end; /* end of original string */
|
---|
| 79 | /* attributes for the match object */
|
---|
| 80 | PyObject* string;
|
---|
| 81 | Py_ssize_t pos, endpos;
|
---|
| 82 | /* character size */
|
---|
| 83 | int charsize;
|
---|
| 84 | /* registers */
|
---|
| 85 | Py_ssize_t lastindex;
|
---|
| 86 | Py_ssize_t lastmark;
|
---|
| 87 | void* mark[SRE_MARK_SIZE];
|
---|
| 88 | /* dynamically allocated stuff */
|
---|
| 89 | char* data_stack;
|
---|
| 90 | size_t data_stack_size;
|
---|
| 91 | size_t data_stack_base;
|
---|
| 92 | /* current repeat context */
|
---|
| 93 | SRE_REPEAT *repeat;
|
---|
| 94 | /* hooks */
|
---|
| 95 | SRE_TOLOWER_HOOK lower;
|
---|
| 96 | } SRE_STATE;
|
---|
| 97 |
|
---|
| 98 | typedef struct {
|
---|
| 99 | PyObject_HEAD
|
---|
| 100 | PyObject* pattern;
|
---|
| 101 | SRE_STATE state;
|
---|
| 102 | } ScannerObject;
|
---|
| 103 |
|
---|
| 104 | #endif
|
---|