Ignore:
Timestamp:
Jul 10, 2005, 11:12:06 AM (20 years ago)
Author:
bird
Message:

Replaced BSD regex with the new GLIBC implementation.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/emx/include/regex.h

    • Property cvs2svn:cvs-rev changed from 1.4 to 1.5
    r2239 r2240  
    1 /* regex.h,v 1.2 2004/09/14 22:27:35 bird Exp */
    2 /** @file
    3  * FreeBSD 5.3
    4  * @changed bird: Added REG_NOMATCH.
     1/* Definitions for data structures and routines for the regular
     2   expression library.
     3   Copyright (C) 1985,1989-93,1995-98,2000,2001,2002,2003
     4   Free Software Foundation, Inc.
     5   This file is part of the GNU C Library.
     6
     7   The GNU C Library is free software; you can redistribute it and/or
     8   modify it under the terms of the GNU Lesser General Public
     9   License as published by the Free Software Foundation; either
     10   version 2.1 of the License, or (at your option) any later version.
     11
     12   The GNU C Library is distributed in the hope that it will be useful,
     13   but WITHOUT ANY WARRANTY; without even the implied warranty of
     14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     15   Lesser General Public License for more details.
     16
     17   You should have received a copy of the GNU Lesser General Public
     18   License along with the GNU C Library; if not, write to the Free
     19   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     20   02111-1307 USA.  */
     21
     22#ifndef _REGEX_H
     23#define _REGEX_H 1
     24
     25#include <sys/types.h>
     26
     27/* Allow the use in C++ code.  */
     28#ifdef __cplusplus
     29extern "C" {
     30#endif
     31
     32/* POSIX says that <sys/types.h> must be included (by the caller) before
     33   <regex.h>.  */
     34
     35#if !defined _POSIX_C_SOURCE && !defined _POSIX_SOURCE && defined VMS
     36/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
     37   should be there.  */
     38# include <stddef.h>
     39#endif
     40
     41/* The following two types have to be signed and unsigned integer type
     42   wide enough to hold a value of a pointer.  For most ANSI compilers
     43   ptrdiff_t and size_t should be likely OK.  Still size of these two
     44   types is 2 for Microsoft C.  Ugh... */
     45typedef long int s_reg_t;
     46typedef unsigned long int active_reg_t;
     47
     48/* The following bits are used to determine the regexp syntax we
     49   recognize.  The set/not-set meanings are chosen so that Emacs syntax
     50   remains the value 0.  The bits are given in alphabetical order, and
     51   the definitions shifted by one from the previous bit; thus, when we
     52   add or remove a bit, only one other definition need change.  */
     53typedef unsigned long int reg_syntax_t;
     54
     55/* If this bit is not set, then \ inside a bracket expression is literal.
     56   If set, then such a \ quotes the following character.  */
     57#define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)
     58
     59/* If this bit is not set, then + and ? are operators, and \+ and \? are
     60     literals.
     61   If set, then \+ and \? are operators and + and ? are literals.  */
     62#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
     63
     64/* If this bit is set, then character classes are supported.  They are:
     65     [:alpha:], [:upper:], [:lower:],  [:digit:], [:alnum:], [:xdigit:],
     66     [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
     67   If not set, then character classes are not supported.  */
     68#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
     69
     70/* If this bit is set, then ^ and $ are always anchors (outside bracket
     71     expressions, of course).
     72   If this bit is not set, then it depends:
     73        ^  is an anchor if it is at the beginning of a regular
     74           expression or after an open-group or an alternation operator;
     75        $  is an anchor if it is at the end of a regular expression, or
     76           before a close-group or an alternation operator.
     77
     78   This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
     79   POSIX draft 11.2 says that * etc. in leading positions is undefined.
     80   We already implemented a previous draft which made those constructs
     81   invalid, though, so we haven't changed the code back.  */
     82#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
     83
     84/* If this bit is set, then special characters are always special
     85     regardless of where they are in the pattern.
     86   If this bit is not set, then special characters are special only in
     87     some contexts; otherwise they are ordinary.  Specifically,
     88     * + ? and intervals are only special when not after the beginning,
     89     open-group, or alternation operator.  */
     90#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
     91
     92/* If this bit is set, then *, +, ?, and { cannot be first in an re or
     93     immediately after an alternation or begin-group operator.  */
     94#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
     95
     96/* If this bit is set, then . matches newline.
     97   If not set, then it doesn't.  */
     98#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
     99
     100/* If this bit is set, then . doesn't match NUL.
     101   If not set, then it does.  */
     102#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
     103
     104/* If this bit is set, nonmatching lists [^...] do not match newline.
     105   If not set, they do.  */
     106#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
     107
     108/* If this bit is set, either \{...\} or {...} defines an
     109     interval, depending on RE_NO_BK_BRACES.
     110   If not set, \{, \}, {, and } are literals.  */
     111#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
     112
     113/* If this bit is set, +, ? and | aren't recognized as operators.
     114   If not set, they are.  */
     115#define RE_LIMITED_OPS (RE_INTERVALS << 1)
     116
     117/* If this bit is set, newline is an alternation operator.
     118   If not set, newline is literal.  */
     119#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
     120
     121/* If this bit is set, then `{...}' defines an interval, and \{ and \}
     122     are literals.
     123  If not set, then `\{...\}' defines an interval.  */
     124#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
     125
     126/* If this bit is set, (...) defines a group, and \( and \) are literals.
     127   If not set, \(...\) defines a group, and ( and ) are literals.  */
     128#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
     129
     130/* If this bit is set, then \<digit> matches <digit>.
     131   If not set, then \<digit> is a back-reference.  */
     132#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
     133
     134/* If this bit is set, then | is an alternation operator, and \| is literal.
     135   If not set, then \| is an alternation operator, and | is literal.  */
     136#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
     137
     138/* If this bit is set, then an ending range point collating higher
     139     than the starting range point, as in [z-a], is invalid.
     140   If not set, then when ending range point collates higher than the
     141     starting range point, the range is ignored.  */
     142#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
     143
     144/* If this bit is set, then an unmatched ) is ordinary.
     145   If not set, then an unmatched ) is invalid.  */
     146#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
     147
     148/* If this bit is set, succeed as soon as we match the whole pattern,
     149   without further backtracking.  */
     150#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)
     151
     152/* If this bit is set, do not process the GNU regex operators.
     153   If not set, then the GNU regex operators are recognized. */
     154#define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)
     155
     156/* If this bit is set, turn on internal regex debugging.
     157   If not set, and debugging was on, turn it off.
     158   This only works if regex.c is compiled -DDEBUG.
     159   We define this bit always, so that all that's needed to turn on
     160   debugging is to recompile regex.c; the calling code can always have
     161   this bit set, and it won't affect anything in the normal case. */
     162#define RE_DEBUG (RE_NO_GNU_OPS << 1)
     163
     164/* If this bit is set, a syntactically invalid interval is treated as
     165   a string of ordinary characters.  For example, the ERE 'a{1' is
     166   treated as 'a\{1'.  */
     167#define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1)
     168
     169/* If this bit is set, then ignore case when matching.
     170   If not set, then case is significant.  */
     171#define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1)
     172
     173/* This bit is used internally like RE_CONTEXT_INDEP_ANCHORS but only
     174   for ^, because it is difficult to scan the regex backwards to find
     175   whether ^ should be special.  */
     176#define RE_CARET_ANCHORS_HERE (RE_ICASE << 1)
     177
     178/* If this bit is set, then \{ cannot be first in a BRE or
     179   immediately after an alternation or begin-group operator.  */
     180#define RE_CONTEXT_INVALID_DUP (RE_CARET_ANCHORS_HERE << 1)
     181
     182/* If this bit is set, then no_sub will be set to 1 during
     183   re_compile_pattern.  */
     184#define RE_NO_SUB (RE_CONTEXT_INVALID_DUP << 1)
     185
     186/* This global variable defines the particular regexp syntax to use (for
     187   some interfaces).  When a regexp is compiled, the syntax used is
     188   stored in the pattern buffer, so changing this does not affect
     189   already-compiled regexps.  */
     190extern reg_syntax_t re_syntax_options;
     191
     192
     193/* Define combinations of the above bits for the standard possibilities.
     194   (The [[[ comments delimit what gets put into the Texinfo file, so
     195   don't delete them!)  */
     196/* [[[begin syntaxes]]] */
     197#define RE_SYNTAX_EMACS 0
     198
     199#define RE_SYNTAX_AWK                                                   \
     200  (RE_BACKSLASH_ESCAPE_IN_LISTS   | RE_DOT_NOT_NULL                     \
     201   | RE_NO_BK_PARENS              | RE_NO_BK_REFS                       \
     202   | RE_NO_BK_VBAR                | RE_NO_EMPTY_RANGES                  \
     203   | RE_DOT_NEWLINE               | RE_CONTEXT_INDEP_ANCHORS            \
     204   | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)
     205
     206#define RE_SYNTAX_GNU_AWK                                               \
     207  ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG) \
     208   & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS            \
     209       | RE_CONTEXT_INVALID_OPS ))
     210
     211#define RE_SYNTAX_POSIX_AWK                                             \
     212  (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS              \
     213   | RE_INTERVALS           | RE_NO_GNU_OPS)
     214
     215#define RE_SYNTAX_GREP                                                  \
     216  (RE_BK_PLUS_QM              | RE_CHAR_CLASSES                         \
     217   | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS                            \
     218   | RE_NEWLINE_ALT)
     219
     220#define RE_SYNTAX_EGREP                                                 \
     221  (RE_CHAR_CLASSES        | RE_CONTEXT_INDEP_ANCHORS                    \
     222   | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE                    \
     223   | RE_NEWLINE_ALT       | RE_NO_BK_PARENS                             \
     224   | RE_NO_BK_VBAR)
     225
     226#define RE_SYNTAX_POSIX_EGREP                                           \
     227  (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES                     \
     228   | RE_INVALID_INTERVAL_ORD)
     229
     230/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff.  */
     231#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
     232
     233#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
     234
     235/* Syntax bits common to both basic and extended POSIX regex syntax.  */
     236#define _RE_SYNTAX_POSIX_COMMON                                         \
     237  (RE_CHAR_CLASSES | RE_DOT_NEWLINE      | RE_DOT_NOT_NULL              \
     238   | RE_INTERVALS  | RE_NO_EMPTY_RANGES)
     239
     240#define RE_SYNTAX_POSIX_BASIC                                           \
     241  (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM | RE_CONTEXT_INVALID_DUP)
     242
     243/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
     244   RE_LIMITED_OPS, i.e., \? \+ \| are not recognized.  Actually, this
     245   isn't minimal, since other operators, such as \`, aren't disabled.  */
     246#define RE_SYNTAX_POSIX_MINIMAL_BASIC                                   \
     247  (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
     248
     249#define RE_SYNTAX_POSIX_EXTENDED                                        \
     250  (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS                  \
     251   | RE_CONTEXT_INDEP_OPS   | RE_NO_BK_BRACES                           \
     252   | RE_NO_BK_PARENS        | RE_NO_BK_VBAR                             \
     253   | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD)
     254
     255/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is
     256   removed and RE_NO_BK_REFS is added.  */
     257#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED                                \
     258  (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS                  \
     259   | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES                           \
     260   | RE_NO_BK_PARENS        | RE_NO_BK_REFS                             \
     261   | RE_NO_BK_VBAR          | RE_UNMATCHED_RIGHT_PAREN_ORD)
     262/* [[[end syntaxes]]] */
     263
     264
     265/* Maximum number of duplicates an interval can allow.  Some systems
     266   (erroneously) define this in other header files, but we want our
     267   value, so remove any previous define.  */
     268#ifdef RE_DUP_MAX
     269# undef RE_DUP_MAX
     270#endif
     271/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows.  */
     272#define RE_DUP_MAX (0x7fff)
     273
     274
     275/* POSIX `cflags' bits (i.e., information for `regcomp').  */
     276
     277/* If this bit is set, then use extended regular expression syntax.
     278   If not set, then use basic regular expression syntax.  */
     279#define REG_EXTENDED 1
     280
     281/* If this bit is set, then ignore case when matching.
     282   If not set, then case is significant.  */
     283#define REG_ICASE (REG_EXTENDED << 1)
     284
     285/* If this bit is set, then anchors do not match at newline
     286     characters in the string.
     287   If not set, then anchors do match at newlines.  */
     288#define REG_NEWLINE (REG_ICASE << 1)
     289
     290/* If this bit is set, then report only success or fail in regexec.
     291   If not set, then returns differ between not matching and errors.  */
     292#define REG_NOSUB (REG_NEWLINE << 1)
     293
     294
     295/* POSIX `eflags' bits (i.e., information for regexec).  */
     296
     297/* If this bit is set, then the beginning-of-line operator doesn't match
     298     the beginning of the string (presumably because it's not the
     299     beginning of a line).
     300   If not set, then the beginning-of-line operator does match the
     301     beginning of the string.  */
     302#define REG_NOTBOL 1
     303
     304/* Like REG_NOTBOL, except for the end-of-line.  */
     305#define REG_NOTEOL (1 << 1)
     306
     307/* Use PMATCH[0] to delimit the start and end of the search in the
     308   buffer.  */
     309#define REG_STARTEND (1 << 2)
     310
     311
     312/* If any error codes are removed, changed, or added, update the
     313   `re_error_msg' table in regex.c.  */
     314typedef enum
     315{
     316#ifdef _XOPEN_SOURCE
     317  REG_ENOSYS = -1,      /* This will never happen for this implementation.  */
     318#endif
     319
     320  REG_NOERROR = 0,      /* Success.  */
     321  REG_NOMATCH,          /* Didn't find a match (for regexec).  */
     322
     323  /* POSIX regcomp return error codes.  (In the order listed in the
     324     standard.)  */
     325  REG_BADPAT,           /* Invalid pattern.  */
     326  REG_ECOLLATE,         /* Invalid collating element.  */
     327  REG_ECTYPE,           /* Invalid character class name.  */
     328  REG_EESCAPE,          /* Trailing backslash.  */
     329  REG_ESUBREG,          /* Invalid back reference.  */
     330  REG_EBRACK,           /* Unmatched left bracket.  */
     331  REG_EPAREN,           /* Parenthesis imbalance.  */
     332  REG_EBRACE,           /* Unmatched \{.  */
     333  REG_BADBR,            /* Invalid contents of \{\}.  */
     334  REG_ERANGE,           /* Invalid range end.  */
     335  REG_ESPACE,           /* Ran out of memory.  */
     336  REG_BADRPT,           /* No preceding re for repetition op.  */
     337
     338  /* Error codes we've added.  */
     339  REG_EEND,             /* Premature end.  */
     340  REG_ESIZE,            /* Compiled pattern bigger than 2^16 bytes.  */
     341  REG_ERPAREN           /* Unmatched ) or \); not returned from regcomp.  */
     342} reg_errcode_t;
     343
     344
     345/* This data structure represents a compiled pattern.  Before calling
     346   the pattern compiler, the fields `buffer', `allocated', `fastmap',
     347   `translate', and `no_sub' can be set.  After the pattern has been
     348   compiled, the `re_nsub' field is available.  All other fields are
     349   private to the regex routines.  */
     350
     351#ifndef RE_TRANSLATE_TYPE
     352# define RE_TRANSLATE_TYPE char *
     353#endif
     354
     355struct re_pattern_buffer
     356{
     357/* [[[begin pattern_buffer]]] */
     358        /* Space that holds the compiled pattern.  It is declared as
     359          `unsigned char *' because its elements are
     360           sometimes used as array indexes.  */
     361  unsigned char *buffer;
     362
     363        /* Number of bytes to which `buffer' points.  */
     364  unsigned long int allocated;
     365
     366        /* Number of bytes actually used in `buffer'.  */
     367  unsigned long int used;
     368
     369        /* Syntax setting with which the pattern was compiled.  */
     370  reg_syntax_t syntax;
     371
     372        /* Pointer to a fastmap, if any, otherwise zero.  re_search uses
     373           the fastmap, if there is one, to skip over impossible
     374           starting points for matches.  */
     375  char *fastmap;
     376
     377        /* Either a translate table to apply to all characters before
     378           comparing them, or zero for no translation.  The translation
     379           is applied to a pattern when it is compiled and to a string
     380           when it is matched.  */
     381  RE_TRANSLATE_TYPE translate;
     382
     383        /* Number of subexpressions found by the compiler.  */
     384  size_t re_nsub;
     385
     386        /* Zero if this pattern cannot match the empty string, one else.
     387           Well, in truth it's used only in `re_search_2', to see
     388           whether or not we should use the fastmap, so we don't set
     389           this absolutely perfectly; see `re_compile_fastmap' (the
     390           `duplicate' case).  */
     391  unsigned can_be_null : 1;
     392
     393        /* If REGS_UNALLOCATED, allocate space in the `regs' structure
     394             for `max (RE_NREGS, re_nsub + 1)' groups.
     395           If REGS_REALLOCATE, reallocate space if necessary.
     396           If REGS_FIXED, use what's there.  */
     397#define REGS_UNALLOCATED 0
     398#define REGS_REALLOCATE 1
     399#define REGS_FIXED 2
     400  unsigned regs_allocated : 2;
     401
     402        /* Set to zero when `regex_compile' compiles a pattern; set to one
     403           by `re_compile_fastmap' if it updates the fastmap.  */
     404  unsigned fastmap_accurate : 1;
     405
     406        /* If set, `re_match_2' does not return information about
     407           subexpressions.  */
     408  unsigned no_sub : 1;
     409
     410        /* If set, a beginning-of-line anchor doesn't match at the
     411           beginning of the string.  */
     412  unsigned not_bol : 1;
     413
     414        /* Similarly for an end-of-line anchor.  */
     415  unsigned not_eol : 1;
     416
     417        /* If true, an anchor at a newline matches.  */
     418  unsigned newline_anchor : 1;
     419
     420/* [[[end pattern_buffer]]] */
     421};
     422
     423typedef struct re_pattern_buffer regex_t;
     424
     425
     426/* Type for byte offsets within the string.  POSIX mandates this.  */
     427typedef int regoff_t;
     428
     429
     430/* This is the structure we store register match data in.  See
     431   regex.texinfo for a full description of what registers match.  */
     432struct re_registers
     433{
     434  unsigned num_regs;
     435  regoff_t *start;
     436  regoff_t *end;
     437};
     438
     439
     440/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
     441   `re_match_2' returns information about at least this many registers
     442   the first time a `regs' structure is passed.  */
     443#ifndef RE_NREGS
     444# define RE_NREGS 30
     445#endif
     446
     447
     448/* POSIX specification for registers.  Aside from the different names than
     449   `re_registers', POSIX uses an array of structures, instead of a
     450   structure of arrays.  */
     451typedef struct
     452{
     453  regoff_t rm_so;  /* Byte offset from string's start to substring's start.  */
     454  regoff_t rm_eo;  /* Byte offset from string's start to substring's end.  */
     455} regmatch_t;
     456
     457
     458/* Declarations for routines.  */
     459
     460/* To avoid duplicating every routine declaration -- once with a
     461   prototype (if we are ANSI), and once without (if we aren't) -- we
     462   use the following macro to declare argument types.  This
     463   unfortunately clutters up the declarations a bit, but I think it's
     464   worth it.  */
     465
     466#if __STDC__
     467
     468# define _RE_ARGS(args) args
     469
     470#else /* not __STDC__ */
     471
     472# define _RE_ARGS(args) ()
     473#error "asdf"
     474
     475#endif /* not __STDC__ */
     476
     477/* Sets the current default syntax to SYNTAX, and return the old syntax.
     478   You can also simply assign to the `re_syntax_options' variable.  */
     479extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax));
     480
     481/* Compile the regular expression PATTERN, with length LENGTH
     482   and syntax given by the global `re_syntax_options', into the buffer
     483   BUFFER.  Return NULL if successful, and an error string if not.  */
     484extern const char *re_compile_pattern
     485  _RE_ARGS ((const char *pattern, size_t length,
     486             struct re_pattern_buffer *buffer));
     487
     488
     489/* Compile a fastmap for the compiled pattern in BUFFER; used to
     490   accelerate searches.  Return 0 if successful and -2 if there was an
     491   internal error.  */
     492extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer));
     493
     494
     495/* Search in the string STRING (with length LENGTH) for the pattern
     496   compiled into BUFFER.  Start searching at position START, for RANGE
     497   characters.  Return the starting position of the match, -1 for no
     498   match, or -2 for an internal error.  Also return register
     499   information in REGS (if REGS is nonzero and BUFFER->no_sub is zero).  */
     500extern int re_search
     501  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
     502            int length, int start, int range, struct re_registers *regs));
     503
     504
     505/* Like `re_search', but search in the concatenation of STRING1 and
     506   STRING2.  Also, stop searching at index START + STOP.  */
     507extern int re_search_2
     508  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
     509             int length1, const char *string2, int length2,
     510             int start, int range, struct re_registers *regs, int stop));
     511
     512
     513/* Like `re_search', but return how many characters in STRING the regexp
     514   in BUFFER matched, starting at position START.  */
     515extern int re_match
     516  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
     517             int length, int start, struct re_registers *regs));
     518
     519
     520/* Relates to `re_match' as `re_search_2' relates to `re_search'.  */
     521extern int re_match_2
     522  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
     523             int length1, const char *string2, int length2,
     524             int start, struct re_registers *regs, int stop));
     525
     526
     527/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
     528   ENDS.  Subsequent matches using BUFFER and REGS will use this memory
     529   for recording register information.  STARTS and ENDS must be
     530   allocated with malloc, and must each be at least `NUM_REGS * sizeof
     531   (regoff_t)' bytes long.
     532
     533   If NUM_REGS == 0, then subsequent matches should allocate their own
     534   register data.
     535
     536   Unless this function is called, the first search or match using
     537   PATTERN_BUFFER will allocate its own register data, without
     538   freeing the old data.  */
     539extern void re_set_registers
     540  _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs,
     541             unsigned num_regs, regoff_t *starts, regoff_t *ends));
     542
     543#if defined _REGEX_RE_COMP || defined _LIBC
     544# ifndef _CRAY
     545/* 4.2 bsd compatibility.  */
     546extern char *re_comp _RE_ARGS ((const char *));
     547extern int re_exec _RE_ARGS ((const char *));
     548# endif
     549#endif
     550
     551/* GCC 2.95 and later have "__restrict"; C99 compilers have
     552   "restrict", and "configure" may have defined "restrict".  */
     553#ifndef __restrict
     554# if ! (2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__))
     555#  if defined restrict || 199901L <= __STDC_VERSION__
     556#   define __restrict restrict
     557#  else
     558#   define __restrict
     559#  endif
     560# endif
     561#endif
     562/* gcc 3.1 and up support the [restrict] syntax.  */
     563#ifndef __restrict_arr
     564# if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)
     565#  define __restrict_arr __restrict
     566# else
     567#  define __restrict_arr
     568# endif
     569#endif
     570
     571/* POSIX compatibility.  */
     572extern int regcomp _RE_ARGS ((regex_t *__restrict __preg,
     573                              const char *__restrict __pattern,
     574                              int __cflags));
     575
     576extern int regexec _RE_ARGS ((const regex_t *__restrict __preg,
     577                              const char *__restrict __string, size_t __nmatch,
     578                              regmatch_t __pmatch[__restrict_arr],
     579                              int __eflags));
     580
     581extern size_t regerror _RE_ARGS ((int __errcode, const regex_t *__preg,
     582                                  char *__errbuf, size_t __errbuf_size));
     583
     584extern void regfree _RE_ARGS ((regex_t *__preg));
     585
     586
     587#ifdef __cplusplus
     588}
     589#endif  /* C++ */
     590
     591#endif /* regex.h */
     592
     593
     594/*
     595Local variables:
     596make-backup-files: t
     597version-control: t
     598trim-versions-without-asking: nil
     599End:
    5600 */
    6 /*-
    7  * Copyright (c) 1992 Henry Spencer.
    8  * Copyright (c) 1992, 1993
    9  *      The Regents of the University of California.  All rights reserved.
    10  *
    11  * This code is derived from software contributed to Berkeley by
    12  * Henry Spencer of the University of Toronto.
    13  *
    14  * Redistribution and use in source and binary forms, with or without
    15  * modification, are permitted provided that the following conditions
    16  * are met:
    17  * 1. Redistributions of source code must retain the above copyright
    18  *    notice, this list of conditions and the following disclaimer.
    19  * 2. Redistributions in binary form must reproduce the above copyright
    20  *    notice, this list of conditions and the following disclaimer in the
    21  *    documentation and/or other materials provided with the distribution.
    22  * 3. All advertising materials mentioning features or use of this software
    23  *    must display the following acknowledgement:
    24  *      This product includes software developed by the University of
    25  *      California, Berkeley and its contributors.
    26  * 4. Neither the name of the University nor the names of its contributors
    27  *    may be used to endorse or promote products derived from this software
    28  *    without specific prior written permission.
    29  *
    30  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
    31  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    32  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    33  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
    34  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
    35  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
    36  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
    37  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
    38  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
    39  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
    40  * SUCH DAMAGE.
    41  *
    42  *      @(#)regex.h     8.2 (Berkeley) 1/3/94
    43  * $FreeBSD: src/include/regex.h,v 1.11 2004/07/12 06:07:26 tjr Exp $
    44  */
    45 
    46 #ifndef _REGEX_H_
    47 #define _REGEX_H_
    48 
    49 #include <sys/cdefs.h>
    50 #include <sys/_types.h>
    51 
    52 /* types */
    53 typedef __off_t         regoff_t;
    54 
    55 #if !defined(_SIZE_T_DECLARED) && !defined(_SIZE_T) /* bird: emx */
    56 typedef __size_t        size_t;
    57 #define _SIZE_T_DECLARED
    58 #define _SIZE_T                         /* bird: emx */
    59 #endif
    60 
    61 typedef struct {
    62         int re_magic;
    63         size_t re_nsub;         /* number of parenthesized subexpressions */
    64         __const char *re_endp;  /* end pointer for REG_PEND */
    65         struct re_guts *re_g;   /* none of your business :-) */
    66 } regex_t;
    67 
    68 typedef struct {
    69         regoff_t rm_so;         /* start of match */
    70         regoff_t rm_eo;         /* end of match */
    71 } regmatch_t;
    72 
    73 /* regcomp() flags */
    74 #define REG_BASIC       0000
    75 #define REG_EXTENDED    0001
    76 #define REG_ICASE       0002
    77 #define REG_NOSUB       0004
    78 #define REG_NEWLINE     0010
    79 #define REG_NOSPEC      0020
    80 #define REG_PEND        0040
    81 #define REG_DUMP        0200
    82 
    83 /* regerror() flags */
    84 #define REG_ENOSYS      (-1)
    85 #if !defined(REG_NOMATCH) && __USE_GNU  /* bird */
    86 #define REG_NOERROR      0              /* bird */
    87 #endif                                  /* bird */
    88 #define REG_NOMATCH      1
    89 #define REG_BADPAT       2
    90 #define REG_ECOLLATE     3
    91 #define REG_ECTYPE       4
    92 #define REG_EESCAPE      5
    93 #define REG_ESUBREG      6
    94 #define REG_EBRACK       7
    95 #define REG_EPAREN       8
    96 #define REG_EBRACE       9
    97 #define REG_BADBR       10
    98 #define REG_ERANGE      11
    99 #define REG_ESPACE      12
    100 #define REG_BADRPT      13
    101 #define REG_EMPTY       14
    102 #define REG_ASSERT      15
    103 #define REG_INVARG      16
    104 #define REG_ILLSEQ      17
    105 #define REG_ATOI        255     /* convert name to number (!) */
    106 #define REG_ITOA        0400    /* convert number to name (!) */
    107 
    108 /* regexec() flags */
    109 #define REG_NOTBOL      00001
    110 #define REG_NOTEOL      00002
    111 #define REG_STARTEND    00004
    112 #define REG_TRACE       00400   /* tracing of execution */
    113 #define REG_LARGE       01000   /* force large representation */
    114 #define REG_BACKR       02000   /* force use of backref code */
    115 
    116 __BEGIN_DECLS
    117 int     regcomp(regex_t * __restrict, const char * __restrict, int);
    118 size_t  regerror(int, const regex_t * __restrict, char * __restrict, size_t);
    119 /*
    120  * XXX fourth parameter should be `regmatch_t [__restrict]', but isn't because
    121  * of a bug in GCC 3.2 (when -std=c99 is specified) which perceives this as a
    122  * syntax error.
    123  */
    124 int     regexec(const regex_t * __restrict, const char * __restrict, size_t,
    125             regmatch_t * __restrict, int);
    126 void    regfree(regex_t *);
    127 __END_DECLS
    128 
    129 #endif /* !_REGEX_H_ */
Note: See TracChangeset for help on using the changeset viewer.