| [25] | 1 | #ifndef _REGEX_H_ | 
|---|
|  | 2 | #define _REGEX_H_       /* never again */ | 
|---|
|  | 3 | /* | 
|---|
|  | 4 | * regular expressions | 
|---|
|  | 5 | * | 
|---|
|  | 6 | * Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved. | 
|---|
|  | 7 | * | 
|---|
|  | 8 | * Development of this software was funded, in part, by Cray Research Inc., | 
|---|
|  | 9 | * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics | 
|---|
|  | 10 | * Corporation, none of whom are responsible for the results. The author | 
|---|
|  | 11 | * thanks all of them. | 
|---|
|  | 12 | * | 
|---|
|  | 13 | * Redistribution and use in source and binary forms -- with or without | 
|---|
|  | 14 | * modification -- are permitted for any purpose, provided that | 
|---|
|  | 15 | * redistributions in source form retain this entire copyright notice and | 
|---|
|  | 16 | * indicate the origin and nature of any modifications. | 
|---|
|  | 17 | * | 
|---|
|  | 18 | * I'd appreciate being given credit for this package in the documentation of | 
|---|
|  | 19 | * software which uses it, but that is not a requirement. | 
|---|
|  | 20 | * | 
|---|
|  | 21 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, | 
|---|
|  | 22 | * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY | 
|---|
|  | 23 | * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL | 
|---|
|  | 24 | * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | 
|---|
|  | 25 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | 
|---|
|  | 26 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; | 
|---|
|  | 27 | * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, | 
|---|
|  | 28 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR | 
|---|
|  | 29 | * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF | 
|---|
|  | 30 | * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 
|---|
|  | 31 | * | 
|---|
|  | 32 | * | 
|---|
|  | 33 | * Prototypes etc. marked with "^" within comments get gathered up (and | 
|---|
|  | 34 | * possibly edited) by the regfwd program and inserted near the bottom of this | 
|---|
|  | 35 | * file. | 
|---|
|  | 36 | * | 
|---|
|  | 37 | * We offer the option of declaring one wide-character version of the RE | 
|---|
|  | 38 | * functions as well as the char versions. To do that, define __REG_WIDE_T to | 
|---|
|  | 39 | * the type of wide characters (unfortunately, there is no consensus that | 
|---|
|  | 40 | * wchar_t is suitable) and __REG_WIDE_COMPILE and __REG_WIDE_EXEC to the | 
|---|
|  | 41 | * names to be used for the compile and execute functions (suggestion: | 
|---|
|  | 42 | * re_Xcomp and re_Xexec, where X is a letter suggestive of the wide type, | 
|---|
|  | 43 | * e.g. re_ucomp and re_uexec for Unicode). For cranky old compilers, it may | 
|---|
|  | 44 | * be necessary to do something like: | 
|---|
|  | 45 | * #define      __REG_WIDE_COMPILE(a,b,c,d)     re_Xcomp(a,b,c,d) | 
|---|
|  | 46 | * #define      __REG_WIDE_EXEC(a,b,c,d,e,f,g)  re_Xexec(a,b,c,d,e,f,g) | 
|---|
|  | 47 | * rather than just #defining the names as parameterless macros. | 
|---|
|  | 48 | * | 
|---|
|  | 49 | * For some specialized purposes, it may be desirable to suppress the | 
|---|
|  | 50 | * declarations of the "front end" functions, regcomp() and regexec(), or of | 
|---|
|  | 51 | * the char versions of the compile and execute functions. To suppress the | 
|---|
|  | 52 | * front-end functions, define __REG_NOFRONT. To suppress the char versions, | 
|---|
|  | 53 | * define __REG_NOCHAR. | 
|---|
|  | 54 | * | 
|---|
|  | 55 | * The right place to do those defines (and some others you may want, see | 
|---|
|  | 56 | * below) would be <sys/types.h>. If you don't have control of that file, the | 
|---|
|  | 57 | * right place to add your own defines to this file is marked below. This is | 
|---|
|  | 58 | * normally done automatically, by the makefile and regmkhdr, based on the | 
|---|
|  | 59 | * contents of regcustom.h. | 
|---|
|  | 60 | */ | 
|---|
|  | 61 |  | 
|---|
|  | 62 | /* | 
|---|
|  | 63 | * voodoo for C++ | 
|---|
|  | 64 | */ | 
|---|
|  | 65 | #ifdef __cplusplus | 
|---|
|  | 66 | extern "C" { | 
|---|
|  | 67 | #endif | 
|---|
|  | 68 |  | 
|---|
|  | 69 | /* | 
|---|
|  | 70 | * Add your own defines, if needed, here. | 
|---|
|  | 71 | */ | 
|---|
|  | 72 |  | 
|---|
|  | 73 | /* | 
|---|
|  | 74 | * Location where a chunk of regcustom.h is automatically spliced into this | 
|---|
|  | 75 | * file (working from its prototype, regproto.h). | 
|---|
|  | 76 | */ | 
|---|
|  | 77 |  | 
|---|
|  | 78 | /* --- begin --- */ | 
|---|
|  | 79 | /* ensure certain things don't sneak in from system headers */ | 
|---|
|  | 80 | #ifdef __REG_WIDE_T | 
|---|
|  | 81 | #undef __REG_WIDE_T | 
|---|
|  | 82 | #endif | 
|---|
|  | 83 | #ifdef __REG_WIDE_COMPILE | 
|---|
|  | 84 | #undef __REG_WIDE_COMPILE | 
|---|
|  | 85 | #endif | 
|---|
|  | 86 | #ifdef __REG_WIDE_EXEC | 
|---|
|  | 87 | #undef __REG_WIDE_EXEC | 
|---|
|  | 88 | #endif | 
|---|
|  | 89 | #ifdef __REG_REGOFF_T | 
|---|
|  | 90 | #undef __REG_REGOFF_T | 
|---|
|  | 91 | #endif | 
|---|
|  | 92 | #ifdef __REG_VOID_T | 
|---|
|  | 93 | #undef __REG_VOID_T | 
|---|
|  | 94 | #endif | 
|---|
|  | 95 | #ifdef __REG_CONST | 
|---|
|  | 96 | #undef __REG_CONST | 
|---|
|  | 97 | #endif | 
|---|
|  | 98 | #ifdef __REG_NOFRONT | 
|---|
|  | 99 | #undef __REG_NOFRONT | 
|---|
|  | 100 | #endif | 
|---|
|  | 101 | #ifdef __REG_NOCHAR | 
|---|
|  | 102 | #undef __REG_NOCHAR | 
|---|
|  | 103 | #endif | 
|---|
|  | 104 | /* interface types */ | 
|---|
|  | 105 | #define __REG_WIDE_T    Tcl_UniChar | 
|---|
|  | 106 | #define __REG_REGOFF_T  long    /* not really right, but good enough... */ | 
|---|
|  | 107 | #define __REG_VOID_T    VOID | 
|---|
|  | 108 | #define __REG_CONST     CONST | 
|---|
|  | 109 | /* names and declarations */ | 
|---|
|  | 110 | #define __REG_WIDE_COMPILE      TclReComp | 
|---|
|  | 111 | #define __REG_WIDE_EXEC         TclReExec | 
|---|
|  | 112 | #define __REG_NOFRONT           /* don't want regcomp() and regexec() */ | 
|---|
|  | 113 | #define __REG_NOCHAR            /* or the char versions */ | 
|---|
|  | 114 | #define regfree         TclReFree | 
|---|
|  | 115 | #define regerror        TclReError | 
|---|
|  | 116 | /* --- end --- */ | 
|---|
|  | 117 |  | 
|---|
|  | 118 | /* | 
|---|
|  | 119 | * interface types etc. | 
|---|
|  | 120 | */ | 
|---|
|  | 121 |  | 
|---|
|  | 122 | /* | 
|---|
|  | 123 | * regoff_t has to be large enough to hold either off_t or ssize_t, and must | 
|---|
|  | 124 | * be signed; it's only a guess that long is suitable, so we offer | 
|---|
|  | 125 | * <sys/types.h> an override. | 
|---|
|  | 126 | */ | 
|---|
|  | 127 | #ifdef __REG_REGOFF_T | 
|---|
|  | 128 | typedef __REG_REGOFF_T regoff_t; /* branch taken in this file: __REG_REGOFF_T is #defined as long in the spliced block earlier on */ | 
|---|
|  | 129 | #else | 
|---|
|  | 130 | typedef long regoff_t; /* fallback used only when no __REG_REGOFF_T override is defined */ | 
|---|
|  | 131 | #endif | 
|---|
|  | 132 |  | 
|---|
|  | 133 | /* | 
|---|
|  | 134 | * For benefit of old compilers, we offer <sys/types.h> the option of | 
|---|
|  | 135 | * overriding the `void' type used to declare nonexistent return types. | 
|---|
|  | 136 | */ | 
|---|
|  | 137 | #ifdef __REG_VOID_T | 
|---|
|  | 138 | typedef __REG_VOID_T re_void; /* branch taken in this file: __REG_VOID_T is #defined as VOID in the spliced block earlier on */ | 
|---|
|  | 139 | #else | 
|---|
|  | 140 | typedef void re_void; /* fallback used only when no __REG_VOID_T override is defined */ | 
|---|
|  | 141 | #endif | 
|---|
|  | 142 |  | 
|---|
|  | 143 | /* | 
|---|
|  | 144 | * Also for benefit of old compilers, <sys/types.h> can supply a macro which | 
|---|
|  | 145 | * expands to a substitute for `const'. | 
|---|
|  | 146 | */ | 
|---|
|  | 147 | #ifndef __REG_CONST | 
|---|
|  | 148 | #define __REG_CONST     const /* default qualifier; skipped in this file because the spliced block earlier on already #defines __REG_CONST as CONST */ | 
|---|
|  | 149 | #endif | 
|---|
|  | 150 |  | 
|---|
|  | 151 |  | 
|---|
|  | 152 |  | 
|---|
|  | 153 | /* | 
|---|
|  | 154 | * other interface types | 
|---|
|  | 155 | */ | 
|---|
|  | 156 |  | 
|---|
|  | 157 | /* regex_t: the biggie, a compiled RE (or rather, a front end to same); the visible fields come first, followed by opaque pointers to the hidden innards */ | 
|---|
|  | 158 | typedef struct { | 
|---|
|  | 159 | int re_magic;               /* magic number */ | 
|---|
|  | 160 | size_t re_nsub;             /* number of subexpressions */ | 
|---|
|  | 161 | long re_info;               /* information about RE; presumably a bitwise OR of the REG_U* flags defined just below -- TODO confirm */ | 
|---|
|  | 162 | #define REG_UBACKREF            000001 /* the REG_U* values are octal constants, each a distinct single bit */ | 
|---|
|  | 163 | #define REG_ULOOKAHEAD          000002 | 
|---|
|  | 164 | #define REG_UBOUNDS             000004 | 
|---|
|  | 165 | #define REG_UBRACES             000010 | 
|---|
|  | 166 | #define REG_UBSALNUM            000020 | 
|---|
|  | 167 | #define REG_UPBOTCH             000040 | 
|---|
|  | 168 | #define REG_UBBS                000100 | 
|---|
|  | 169 | #define REG_UNONPOSIX           000200 | 
|---|
|  | 170 | #define REG_UUNSPEC             000400 | 
|---|
|  | 171 | #define REG_UUNPORT             001000 | 
|---|
|  | 172 | #define REG_ULOCALE             002000 | 
|---|
|  | 173 | #define REG_UEMPTYMATCH         004000 | 
|---|
|  | 174 | #define REG_UIMPOSSIBLE         010000 | 
|---|
|  | 175 | #define REG_USHORTEST           020000 | 
|---|
|  | 176 | int re_csize;               /* sizeof(character) */ | 
|---|
|  | 177 | char *re_endp;              /* backward compatibility kludge */ | 
|---|
|  | 178 | /* the rest is opaque pointers to hidden innards; callers should not interpret them */ | 
|---|
|  | 179 | char *re_guts;              /* `char *' is more portable than `void *' */ | 
|---|
|  | 180 | char *re_fns;               /* opaque, like re_guts */ | 
|---|
|  | 181 | } regex_t; | 
|---|
|  | 182 |  | 
|---|
|  | 183 | /* regmatch_t: result reporting, one start/end pair per reported substring (may acquire more fields later) */ | 
|---|
|  | 184 | typedef struct { | 
|---|
|  | 185 | regoff_t rm_so;             /* start of substring, stored as a regoff_t */ | 
|---|
|  | 186 | regoff_t rm_eo;             /* end of substring, stored as a regoff_t */ | 
|---|
|  | 187 | } regmatch_t; | 
|---|
|  | 188 |  | 
|---|
|  | 189 | /* rm_detail_t: supplementary control and reporting; taken as a pointer argument by the execute functions declared in this header */ | 
|---|
|  | 190 | typedef struct { | 
|---|
|  | 191 | regmatch_t rm_extend;       /* see REG_EXPECT, the compile flag described as "report details on partial/limited matches" */ | 
|---|
|  | 192 | } rm_detail_t; | 
|---|
|  | 193 |  | 
|---|
|  | 194 | /* | 
|---|
|  | 195 | * compilation | 
|---|
|  | 196 | ^ #ifndef __REG_NOCHAR | 
|---|
|  | 197 | ^ int re_comp(regex_t *, __REG_CONST char *, size_t, int); | 
|---|
|  | 198 | ^ #endif | 
|---|
|  | 199 | ^ #ifndef __REG_NOFRONT | 
|---|
|  | 200 | ^ int regcomp(regex_t *, __REG_CONST char *, int); | 
|---|
|  | 201 | ^ #endif | 
|---|
|  | 202 | ^ #ifdef __REG_WIDE_T | 
|---|
|  | 203 | ^ int __REG_WIDE_COMPILE(regex_t *, __REG_CONST __REG_WIDE_T *, size_t, int); | 
|---|
|  | 204 | ^ #endif | 
|---|
|  | 205 | */ | 
|---|
|  | 206 | #define REG_BASIC       000000  /* BREs (convenience); no bits set */ | 
|---|
|  | 207 | #define REG_EXTENDED    000001  /* EREs */ | 
|---|
|  | 208 | #define REG_ADVF        000002  /* advanced features in EREs */ | 
|---|
|  | 209 | #define REG_ADVANCED    000003  /* AREs (which are also EREs); equals REG_EXTENDED OR'd with REG_ADVF */ | 
|---|
|  | 210 | #define REG_QUOTE       000004  /* no special characters, none */ | 
|---|
|  | 211 | #define REG_NOSPEC      REG_QUOTE       /* historical synonym */ | 
|---|
|  | 212 | #define REG_ICASE       000010  /* ignore case */ | 
|---|
|  | 213 | #define REG_NOSUB       000020  /* don't care about subexpressions */ | 
|---|
|  | 214 | #define REG_EXPANDED    000040  /* expanded format, white space & comments */ | 
|---|
|  | 215 | #define REG_NLSTOP      000100  /* \n doesn't match . or [^ ] */ | 
|---|
|  | 216 | #define REG_NLANCH      000200  /* ^ matches after \n, $ before */ | 
|---|
|  | 217 | #define REG_NEWLINE     000300  /* newlines are line terminators; equals REG_NLSTOP OR'd with REG_NLANCH */ | 
|---|
|  | 218 | #define REG_PEND        000400  /* ugh -- backward-compatibility hack */ | 
|---|
|  | 219 | #define REG_EXPECT      001000  /* report details on partial/limited matches; see rm_detail_t */ | 
|---|
|  | 220 | #define REG_BOSONLY     002000  /* temporary kludge for BOS-only matches */ | 
|---|
|  | 221 | #define REG_DUMP        004000  /* none of your business :-) */ | 
|---|
|  | 222 | #define REG_FAKE        010000  /* none of your business :-) */ | 
|---|
|  | 223 | #define REG_PROGRESS    020000  /* none of your business :-) */ | 
|---|
|  | 224 |  | 
|---|
|  | 225 | /* | 
|---|
|  | 226 | * execution | 
|---|
|  | 227 | ^ #ifndef __REG_NOCHAR | 
|---|
|  | 228 | ^ int re_exec(regex_t *, __REG_CONST char *, size_t, | 
|---|
|  | 229 | ^                              rm_detail_t *, size_t, regmatch_t [], int); | 
|---|
|  | 230 | ^ #endif | 
|---|
|  | 231 | ^ #ifndef __REG_NOFRONT | 
|---|
|  | 232 | ^ int regexec(regex_t *, __REG_CONST char *, size_t, regmatch_t [], int); | 
|---|
|  | 233 | ^ #endif | 
|---|
|  | 234 | ^ #ifdef __REG_WIDE_T | 
|---|
|  | 235 | ^ int __REG_WIDE_EXEC(regex_t *, __REG_CONST __REG_WIDE_T *, size_t, | 
|---|
|  | 236 | ^                              rm_detail_t *, size_t, regmatch_t [], int); | 
|---|
|  | 237 | ^ #endif | 
|---|
|  | 238 | */ | 
|---|
|  | 239 | #define REG_NOTBOL      0001    /* BOS (beginning of string) is not BOL (beginning of line) */ | 
|---|
|  | 240 | #define REG_NOTEOL      0002    /* EOS (end of string) is not EOL (end of line) */ | 
|---|
|  | 241 | #define REG_STARTEND    0004    /* backward compatibility kludge */ | 
|---|
|  | 242 | #define REG_FTRACE      0010    /* none of your business */ | 
|---|
|  | 243 | #define REG_MTRACE      0020    /* none of your business */ | 
|---|
|  | 244 | #define REG_SMALL       0040    /* none of your business */ | 
|---|
|  | 245 |  | 
|---|
|  | 246 | /* | 
|---|
|  | 247 | * misc generics (may be more functions here eventually) | 
|---|
|  | 248 | ^ re_void regfree(regex_t *); | 
|---|
|  | 249 | */ | 
|---|
|  | 250 |  | 
|---|
|  | 251 | /* | 
|---|
|  | 252 | * error reporting | 
|---|
|  | 253 | * Be careful if modifying the list of error codes -- the table used by | 
|---|
|  | 254 | * regerror() is generated automatically from this file! | 
|---|
|  | 255 | * | 
|---|
|  | 256 | * Note that there is no wide-char variant of regerror at this time; what kind | 
|---|
|  | 257 | * of character is used for error reports is independent of what kind is used | 
|---|
|  | 258 | * in matching. | 
|---|
|  | 259 | * | 
|---|
|  | 260 | ^ extern size_t regerror(int, __REG_CONST regex_t *, char *, size_t); | 
|---|
|  | 261 | */ | 
|---|
|  | 262 | #define REG_OKAY         0      /* no errors detected */ | 
|---|
|  | 263 | #define REG_NOMATCH      1      /* failed to match */ | 
|---|
|  | 264 | #define REG_BADPAT       2      /* invalid regexp */ | 
|---|
|  | 265 | #define REG_ECOLLATE     3      /* invalid collating element */ | 
|---|
|  | 266 | #define REG_ECTYPE       4      /* invalid character class */ | 
|---|
|  | 267 | #define REG_EESCAPE      5      /* invalid escape \ sequence */ | 
|---|
|  | 268 | #define REG_ESUBREG      6      /* invalid backreference number */ | 
|---|
|  | 269 | #define REG_EBRACK       7      /* brackets [] not balanced */ | 
|---|
|  | 270 | #define REG_EPAREN       8      /* parentheses () not balanced */ | 
|---|
|  | 271 | #define REG_EBRACE       9      /* braces {} not balanced */ | 
|---|
|  | 272 | #define REG_BADBR       10      /* invalid repetition count(s) */ | 
|---|
|  | 273 | #define REG_ERANGE      11      /* invalid character range */ | 
|---|
|  | 274 | #define REG_ESPACE      12      /* out of memory */ | 
|---|
|  | 275 | #define REG_BADRPT      13      /* quantifier operand invalid */ | 
|---|
|  | 276 | #define REG_ASSERT      15      /* "can't happen" -- you found a bug */ | 
|---|
|  | 277 | #define REG_INVARG      16      /* invalid argument to regex function */ | 
|---|
|  | 278 | #define REG_MIXED       17      /* character widths of regex and string differ */ | 
|---|
|  | 279 | #define REG_BADOPT      18      /* invalid embedded option */ | 
|---|
|  | 280 | #define REG_ETOOBIG     19      /* nfa has too many states */ | 
|---|
|  | 281 | /* two specials for debugging and testing */ | 
|---|
|  | 282 | #define REG_ATOI        101     /* convert error-code name to number */ | 
|---|
|  | 283 | #define REG_ITOA        102     /* convert error-code number to name */ | 
|---|
|  | 284 |  | 
|---|
|  | 285 | /* | 
|---|
|  | 286 | * the prototypes, as possibly munched by regfwd | 
|---|
|  | 287 | */ | 
|---|
|  | 288 | /* =====^!^===== begin forwards =====^!^===== */ | 
|---|
|  | 289 | /* automatically gathered by fwd; do not hand-edit */ | 
|---|
|  | 290 | /* === regproto.h === */ | 
|---|
|  | 291 | #ifndef __REG_NOCHAR | 
|---|
|  | 292 | int re_comp(regex_t *, __REG_CONST char *, size_t, int); | 
|---|
|  | 293 | #endif | 
|---|
|  | 294 | #ifndef __REG_NOFRONT | 
|---|
|  | 295 | int regcomp(regex_t *, __REG_CONST char *, int); | 
|---|
|  | 296 | #endif | 
|---|
|  | 297 | #ifdef __REG_WIDE_T | 
|---|
|  | 298 | MODULE_SCOPE int __REG_WIDE_COMPILE(regex_t *, __REG_CONST __REG_WIDE_T *, size_t, int); | 
|---|
|  | 299 | #endif | 
|---|
|  | 300 | #ifndef __REG_NOCHAR | 
|---|
|  | 301 | int re_exec(regex_t *, __REG_CONST char *, size_t, rm_detail_t *, size_t, regmatch_t [], int); | 
|---|
|  | 302 | #endif | 
|---|
|  | 303 | #ifndef __REG_NOFRONT | 
|---|
|  | 304 | int regexec(regex_t *, __REG_CONST char *, size_t, regmatch_t [], int); | 
|---|
|  | 305 | #endif | 
|---|
|  | 306 | #ifdef __REG_WIDE_T | 
|---|
|  | 307 | MODULE_SCOPE int __REG_WIDE_EXEC(regex_t *, __REG_CONST __REG_WIDE_T *, size_t, rm_detail_t *, size_t, regmatch_t [], int); | 
|---|
|  | 308 | #endif | 
|---|
|  | 309 | MODULE_SCOPE re_void regfree(regex_t *); | 
|---|
|  | 310 | MODULE_SCOPE size_t regerror(int, __REG_CONST regex_t *, char *, size_t); | 
|---|
|  | 311 | /* automatically gathered by fwd; do not hand-edit */ | 
|---|
|  | 312 | /* =====^!^===== end forwards =====^!^===== */ | 
|---|
|  | 313 |  | 
|---|
|  | 314 | /* | 
|---|
|  | 315 | * more C++ voodoo | 
|---|
|  | 316 | */ | 
|---|
|  | 317 | #ifdef __cplusplus | 
|---|
|  | 318 | } | 
|---|
|  | 319 | #endif | 
|---|
|  | 320 |  | 
|---|
|  | 321 | #endif | 
|---|