| 1 | /* | 
|---|
| 2 | * Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved. | 
|---|
| 3 | * | 
|---|
| 4 | * Development of this software was funded, in part, by Cray Research Inc., | 
|---|
| 5 | * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics | 
|---|
| 6 | * Corporation, none of whom are responsible for the results. The author | 
|---|
| 7 | * thanks all of them. | 
|---|
| 8 | * | 
|---|
| 9 | * Redistribution and use in source and binary forms - with or without | 
|---|
| 10 | * modification - are permitted for any purpose, provided that redistributions | 
|---|
| 11 | * in source form retain this entire copyright notice and indicate the origin | 
|---|
| 12 | * and nature of any modifications. | 
|---|
| 13 | * | 
|---|
| 14 | * I'd appreciate being given credit for this package in the documentation of | 
|---|
| 15 | * software which uses it, but that is not a requirement. | 
|---|
| 16 | * | 
|---|
| 17 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, | 
|---|
| 18 | * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY | 
|---|
| 19 | * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL | 
|---|
| 20 | * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | 
|---|
| 21 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | 
|---|
| 22 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; | 
|---|
| 23 | * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, | 
|---|
| 24 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR | 
|---|
| 25 | * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF | 
|---|
| 26 | * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 
|---|
| 27 | */ | 
|---|
| 28 |  | 
|---|
| 29 | /* | 
|---|
| 30 | * Headers if any. | 
|---|
| 31 | */ | 
|---|
| 32 |  | 
|---|
| 33 | #include "tclInt.h" | 
|---|
| 34 |  | 
|---|
| 35 | /* | 
|---|
| 36 | * Overrides for regguts.h definitions: function-pointer syntax and the allocator hooks. | 
|---|
| 37 | */ | 
|---|
| 38 |  | 
|---|
| 39 | #define FUNCPTR(name, args)     (*name)args     /* Pointer-to-function declarator */ | 
|---|
| 40 | #define MALLOC(n)               ckalloc(n)      /* Allocate through ckalloc */ | 
|---|
| 41 | #define FREE(p)                 ckfree(VS(p))   /* VS() is not defined here; it must be in scope where FREE is expanded */ | 
|---|
| 42 | #define REALLOC(p,n)            ckrealloc(VS(p),n)      /* Same VS() dependency as FREE */ | 
|---|
| 43 |  | 
|---|
| 44 | /* | 
|---|
| 45 | * Do not insert extras between the "begin" and "end" lines - this chunk is | 
|---|
| 46 | * automatically extracted to be fitted into regex.h. The chunk undefines eight __REG_* configuration macros, then defines them (plus regfree and regerror) for Tcl. | 
|---|
| 47 | */ | 
|---|
| 48 |  | 
|---|
| 49 | /* --- begin --- */ | 
|---|
| 50 | /* Ensure certain things don't sneak in from system headers. */ | 
|---|
| 51 | #ifdef __REG_WIDE_T | 
|---|
| 52 | #undef __REG_WIDE_T | 
|---|
| 53 | #endif | 
|---|
| 54 | #ifdef __REG_WIDE_COMPILE | 
|---|
| 55 | #undef __REG_WIDE_COMPILE | 
|---|
| 56 | #endif | 
|---|
| 57 | #ifdef __REG_WIDE_EXEC | 
|---|
| 58 | #undef __REG_WIDE_EXEC | 
|---|
| 59 | #endif | 
|---|
| 60 | #ifdef __REG_REGOFF_T | 
|---|
| 61 | #undef __REG_REGOFF_T | 
|---|
| 62 | #endif | 
|---|
| 63 | #ifdef __REG_VOID_T | 
|---|
| 64 | #undef __REG_VOID_T | 
|---|
| 65 | #endif | 
|---|
| 66 | #ifdef __REG_CONST | 
|---|
| 67 | #undef __REG_CONST | 
|---|
| 68 | #endif | 
|---|
| 69 | #ifdef __REG_NOFRONT | 
|---|
| 70 | #undef __REG_NOFRONT | 
|---|
| 71 | #endif | 
|---|
| 72 | #ifdef __REG_NOCHAR | 
|---|
| 73 | #undef __REG_NOCHAR | 
|---|
| 74 | #endif | 
|---|
| 75 | /* Interface types */ | 
|---|
| 76 | #define __REG_WIDE_T    Tcl_UniChar | 
|---|
| 77 | #define __REG_REGOFF_T  long    /* Not really right, but good enough... */ | 
|---|
| 78 | #define __REG_VOID_T    void | 
|---|
| 79 | #define __REG_CONST     const | 
|---|
| 80 | /* Names and declarations */ | 
|---|
| 81 | #define __REG_WIDE_COMPILE      TclReComp | 
|---|
| 82 | #define __REG_WIDE_EXEC         TclReExec | 
|---|
| 83 | #define __REG_NOFRONT           /* Don't want regcomp() and regexec() */ | 
|---|
| 84 | #define __REG_NOCHAR            /* Or the char versions */ | 
|---|
| 85 | #define regfree         TclReFree | 
|---|
| 86 | #define regerror        TclReError | 
|---|
| 87 | /* --- end --- */ | 
|---|
| 88 |  | 
|---|
| 89 | /* | 
|---|
| 90 | * Internal character type and related types, conversions and limits. CHRBITS, CHR_MIN and CHR_MAX take 32-bit values when TCL_UTF_MAX exceeds 3 and 16-bit values otherwise. | 
|---|
| 91 | */ | 
|---|
| 92 |  | 
|---|
| 93 | typedef Tcl_UniChar chr;        /* The type itself. */ | 
|---|
| 94 | typedef int pchr;               /* What it promotes to. */ | 
|---|
| 95 | typedef unsigned uchr;          /* Unsigned type that will hold a chr. */ | 
|---|
| 96 | typedef int celt;               /* Type to hold chr, or NOCELT */ | 
|---|
| 97 | #define NOCELT (-1)             /* Celt value which is not valid chr */ | 
|---|
| 98 | #define CHR(c) (UCHAR(c))       /* Turn char literal into chr literal */ | 
|---|
| 99 | #define DIGITVAL(c) ((c)-'0')   /* Turn chr digit into its value */ | 
|---|
| 100 | #if TCL_UTF_MAX > 3 | 
|---|
| 101 | #define CHRBITS 32              /* Bits in a chr; must not use sizeof */ | 
|---|
| 102 | #define CHR_MIN 0x00000000      /* Smallest and largest chr; the value */ | 
|---|
| 103 | #define CHR_MAX 0xffffffff      /* CHR_MAX-CHR_MIN+1 should fit in uchr; NOTE(review): here that value is 2^32, which does not fit a 32-bit unsigned - confirm */ | 
|---|
| 104 | #else | 
|---|
| 105 | #define CHRBITS 16              /* Bits in a chr; must not use sizeof */ | 
|---|
| 106 | #define CHR_MIN 0x0000          /* Smallest and largest chr; the value */ | 
|---|
| 107 | #define CHR_MAX 0xffff          /* CHR_MAX-CHR_MIN+1 should fit in uchr */ | 
|---|
| 108 | #endif | 
|---|
| 109 |  | 
|---|
| 110 | /* | 
|---|
| 111 | * Classification functions operating on chr, each mapped onto a Tcl_UniCharIs* routine. | 
|---|
| 112 | */ | 
|---|
| 113 |  | 
|---|
| 114 | #define iscalnum(x)     Tcl_UniCharIsAlnum(x) | 
|---|
| 115 | #define iscalpha(x)     Tcl_UniCharIsAlpha(x) | 
|---|
| 116 | #define iscdigit(x)     Tcl_UniCharIsDigit(x) | 
|---|
| 117 | #define iscspace(x)     Tcl_UniCharIsSpace(x) | 
|---|
| 118 |  | 
|---|
| 119 | /* | 
|---|
| 120 | * Name the external functions: compile and exec get the same names as __REG_WIDE_COMPILE and __REG_WIDE_EXEC. | 
|---|
| 121 | */ | 
|---|
| 122 |  | 
|---|
| 123 | #define compile         TclReComp | 
|---|
| 124 | #define exec            TclReExec | 
|---|
| 125 |  | 
|---|
| 126 | /* | 
|---|
| 127 | * Enable/disable debugging code (by whether REG_DEBUG is defined or not). | 
|---|
| 128 | */ | 
|---|
| 129 |  | 
|---|
| 130 | #if 0                           /* No debug unless requested by makefile. */ | 
|---|
| 131 | #define REG_DEBUG       /* */ | 
|---|
| 132 | #endif | 
|---|
| 133 |  | 
|---|
| 134 | /* | 
|---|
| 135 | * Method of allocating a local workspace. We use a thread-specific data | 
|---|
| 136 | * space to store this because the regular expression engine is never | 
|---|
| 137 | * reentered from the same thread; it doesn't make any callbacks. Only AllocVars is defined on the active path; FreeVars is defined solely by the disabled alternative below (presumably a default is supplied elsewhere - confirm). | 
|---|
| 138 | */ | 
|---|
| 139 |  | 
|---|
| 140 | #if 1 | 
|---|
| 141 | #define AllocVars(vPtr) \ | 
|---|
| 142 | static Tcl_ThreadDataKey varsKey; \ | 
|---|
| 143 | register struct vars *vPtr = (struct vars *) \ | 
|---|
| 144 | Tcl_GetThreadData(&varsKey, sizeof(struct vars)) | 
|---|
| 145 | #else | 
|---|
| 146 | /* | 
|---|
| 147 | * This strategy for allocating workspace is "more proper" in some sense, but | 
|---|
| 148 | * quite a bit slower. Using TSD (as above) leads to code that is quite a bit | 
|---|
| 149 | * faster in practice (measured!) | 
|---|
| 150 | */ | 
|---|
| 151 | #define AllocVars(vPtr) \ | 
|---|
| 152 | register struct vars *vPtr = (struct vars *) MALLOC(sizeof(struct vars)) | 
|---|
| 153 | #define FreeVars(vPtr) \ | 
|---|
| 154 | FREE(vPtr) | 
|---|
| 155 | #endif | 
|---|
| 156 |  | 
|---|
| 157 | /* | 
|---|
| 158 | * And pick up the standard header. | 
|---|
| 159 | */ | 
|---|
| 160 |  | 
|---|
| 161 | #include "regex.h" | 
|---|