[25] | 1 | /* |
---|
| 2 | * tclRegexp.c -- |
---|
| 3 | * |
---|
| 4 | * This file contains the public interfaces to the Tcl regular expression |
---|
| 5 | * mechanism. |
---|
| 6 | * |
---|
| 7 | * Copyright (c) 1998 by Sun Microsystems, Inc. |
---|
| 8 | * Copyright (c) 1998-1999 by Scriptics Corporation. |
---|
| 9 | * |
---|
| 10 | * See the file "license.terms" for information on usage and redistribution of |
---|
| 11 | * this file, and for a DISCLAIMER OF ALL WARRANTIES. |
---|
| 12 | * |
---|
| 13 | * RCS: @(#) $Id: tclRegexp.c,v 1.28 2007/12/13 15:23:20 dgp Exp $ |
---|
| 14 | */ |
---|
| 15 | |
---|
| 16 | #include "tclInt.h" |
---|
| 17 | #include "tclRegexp.h" |
---|
| 18 | |
---|
| 19 | /* |
---|
| 20 | *---------------------------------------------------------------------- |
---|
| 21 | * The routines in this file use Henry Spencer's regular expression package |
---|
| 22 | * contained in the following additional source files: |
---|
| 23 | * |
---|
| 24 | * regc_color.c regc_cvec.c regc_lex.c |
---|
| 25 | * regc_nfa.c regcomp.c regcustom.h |
---|
| 26 | * rege_dfa.c regerror.c regerrs.h |
---|
| 27 | * regex.h regexec.c regfree.c |
---|
| 28 | * regfronts.c regguts.h |
---|
| 29 | * |
---|
| 30 | * Copyright (c) 1998 Henry Spencer. All rights reserved. |
---|
| 31 | * |
---|
| 32 | * Development of this software was funded, in part, by Cray Research Inc., |
---|
| 33 | * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics |
---|
| 34 | * Corporation, none of whom are responsible for the results. The author |
---|
| 35 | * thanks all of them. |
---|
| 36 | * |
---|
| 37 | * Redistribution and use in source and binary forms -- with or without |
---|
| 38 | * modification -- are permitted for any purpose, provided that |
---|
| 39 | * redistributions in source form retain this entire copyright notice and |
---|
| 40 | * indicate the origin and nature of any modifications. |
---|
| 41 | * |
---|
| 42 | * I'd appreciate being given credit for this package in the documentation of |
---|
| 43 | * software which uses it, but that is not a requirement. |
---|
| 44 | * |
---|
| 45 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, |
---|
| 46 | * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY |
---|
| 47 | * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL |
---|
| 48 | * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
---|
| 49 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
---|
| 50 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; |
---|
| 51 | * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, |
---|
| 52 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR |
---|
| 53 | * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF |
---|
| 54 | * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
---|
| 55 | * |
---|
| 56 | * *** NOTE: this code has been altered slightly for use in Tcl: *** |
---|
| 57 | * *** 1. Names have been changed, e.g. from re_comp to *** |
---|
| 58 | * *** TclRegComp, to avoid clashes with other *** |
---|
| 59 | * *** regexp implementations used by applications. *** |
---|
| 60 | */ |
---|
| 61 | |
---|
| 62 | /* |
---|
| 63 | * Thread local storage used to maintain a per-thread cache of compiled |
---|
| 64 | * regular expressions. |
---|
| 65 | */ |
---|
| 66 | |
---|
| 67 | #define NUM_REGEXPS 30 |
---|
| 68 | |
---|
| 69 | typedef struct ThreadSpecificData { |
---|
| 70 | int initialized; /* Set to 1 when the module is initialized. */ |
---|
| 71 | char *patterns[NUM_REGEXPS];/* Strings corresponding to compiled regular |
---|
| 72 | * expression patterns. NULL means that this |
---|
| 73 | * slot isn't used. Malloc-ed. */ |
---|
| 74 | int patLengths[NUM_REGEXPS];/* Number of non-null characters in |
---|
| 75 | * corresponding entry in patterns. -1 means |
---|
| 76 | * entry isn't used. */ |
---|
| 77 | struct TclRegexp *regexps[NUM_REGEXPS]; |
---|
| 78 | /* Compiled forms of above strings. Also |
---|
| 79 | * malloc-ed, or NULL if not in use yet. */ |
---|
| 80 | } ThreadSpecificData; |
---|
| 81 | |
---|
| 82 | static Tcl_ThreadDataKey dataKey; |
---|
| 83 | |
---|
| 84 | /* |
---|
| 85 | * Declarations for functions used only in this file. |
---|
| 86 | */ |
---|
| 87 | |
---|
| 88 | static TclRegexp * CompileRegexp(Tcl_Interp *interp, const char *pattern, |
---|
| 89 | int length, int flags); |
---|
| 90 | static void DupRegexpInternalRep(Tcl_Obj *srcPtr, |
---|
| 91 | Tcl_Obj *copyPtr); |
---|
| 92 | static void FinalizeRegexp(ClientData clientData); |
---|
| 93 | static void FreeRegexp(TclRegexp *regexpPtr); |
---|
| 94 | static void FreeRegexpInternalRep(Tcl_Obj *objPtr); |
---|
| 95 | static int RegExpExecUniChar(Tcl_Interp *interp, Tcl_RegExp re, |
---|
| 96 | const Tcl_UniChar *uniString, int numChars, |
---|
| 97 | int nmatches, int flags); |
---|
| 98 | static int SetRegexpFromAny(Tcl_Interp *interp, Tcl_Obj *objPtr); |
---|
| 99 | |
---|
| 100 | /* |
---|
| 101 | * The regular expression Tcl object type. This serves as a cache of the |
---|
| 102 | * compiled form of the regular expression. |
---|
| 103 | */ |
---|
| 104 | |
---|
| 105 | Tcl_ObjType tclRegexpType = { |
---|
| 106 | "regexp", /* name */ |
---|
| 107 | FreeRegexpInternalRep, /* freeIntRepProc */ |
---|
| 108 | DupRegexpInternalRep, /* dupIntRepProc */ |
---|
| 109 | NULL, /* updateStringProc */ |
---|
| 110 | SetRegexpFromAny /* setFromAnyProc */ |
---|
| 111 | }; |
---|
| 112 | |
---|
| 113 | /* |
---|
| 114 | *---------------------------------------------------------------------- |
---|
| 115 | * |
---|
| 116 | * Tcl_RegExpCompile -- |
---|
| 117 | * |
---|
| 118 | * Compile a regular expression into a form suitable for fast matching. |
---|
| 119 | * This function is DEPRECATED in favor of the object version of the |
---|
| 120 | * command. |
---|
| 121 | * |
---|
| 122 | * Results: |
---|
| 123 | * The return value is a pointer to the compiled form of string, suitable |
---|
| 124 | * for passing to Tcl_RegExpExec. This compiled form is only valid up |
---|
| 125 | * until the next call to this function, so don't keep these around for a |
---|
| 126 | * long time! If an error occurred while compiling the pattern, then NULL |
---|
| 127 | * is returned and an error message is left in the interp's result. |
---|
| 128 | * |
---|
| 129 | * Side effects: |
---|
| 130 | * Updates the cache of compiled regexps. |
---|
| 131 | * |
---|
| 132 | *---------------------------------------------------------------------- |
---|
| 133 | */ |
---|
| 134 | |
---|
| 135 | Tcl_RegExp |
---|
| 136 | Tcl_RegExpCompile( |
---|
| 137 | Tcl_Interp *interp, /* For use in error reporting and to access |
---|
| 138 | * the interp regexp cache. */ |
---|
| 139 | const char *pattern) /* String for which to produce compiled |
---|
| 140 | * regular expression. */ |
---|
| 141 | { |
---|
| 142 | return (Tcl_RegExp) CompileRegexp(interp, pattern, (int) strlen(pattern), |
---|
| 143 | REG_ADVANCED); |
---|
| 144 | } |
---|
| 145 | |
---|
| 146 | /* |
---|
| 147 | *---------------------------------------------------------------------- |
---|
| 148 | * |
---|
| 149 | * Tcl_RegExpExec -- |
---|
| 150 | * |
---|
| 151 | * Execute the regular expression matcher using a compiled form of a |
---|
| 152 | * regular expression and save information about any match that is found. |
---|
| 153 | * |
---|
| 154 | * Results: |
---|
| 155 | * If an error occurs during the matching operation then -1 is returned |
---|
| 156 | * and the interp's result contains an error message. Otherwise the |
---|
| 157 | * return value is 1 if a matching range is found and 0 if there is no |
---|
| 158 | * matching range. |
---|
| 159 | * |
---|
| 160 | * Side effects: |
---|
| 161 | * None. |
---|
| 162 | * |
---|
| 163 | *---------------------------------------------------------------------- |
---|
| 164 | */ |
---|
| 165 | |
---|
| 166 | int |
---|
| 167 | Tcl_RegExpExec( |
---|
| 168 | Tcl_Interp *interp, /* Interpreter to use for error reporting. */ |
---|
| 169 | Tcl_RegExp re, /* Compiled regular expression; must have been |
---|
| 170 | * returned by previous call to |
---|
| 171 | * Tcl_GetRegExpFromObj. */ |
---|
| 172 | const char *text, /* Text against which to match re. */ |
---|
| 173 | const char *start) /* If text is part of a larger string, this |
---|
| 174 | * identifies beginning of larger string, so |
---|
| 175 | * that "^" won't match. */ |
---|
| 176 | { |
---|
| 177 | int flags, result, numChars; |
---|
| 178 | TclRegexp *regexp = (TclRegexp *)re; |
---|
| 179 | Tcl_DString ds; |
---|
| 180 | const Tcl_UniChar *ustr; |
---|
| 181 | |
---|
| 182 | /* |
---|
| 183 | * If the starting point is offset from the beginning of the buffer, then |
---|
| 184 | * we need to tell the regexp engine not to match "^". |
---|
| 185 | */ |
---|
| 186 | |
---|
| 187 | if (text > start) { |
---|
| 188 | flags = REG_NOTBOL; |
---|
| 189 | } else { |
---|
| 190 | flags = 0; |
---|
| 191 | } |
---|
| 192 | |
---|
| 193 | /* |
---|
| 194 | * Remember the string for use by Tcl_RegExpRange(). |
---|
| 195 | */ |
---|
| 196 | |
---|
| 197 | regexp->string = text; |
---|
| 198 | regexp->objPtr = NULL; |
---|
| 199 | |
---|
| 200 | /* |
---|
| 201 | * Convert the string to Unicode and perform the match. |
---|
| 202 | */ |
---|
| 203 | |
---|
| 204 | Tcl_DStringInit(&ds); |
---|
| 205 | ustr = Tcl_UtfToUniCharDString(text, -1, &ds); |
---|
| 206 | numChars = Tcl_DStringLength(&ds) / sizeof(Tcl_UniChar); |
---|
| 207 | result = RegExpExecUniChar(interp, re, ustr, numChars, -1 /* nmatches */, |
---|
| 208 | flags); |
---|
| 209 | Tcl_DStringFree(&ds); |
---|
| 210 | |
---|
| 211 | return result; |
---|
| 212 | } |
---|
| 213 | |
---|
| 214 | /* |
---|
| 215 | *--------------------------------------------------------------------------- |
---|
| 216 | * |
---|
| 217 | * Tcl_RegExpRange -- |
---|
| 218 | * |
---|
| 219 | * Returns pointers describing the range of a regular expression match, |
---|
| 220 | * or one of the subranges within the match. |
---|
| 221 | * |
---|
| 222 | * Results: |
---|
| 223 | * The variables at *startPtr and *endPtr are modified to hold the |
---|
| 224 | * addresses of the endpoints of the range given by index. If the |
---|
| 225 | * specified range doesn't exist then NULLs are returned. |
---|
| 226 | * |
---|
| 227 | * Side effects: |
---|
| 228 | * None. |
---|
| 229 | * |
---|
| 230 | *--------------------------------------------------------------------------- |
---|
| 231 | */ |
---|
| 232 | |
---|
| 233 | void |
---|
| 234 | Tcl_RegExpRange( |
---|
| 235 | Tcl_RegExp re, /* Compiled regular expression that has been |
---|
| 236 | * passed to Tcl_RegExpExec. */ |
---|
| 237 | int index, /* 0 means give the range of the entire match, |
---|
| 238 | * > 0 means give the range of a matching |
---|
| 239 | * subrange. */ |
---|
| 240 | const char **startPtr, /* Store address of first character in |
---|
| 241 | * (sub-)range here. */ |
---|
| 242 | const char **endPtr) /* Store address of character just after last |
---|
| 243 | * in (sub-)range here. */ |
---|
| 244 | { |
---|
| 245 | TclRegexp *regexpPtr = (TclRegexp *) re; |
---|
| 246 | const char *string; |
---|
| 247 | |
---|
| 248 | if ((size_t) index > regexpPtr->re.re_nsub) { |
---|
| 249 | *startPtr = *endPtr = NULL; |
---|
| 250 | } else if (regexpPtr->matches[index].rm_so < 0) { |
---|
| 251 | *startPtr = *endPtr = NULL; |
---|
| 252 | } else { |
---|
| 253 | if (regexpPtr->objPtr) { |
---|
| 254 | string = TclGetString(regexpPtr->objPtr); |
---|
| 255 | } else { |
---|
| 256 | string = regexpPtr->string; |
---|
| 257 | } |
---|
| 258 | *startPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_so); |
---|
| 259 | *endPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_eo); |
---|
| 260 | } |
---|
| 261 | } |
---|
| 262 | |
---|
| 263 | /* |
---|
| 264 | *--------------------------------------------------------------------------- |
---|
| 265 | * |
---|
| 266 | * RegExpExecUniChar -- |
---|
| 267 | * |
---|
| 268 | * Execute the regular expression matcher using a compiled form of a |
---|
| 269 | * regular expression and save information about any match that is found. |
---|
| 270 | * |
---|
| 271 | * Results: |
---|
| 272 | * If an error occurs during the matching operation then -1 is returned |
---|
| 273 | * and an error message is left in interp's result. Otherwise the return |
---|
| 274 | * value is 1 if a matching range was found or 0 if there was no matching |
---|
| 275 | * range. |
---|
| 276 | * |
---|
| 277 | * Side effects: |
---|
| 278 | * None. |
---|
| 279 | * |
---|
| 280 | *---------------------------------------------------------------------- |
---|
| 281 | */ |
---|
| 282 | |
---|
| 283 | static int |
---|
| 284 | RegExpExecUniChar( |
---|
| 285 | Tcl_Interp *interp, /* Interpreter to use for error reporting. */ |
---|
| 286 | Tcl_RegExp re, /* Compiled regular expression; returned by a |
---|
| 287 | * previous call to Tcl_GetRegExpFromObj */ |
---|
| 288 | const Tcl_UniChar *wString, /* String against which to match re. */ |
---|
| 289 | int numChars, /* Length of Tcl_UniChar string (must be |
---|
| 290 | * >=0). */ |
---|
| 291 | int nmatches, /* How many subexpression matches (counting |
---|
| 292 | * the whole match as subexpression 0) are of |
---|
| 293 | * interest. -1 means "don't know". */ |
---|
| 294 | int flags) /* Regular expression flags. */ |
---|
| 295 | { |
---|
| 296 | int status; |
---|
| 297 | TclRegexp *regexpPtr = (TclRegexp *) re; |
---|
| 298 | size_t last = regexpPtr->re.re_nsub + 1; |
---|
| 299 | size_t nm = last; |
---|
| 300 | |
---|
| 301 | if (nmatches >= 0 && (size_t) nmatches < nm) { |
---|
| 302 | nm = (size_t) nmatches; |
---|
| 303 | } |
---|
| 304 | |
---|
| 305 | status = TclReExec(®expPtr->re, wString, (size_t) numChars, |
---|
| 306 | ®expPtr->details, nm, regexpPtr->matches, flags); |
---|
| 307 | |
---|
| 308 | /* |
---|
| 309 | * Check for errors. |
---|
| 310 | */ |
---|
| 311 | |
---|
| 312 | if (status != REG_OKAY) { |
---|
| 313 | if (status == REG_NOMATCH) { |
---|
| 314 | return 0; |
---|
| 315 | } |
---|
| 316 | if (interp != NULL) { |
---|
| 317 | TclRegError(interp, "error while matching regular expression: ", |
---|
| 318 | status); |
---|
| 319 | } |
---|
| 320 | return -1; |
---|
| 321 | } |
---|
| 322 | return 1; |
---|
| 323 | } |
---|
| 324 | |
---|
| 325 | /* |
---|
| 326 | *--------------------------------------------------------------------------- |
---|
| 327 | * |
---|
| 328 | * TclRegExpRangeUniChar -- |
---|
| 329 | * |
---|
| 330 | * Returns pointers describing the range of a regular expression match, |
---|
| 331 | * or one of the subranges within the match, or the hypothetical range |
---|
| 332 | * represented by the rm_extend field of the rm_detail_t. |
---|
| 333 | * |
---|
| 334 | * Results: |
---|
| 335 | * The variables at *startPtr and *endPtr are modified to hold the |
---|
| 336 | * offsets of the endpoints of the range given by index. If the specified |
---|
| 337 | * range doesn't exist then -1s are supplied. |
---|
| 338 | * |
---|
| 339 | * Side effects: |
---|
| 340 | * None. |
---|
| 341 | * |
---|
| 342 | *--------------------------------------------------------------------------- |
---|
| 343 | */ |
---|
| 344 | |
---|
| 345 | void |
---|
| 346 | TclRegExpRangeUniChar( |
---|
| 347 | Tcl_RegExp re, /* Compiled regular expression that has been |
---|
| 348 | * passed to Tcl_RegExpExec. */ |
---|
| 349 | int index, /* 0 means give the range of the entire match, |
---|
| 350 | * > 0 means give the range of a matching |
---|
| 351 | * subrange, -1 means the range of the |
---|
| 352 | * rm_extend field. */ |
---|
| 353 | int *startPtr, /* Store address of first character in |
---|
| 354 | * (sub-)range here. */ |
---|
| 355 | int *endPtr) /* Store address of character just after last |
---|
| 356 | * in (sub-)range here. */ |
---|
| 357 | { |
---|
| 358 | TclRegexp *regexpPtr = (TclRegexp *) re; |
---|
| 359 | |
---|
| 360 | if ((regexpPtr->flags®_EXPECT) && index == -1) { |
---|
| 361 | *startPtr = regexpPtr->details.rm_extend.rm_so; |
---|
| 362 | *endPtr = regexpPtr->details.rm_extend.rm_eo; |
---|
| 363 | } else if ((size_t) index > regexpPtr->re.re_nsub) { |
---|
| 364 | *startPtr = -1; |
---|
| 365 | *endPtr = -1; |
---|
| 366 | } else { |
---|
| 367 | *startPtr = regexpPtr->matches[index].rm_so; |
---|
| 368 | *endPtr = regexpPtr->matches[index].rm_eo; |
---|
| 369 | } |
---|
| 370 | } |
---|
| 371 | |
---|
| 372 | /* |
---|
| 373 | *---------------------------------------------------------------------- |
---|
| 374 | * |
---|
| 375 | * Tcl_RegExpMatch -- |
---|
| 376 | * |
---|
| 377 | * See if a string matches a regular expression. |
---|
| 378 | * |
---|
| 379 | * Results: |
---|
| 380 | * If an error occurs during the matching operation then -1 is returned |
---|
| 381 | * and the interp's result contains an error message. Otherwise the |
---|
| 382 | * return value is 1 if "text" matches "pattern" and 0 otherwise. |
---|
| 383 | * |
---|
| 384 | * Side effects: |
---|
| 385 | * None. |
---|
| 386 | * |
---|
| 387 | *---------------------------------------------------------------------- |
---|
| 388 | */ |
---|
| 389 | |
---|
| 390 | int |
---|
| 391 | Tcl_RegExpMatch( |
---|
| 392 | Tcl_Interp *interp, /* Used for error reporting. May be NULL. */ |
---|
| 393 | const char *text, /* Text to search for pattern matches. */ |
---|
| 394 | const char *pattern) /* Regular expression to match against text. */ |
---|
| 395 | { |
---|
| 396 | Tcl_RegExp re; |
---|
| 397 | |
---|
| 398 | re = Tcl_RegExpCompile(interp, pattern); |
---|
| 399 | if (re == NULL) { |
---|
| 400 | return -1; |
---|
| 401 | } |
---|
| 402 | return Tcl_RegExpExec(interp, re, text, text); |
---|
| 403 | } |
---|
| 404 | |
---|
| 405 | /* |
---|
| 406 | *---------------------------------------------------------------------- |
---|
| 407 | * |
---|
| 408 | * Tcl_RegExpExecObj -- |
---|
| 409 | * |
---|
| 410 | * Execute a precompiled regexp against the given object. |
---|
| 411 | * |
---|
| 412 | * Results: |
---|
| 413 | * If an error occurs during the matching operation then -1 is returned |
---|
| 414 | * and the interp's result contains an error message. Otherwise the |
---|
| 415 | * return value is 1 if "string" matches "pattern" and 0 otherwise. |
---|
| 416 | * |
---|
| 417 | * Side effects: |
---|
| 418 | * Converts the object to a Unicode object. |
---|
| 419 | * |
---|
| 420 | *---------------------------------------------------------------------- |
---|
| 421 | */ |
---|
| 422 | |
---|
| 423 | int |
---|
| 424 | Tcl_RegExpExecObj( |
---|
| 425 | Tcl_Interp *interp, /* Interpreter to use for error reporting. */ |
---|
| 426 | Tcl_RegExp re, /* Compiled regular expression; must have been |
---|
| 427 | * returned by previous call to |
---|
| 428 | * Tcl_GetRegExpFromObj. */ |
---|
| 429 | Tcl_Obj *textObj, /* Text against which to match re. */ |
---|
| 430 | int offset, /* Character index that marks where matching |
---|
| 431 | * should begin. */ |
---|
| 432 | int nmatches, /* How many subexpression matches (counting |
---|
| 433 | * the whole match as subexpression 0) are of |
---|
| 434 | * interest. -1 means all of them. */ |
---|
| 435 | int flags) /* Regular expression execution flags. */ |
---|
| 436 | { |
---|
| 437 | TclRegexp *regexpPtr = (TclRegexp *) re; |
---|
| 438 | Tcl_UniChar *udata; |
---|
| 439 | int length; |
---|
| 440 | int reflags = regexpPtr->flags; |
---|
| 441 | #define TCL_REG_GLOBOK_FLAGS (TCL_REG_ADVANCED | TCL_REG_NOSUB | TCL_REG_NOCASE) |
---|
| 442 | |
---|
| 443 | /* |
---|
| 444 | * Take advantage of the equivalent glob pattern, if one exists. |
---|
| 445 | * This is possible based only on the right mix of incoming flags (0) |
---|
| 446 | * and regexp compile flags. |
---|
| 447 | */ |
---|
| 448 | if ((offset == 0) && (nmatches == 0) && (flags == 0) |
---|
| 449 | && !(reflags & ~TCL_REG_GLOBOK_FLAGS) |
---|
| 450 | && (regexpPtr->globObjPtr != NULL)) { |
---|
| 451 | int nocase = (reflags & TCL_REG_NOCASE) ? TCL_MATCH_NOCASE : 0; |
---|
| 452 | |
---|
| 453 | /* |
---|
| 454 | * Pass to TclStringMatchObj for obj-specific handling. |
---|
| 455 | * XXX: Currently doesn't take advantage of exact-ness that |
---|
| 456 | * XXX: TclReToGlob tells us about |
---|
| 457 | */ |
---|
| 458 | |
---|
| 459 | return TclStringMatchObj(textObj, regexpPtr->globObjPtr, nocase); |
---|
| 460 | } |
---|
| 461 | |
---|
| 462 | /* |
---|
| 463 | * Save the target object so we can extract strings from it later. |
---|
| 464 | */ |
---|
| 465 | |
---|
| 466 | regexpPtr->string = NULL; |
---|
| 467 | regexpPtr->objPtr = textObj; |
---|
| 468 | |
---|
| 469 | udata = Tcl_GetUnicodeFromObj(textObj, &length); |
---|
| 470 | |
---|
| 471 | if (offset > length) { |
---|
| 472 | offset = length; |
---|
| 473 | } |
---|
| 474 | udata += offset; |
---|
| 475 | length -= offset; |
---|
| 476 | |
---|
| 477 | return RegExpExecUniChar(interp, re, udata, length, nmatches, flags); |
---|
| 478 | } |
---|
| 479 | |
---|
| 480 | /* |
---|
| 481 | *---------------------------------------------------------------------- |
---|
| 482 | * |
---|
| 483 | * Tcl_RegExpMatchObj -- |
---|
| 484 | * |
---|
| 485 | * See if an object matches a regular expression. |
---|
| 486 | * |
---|
| 487 | * Results: |
---|
| 488 | * If an error occurs during the matching operation then -1 is returned |
---|
| 489 | * and the interp's result contains an error message. Otherwise the |
---|
| 490 | * return value is 1 if "text" matches "pattern" and 0 otherwise. |
---|
| 491 | * |
---|
| 492 | * Side effects: |
---|
| 493 | * Changes the internal rep of the pattern and string objects. |
---|
| 494 | * |
---|
| 495 | *---------------------------------------------------------------------- |
---|
| 496 | */ |
---|
| 497 | |
---|
| 498 | int |
---|
| 499 | Tcl_RegExpMatchObj( |
---|
| 500 | Tcl_Interp *interp, /* Used for error reporting. May be NULL. */ |
---|
| 501 | Tcl_Obj *textObj, /* Object containing the String to search. */ |
---|
| 502 | Tcl_Obj *patternObj) /* Regular expression to match against |
---|
| 503 | * string. */ |
---|
| 504 | { |
---|
| 505 | Tcl_RegExp re; |
---|
| 506 | |
---|
| 507 | re = Tcl_GetRegExpFromObj(interp, patternObj, |
---|
| 508 | TCL_REG_ADVANCED | TCL_REG_NOSUB); |
---|
| 509 | if (re == NULL) { |
---|
| 510 | return -1; |
---|
| 511 | } |
---|
| 512 | return Tcl_RegExpExecObj(interp, re, textObj, 0 /* offset */, |
---|
| 513 | 0 /* nmatches */, 0 /* flags */); |
---|
| 514 | } |
---|
| 515 | |
---|
| 516 | /* |
---|
| 517 | *---------------------------------------------------------------------- |
---|
| 518 | * |
---|
| 519 | * Tcl_RegExpGetInfo -- |
---|
| 520 | * |
---|
| 521 | * Retrieve information about the current match. |
---|
| 522 | * |
---|
| 523 | * Results: |
---|
| 524 | * None. |
---|
| 525 | * |
---|
| 526 | * Side effects: |
---|
| 527 | * None. |
---|
| 528 | * |
---|
| 529 | *---------------------------------------------------------------------- |
---|
| 530 | */ |
---|
| 531 | |
---|
| 532 | void |
---|
| 533 | Tcl_RegExpGetInfo( |
---|
| 534 | Tcl_RegExp regexp, /* Pattern from which to get subexpressions. */ |
---|
| 535 | Tcl_RegExpInfo *infoPtr) /* Match information is stored here. */ |
---|
| 536 | { |
---|
| 537 | TclRegexp *regexpPtr = (TclRegexp *) regexp; |
---|
| 538 | |
---|
| 539 | infoPtr->nsubs = regexpPtr->re.re_nsub; |
---|
| 540 | infoPtr->matches = (Tcl_RegExpIndices *) regexpPtr->matches; |
---|
| 541 | infoPtr->extendStart = regexpPtr->details.rm_extend.rm_so; |
---|
| 542 | } |
---|
| 543 | |
---|
| 544 | /* |
---|
| 545 | *---------------------------------------------------------------------- |
---|
| 546 | * |
---|
| 547 | * Tcl_GetRegExpFromObj -- |
---|
| 548 | * |
---|
| 549 | * Compile a regular expression into a form suitable for fast matching. |
---|
| 550 | * This function caches the result in a Tcl_Obj. |
---|
| 551 | * |
---|
| 552 | * Results: |
---|
| 553 | * The return value is a pointer to the compiled form of string, suitable |
---|
| 554 | * for passing to Tcl_RegExpExec. If an error occurred while compiling |
---|
| 555 | * the pattern, then NULL is returned and an error message is left in the |
---|
| 556 | * interp's result. |
---|
| 557 | * |
---|
| 558 | * Side effects: |
---|
| 559 | * Updates the native rep of the Tcl_Obj. |
---|
| 560 | * |
---|
| 561 | *---------------------------------------------------------------------- |
---|
| 562 | */ |
---|
| 563 | |
---|
| 564 | Tcl_RegExp |
---|
| 565 | Tcl_GetRegExpFromObj( |
---|
| 566 | Tcl_Interp *interp, /* For use in error reporting, and to access |
---|
| 567 | * the interp regexp cache. */ |
---|
| 568 | Tcl_Obj *objPtr, /* Object whose string rep contains regular |
---|
| 569 | * expression pattern. Internal rep will be |
---|
| 570 | * changed to compiled form of this regular |
---|
| 571 | * expression. */ |
---|
| 572 | int flags) /* Regular expression compilation flags. */ |
---|
| 573 | { |
---|
| 574 | int length; |
---|
| 575 | TclRegexp *regexpPtr; |
---|
| 576 | char *pattern; |
---|
| 577 | |
---|
| 578 | /* |
---|
| 579 | * This is OK because we only actually interpret this value properly as a |
---|
| 580 | * TclRegexp* when the type is tclRegexpType. |
---|
| 581 | */ |
---|
| 582 | |
---|
| 583 | regexpPtr = (TclRegexp *) objPtr->internalRep.otherValuePtr; |
---|
| 584 | |
---|
| 585 | if ((objPtr->typePtr != &tclRegexpType) || (regexpPtr->flags != flags)) { |
---|
| 586 | pattern = TclGetStringFromObj(objPtr, &length); |
---|
| 587 | |
---|
| 588 | regexpPtr = CompileRegexp(interp, pattern, length, flags); |
---|
| 589 | if (regexpPtr == NULL) { |
---|
| 590 | return NULL; |
---|
| 591 | } |
---|
| 592 | |
---|
| 593 | /* |
---|
| 594 | * Add a reference to the regexp so it will persist even if it is |
---|
| 595 | * pushed out of the current thread's regexp cache. This reference |
---|
| 596 | * will be removed when the object's internal rep is freed. |
---|
| 597 | */ |
---|
| 598 | |
---|
| 599 | regexpPtr->refCount++; |
---|
| 600 | |
---|
| 601 | /* |
---|
| 602 | * Free the old representation and set our type. |
---|
| 603 | */ |
---|
| 604 | |
---|
| 605 | TclFreeIntRep(objPtr); |
---|
| 606 | objPtr->internalRep.otherValuePtr = (void *) regexpPtr; |
---|
| 607 | objPtr->typePtr = &tclRegexpType; |
---|
| 608 | } |
---|
| 609 | return (Tcl_RegExp) regexpPtr; |
---|
| 610 | } |
---|
| 611 | |
---|
| 612 | /* |
---|
| 613 | *---------------------------------------------------------------------- |
---|
| 614 | * |
---|
| 615 | * TclRegAbout -- |
---|
| 616 | * |
---|
| 617 | * Return information about a compiled regular expression. |
---|
| 618 | * |
---|
| 619 | * Results: |
---|
| 620 | * The return value is -1 for failure, 0 for success, although at the |
---|
| 621 | * moment there's nothing that could fail. On success, a list is left in |
---|
| 622 | * the interp's result: first element is the subexpression count, second |
---|
| 623 | * is a list of re_info bit names. |
---|
| 624 | * |
---|
| 625 | * Side effects: |
---|
| 626 | * None. |
---|
| 627 | * |
---|
| 628 | *---------------------------------------------------------------------- |
---|
| 629 | */ |
---|
| 630 | |
---|
| 631 | int |
---|
| 632 | TclRegAbout( |
---|
| 633 | Tcl_Interp *interp, /* For use in variable assignment. */ |
---|
| 634 | Tcl_RegExp re) /* The compiled regular expression. */ |
---|
| 635 | { |
---|
| 636 | TclRegexp *regexpPtr = (TclRegexp *) re; |
---|
| 637 | struct infoname { |
---|
| 638 | int bit; |
---|
| 639 | const char *text; |
---|
| 640 | }; |
---|
| 641 | static const struct infoname infonames[] = { |
---|
| 642 | {REG_UBACKREF, "REG_UBACKREF"}, |
---|
| 643 | {REG_ULOOKAHEAD, "REG_ULOOKAHEAD"}, |
---|
| 644 | {REG_UBOUNDS, "REG_UBOUNDS"}, |
---|
| 645 | {REG_UBRACES, "REG_UBRACES"}, |
---|
| 646 | {REG_UBSALNUM, "REG_UBSALNUM"}, |
---|
| 647 | {REG_UPBOTCH, "REG_UPBOTCH"}, |
---|
| 648 | {REG_UBBS, "REG_UBBS"}, |
---|
| 649 | {REG_UNONPOSIX, "REG_UNONPOSIX"}, |
---|
| 650 | {REG_UUNSPEC, "REG_UUNSPEC"}, |
---|
| 651 | {REG_UUNPORT, "REG_UUNPORT"}, |
---|
| 652 | {REG_ULOCALE, "REG_ULOCALE"}, |
---|
| 653 | {REG_UEMPTYMATCH, "REG_UEMPTYMATCH"}, |
---|
| 654 | {REG_UIMPOSSIBLE, "REG_UIMPOSSIBLE"}, |
---|
| 655 | {REG_USHORTEST, "REG_USHORTEST"}, |
---|
| 656 | {0, NULL} |
---|
| 657 | }; |
---|
| 658 | const struct infoname *inf; |
---|
| 659 | Tcl_Obj *infoObj; |
---|
| 660 | |
---|
| 661 | /* |
---|
| 662 | * The reset here guarantees that the interpreter result is empty and |
---|
| 663 | * unshared. This means that we can use Tcl_ListObjAppendElement on the |
---|
| 664 | * result object quite safely. |
---|
| 665 | */ |
---|
| 666 | |
---|
| 667 | Tcl_ResetResult(interp); |
---|
| 668 | |
---|
| 669 | /* |
---|
| 670 | * Assume that there will never be more than INT_MAX subexpressions. This |
---|
| 671 | * is a pretty reasonable assumption; the RE engine doesn't scale _that_ |
---|
| 672 | * well and Tcl has other limits that constrain things as well... |
---|
| 673 | */ |
---|
| 674 | |
---|
| 675 | Tcl_ListObjAppendElement(NULL, Tcl_GetObjResult(interp), |
---|
| 676 | Tcl_NewIntObj((int) regexpPtr->re.re_nsub)); |
---|
| 677 | |
---|
| 678 | /* |
---|
| 679 | * Now append a list of all the bit-flags set for the RE. |
---|
| 680 | */ |
---|
| 681 | |
---|
| 682 | TclNewObj(infoObj); |
---|
| 683 | for (inf=infonames ; inf->bit != 0 ; inf++) { |
---|
| 684 | if (regexpPtr->re.re_info & inf->bit) { |
---|
| 685 | Tcl_ListObjAppendElement(NULL, infoObj, |
---|
| 686 | Tcl_NewStringObj(inf->text, -1)); |
---|
| 687 | } |
---|
| 688 | } |
---|
| 689 | Tcl_ListObjAppendElement(NULL, Tcl_GetObjResult(interp), infoObj); |
---|
| 690 | |
---|
| 691 | return 0; |
---|
| 692 | } |
---|
| 693 | |
---|
| 694 | /* |
---|
| 695 | *---------------------------------------------------------------------- |
---|
| 696 | * |
---|
| 697 | * TclRegError -- |
---|
| 698 | * |
---|
| 699 | * Generate an error message based on the regexp status code. |
---|
| 700 | * |
---|
| 701 | * Results: |
---|
| 702 | * Places an error in the interpreter. |
---|
| 703 | * |
---|
| 704 | * Side effects: |
---|
| 705 | * Sets errorCode as well. |
---|
| 706 | * |
---|
| 707 | *---------------------------------------------------------------------- |
---|
| 708 | */ |
---|
| 709 | |
---|
| 710 | void |
---|
| 711 | TclRegError( |
---|
| 712 | Tcl_Interp *interp, /* Interpreter for error reporting. */ |
---|
| 713 | const char *msg, /* Message to prepend to error. */ |
---|
| 714 | int status) /* Status code to report. */ |
---|
| 715 | { |
---|
| 716 | char buf[100]; /* ample in practice */ |
---|
| 717 | char cbuf[100]; /* lots in practice */ |
---|
| 718 | size_t n; |
---|
| 719 | const char *p; |
---|
| 720 | |
---|
| 721 | Tcl_ResetResult(interp); |
---|
| 722 | n = TclReError(status, NULL, buf, sizeof(buf)); |
---|
| 723 | p = (n > sizeof(buf)) ? "..." : ""; |
---|
| 724 | Tcl_AppendResult(interp, msg, buf, p, NULL); |
---|
| 725 | |
---|
| 726 | sprintf(cbuf, "%d", status); |
---|
| 727 | (void) TclReError(REG_ITOA, NULL, cbuf, sizeof(cbuf)); |
---|
| 728 | Tcl_SetErrorCode(interp, "REGEXP", cbuf, buf, NULL); |
---|
| 729 | } |
---|
| 730 | |
---|
| 731 | /* |
---|
| 732 | *---------------------------------------------------------------------- |
---|
| 733 | * |
---|
| 734 | * FreeRegexpInternalRep -- |
---|
| 735 | * |
---|
| 736 | * Deallocate the storage associated with a regexp object's internal |
---|
| 737 | * representation. |
---|
| 738 | * |
---|
| 739 | * Results: |
---|
| 740 | * None. |
---|
| 741 | * |
---|
| 742 | * Side effects: |
---|
| 743 | * Frees the compiled regular expression. |
---|
| 744 | * |
---|
| 745 | *---------------------------------------------------------------------- |
---|
| 746 | */ |
---|
| 747 | |
---|
| 748 | static void |
---|
| 749 | FreeRegexpInternalRep( |
---|
| 750 | Tcl_Obj *objPtr) /* Regexp object with internal rep to free. */ |
---|
| 751 | { |
---|
| 752 | TclRegexp *regexpRepPtr = (TclRegexp *) objPtr->internalRep.otherValuePtr; |
---|
| 753 | |
---|
| 754 | /* |
---|
| 755 | * If this is the last reference to the regexp, free it. |
---|
| 756 | */ |
---|
| 757 | |
---|
| 758 | if (--(regexpRepPtr->refCount) <= 0) { |
---|
| 759 | FreeRegexp(regexpRepPtr); |
---|
| 760 | } |
---|
| 761 | } |
---|
| 762 | |
---|
| 763 | /* |
---|
| 764 | *---------------------------------------------------------------------- |
---|
| 765 | * |
---|
| 766 | * DupRegexpInternalRep -- |
---|
| 767 | * |
---|
| 768 | * We copy the reference to the compiled regexp and bump its reference |
---|
| 769 | * count. |
---|
| 770 | * |
---|
| 771 | * Results: |
---|
| 772 | * None. |
---|
| 773 | * |
---|
| 774 | * Side effects: |
---|
| 775 | * Increments the reference count of the regexp. |
---|
| 776 | * |
---|
| 777 | *---------------------------------------------------------------------- |
---|
| 778 | */ |
---|
| 779 | |
---|
| 780 | static void |
---|
| 781 | DupRegexpInternalRep( |
---|
| 782 | Tcl_Obj *srcPtr, /* Object with internal rep to copy. */ |
---|
| 783 | Tcl_Obj *copyPtr) /* Object with internal rep to set. */ |
---|
| 784 | { |
---|
| 785 | TclRegexp *regexpPtr = (TclRegexp *) srcPtr->internalRep.otherValuePtr; |
---|
| 786 | |
---|
| 787 | regexpPtr->refCount++; |
---|
| 788 | copyPtr->internalRep.otherValuePtr = srcPtr->internalRep.otherValuePtr; |
---|
| 789 | copyPtr->typePtr = &tclRegexpType; |
---|
| 790 | } |
---|
| 791 | |
---|
| 792 | /* |
---|
| 793 | *---------------------------------------------------------------------- |
---|
| 794 | * |
---|
| 795 | * SetRegexpFromAny -- |
---|
| 796 | * |
---|
| 797 | * Attempt to generate a compiled regular expression for the Tcl object |
---|
| 798 | * "objPtr". |
---|
| 799 | * |
---|
| 800 | * Results: |
---|
| 801 | * The return value is TCL_OK or TCL_ERROR. If an error occurs during |
---|
| 802 | * conversion, an error message is left in the interpreter's result |
---|
| 803 | * unless "interp" is NULL. |
---|
| 804 | * |
---|
| 805 | * Side effects: |
---|
| 806 | * If no error occurs, a regular expression is stored as "objPtr"s |
---|
| 807 | * internal representation. |
---|
| 808 | * |
---|
| 809 | *---------------------------------------------------------------------- |
---|
| 810 | */ |
---|
| 811 | |
---|
| 812 | static int |
---|
| 813 | SetRegexpFromAny( |
---|
| 814 | Tcl_Interp *interp, /* Used for error reporting if not NULL. */ |
---|
| 815 | Tcl_Obj *objPtr) /* The object to convert. */ |
---|
| 816 | { |
---|
| 817 | if (Tcl_GetRegExpFromObj(interp, objPtr, REG_ADVANCED) == NULL) { |
---|
| 818 | return TCL_ERROR; |
---|
| 819 | } |
---|
| 820 | return TCL_OK; |
---|
| 821 | } |
---|
| 822 | |
---|
| 823 | /* |
---|
| 824 | *--------------------------------------------------------------------------- |
---|
| 825 | * |
---|
| 826 | * CompileRegexp -- |
---|
| 827 | * |
---|
| 828 | * Attempt to compile the given regexp pattern. If the compiled regular |
---|
| 829 | * expression can be found in the per-thread cache, it will be used |
---|
| 830 | * instead of compiling a new copy. |
---|
| 831 | * |
---|
| 832 | * Results: |
---|
| 833 | * The return value is a pointer to a newly allocated TclRegexp that |
---|
| 834 | * represents the compiled pattern, or NULL if the pattern could not be |
---|
| 835 | * compiled. If NULL is returned, an error message is left in the |
---|
| 836 | * interp's result. |
---|
| 837 | * |
---|
| 838 | * Side effects: |
---|
| 839 | * The thread-local regexp cache is updated and a new TclRegexp may be |
---|
| 840 | * allocated. |
---|
| 841 | * |
---|
| 842 | *---------------------------------------------------------------------- |
---|
| 843 | */ |
---|
| 844 | |
---|
| 845 | static TclRegexp * |
---|
| 846 | CompileRegexp( |
---|
| 847 | Tcl_Interp *interp, /* Used for error reporting if not NULL. */ |
---|
| 848 | const char *string, /* The regexp to compile (UTF-8). */ |
---|
| 849 | int length, /* The length of the string in bytes. */ |
---|
| 850 | int flags) /* Compilation flags. */ |
---|
| 851 | { |
---|
| 852 | TclRegexp *regexpPtr; |
---|
| 853 | const Tcl_UniChar *uniString; |
---|
| 854 | int numChars, status, i, exact; |
---|
| 855 | Tcl_DString stringBuf; |
---|
| 856 | ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey); |
---|
| 857 | |
---|
| 858 | if (!tsdPtr->initialized) { |
---|
| 859 | tsdPtr->initialized = 1; |
---|
| 860 | Tcl_CreateThreadExitHandler(FinalizeRegexp, NULL); |
---|
| 861 | } |
---|
| 862 | |
---|
| 863 | /* |
---|
| 864 | * This routine maintains a second-level regular expression cache in |
---|
| 865 | * addition to the per-object regexp cache. The per-thread cache is needed |
---|
| 866 | * to handle the case where for various reasons the object is lost between |
---|
| 867 | * invocations of the regexp command, but the literal pattern is the same. |
---|
| 868 | */ |
---|
| 869 | |
---|
| 870 | /* |
---|
| 871 | * Check the per-thread compiled regexp cache. We can only reuse a regexp |
---|
| 872 | * if it has the same pattern and the same flags. |
---|
| 873 | */ |
---|
| 874 | |
---|
| 875 | for (i = 0; (i < NUM_REGEXPS) && (tsdPtr->patterns[i] != NULL); i++) { |
---|
| 876 | if ((length == tsdPtr->patLengths[i]) |
---|
| 877 | && (tsdPtr->regexps[i]->flags == flags) |
---|
| 878 | && (strcmp(string, tsdPtr->patterns[i]) == 0)) { |
---|
| 879 | /* |
---|
| 880 | * Move the matched pattern to the first slot in the cache and |
---|
| 881 | * shift the other patterns down one position. |
---|
| 882 | */ |
---|
| 883 | |
---|
| 884 | if (i != 0) { |
---|
| 885 | int j; |
---|
| 886 | char *cachedString; |
---|
| 887 | |
---|
| 888 | cachedString = tsdPtr->patterns[i]; |
---|
| 889 | regexpPtr = tsdPtr->regexps[i]; |
---|
| 890 | for (j = i-1; j >= 0; j--) { |
---|
| 891 | tsdPtr->patterns[j+1] = tsdPtr->patterns[j]; |
---|
| 892 | tsdPtr->patLengths[j+1] = tsdPtr->patLengths[j]; |
---|
| 893 | tsdPtr->regexps[j+1] = tsdPtr->regexps[j]; |
---|
| 894 | } |
---|
| 895 | tsdPtr->patterns[0] = cachedString; |
---|
| 896 | tsdPtr->patLengths[0] = length; |
---|
| 897 | tsdPtr->regexps[0] = regexpPtr; |
---|
| 898 | } |
---|
| 899 | return tsdPtr->regexps[0]; |
---|
| 900 | } |
---|
| 901 | } |
---|
| 902 | |
---|
| 903 | /* |
---|
| 904 | * This is a new expression, so compile it and add it to the cache. |
---|
| 905 | */ |
---|
| 906 | |
---|
| 907 | regexpPtr = (TclRegexp *) ckalloc(sizeof(TclRegexp)); |
---|
| 908 | regexpPtr->objPtr = NULL; |
---|
| 909 | regexpPtr->string = NULL; |
---|
| 910 | regexpPtr->details.rm_extend.rm_so = -1; |
---|
| 911 | regexpPtr->details.rm_extend.rm_eo = -1; |
---|
| 912 | |
---|
| 913 | /* |
---|
| 914 | * Get the up-to-date string representation and map to unicode. |
---|
| 915 | */ |
---|
| 916 | |
---|
| 917 | Tcl_DStringInit(&stringBuf); |
---|
| 918 | uniString = Tcl_UtfToUniCharDString(string, length, &stringBuf); |
---|
| 919 | numChars = Tcl_DStringLength(&stringBuf) / sizeof(Tcl_UniChar); |
---|
| 920 | |
---|
| 921 | /* |
---|
| 922 | * Compile the string and check for errors. |
---|
| 923 | */ |
---|
| 924 | |
---|
| 925 | regexpPtr->flags = flags; |
---|
| 926 | status = TclReComp(®expPtr->re, uniString, (size_t) numChars, flags); |
---|
| 927 | Tcl_DStringFree(&stringBuf); |
---|
| 928 | |
---|
| 929 | if (status != REG_OKAY) { |
---|
| 930 | /* |
---|
| 931 | * Clean up and report errors in the interpreter, if possible. |
---|
| 932 | */ |
---|
| 933 | |
---|
| 934 | ckfree((char *)regexpPtr); |
---|
| 935 | if (interp) { |
---|
| 936 | TclRegError(interp, |
---|
| 937 | "couldn't compile regular expression pattern: ", status); |
---|
| 938 | } |
---|
| 939 | return NULL; |
---|
| 940 | } |
---|
| 941 | |
---|
| 942 | /* |
---|
| 943 | * Convert RE to a glob pattern equivalent, if any, and cache it. If this |
---|
| 944 | * is not possible, then globObjPtr will be NULL. This is used by |
---|
| 945 | * Tcl_RegExpExecObj to optionally do a fast match (avoids RE engine). |
---|
| 946 | */ |
---|
| 947 | |
---|
| 948 | if (TclReToGlob(NULL, string, length, &stringBuf, &exact) == TCL_OK) { |
---|
| 949 | regexpPtr->globObjPtr = Tcl_NewStringObj(Tcl_DStringValue(&stringBuf), |
---|
| 950 | Tcl_DStringLength(&stringBuf)); |
---|
| 951 | Tcl_IncrRefCount(regexpPtr->globObjPtr); |
---|
| 952 | Tcl_DStringFree(&stringBuf); |
---|
| 953 | } else { |
---|
| 954 | regexpPtr->globObjPtr = NULL; |
---|
| 955 | } |
---|
| 956 | |
---|
| 957 | /* |
---|
| 958 | * Allocate enough space for all of the subexpressions, plus one extra for |
---|
| 959 | * the entire pattern. |
---|
| 960 | */ |
---|
| 961 | |
---|
| 962 | regexpPtr->matches = (regmatch_t *) ckalloc( |
---|
| 963 | sizeof(regmatch_t) * (regexpPtr->re.re_nsub + 1)); |
---|
| 964 | |
---|
| 965 | /* |
---|
| 966 | * Initialize the refcount to one initially, since it is in the cache. |
---|
| 967 | */ |
---|
| 968 | |
---|
| 969 | regexpPtr->refCount = 1; |
---|
| 970 | |
---|
| 971 | /* |
---|
| 972 | * Free the last regexp, if necessary, and make room at the head of the |
---|
| 973 | * list for the new regexp. |
---|
| 974 | */ |
---|
| 975 | |
---|
| 976 | if (tsdPtr->patterns[NUM_REGEXPS-1] != NULL) { |
---|
| 977 | TclRegexp *oldRegexpPtr = tsdPtr->regexps[NUM_REGEXPS-1]; |
---|
| 978 | if (--(oldRegexpPtr->refCount) <= 0) { |
---|
| 979 | FreeRegexp(oldRegexpPtr); |
---|
| 980 | } |
---|
| 981 | ckfree(tsdPtr->patterns[NUM_REGEXPS-1]); |
---|
| 982 | } |
---|
| 983 | for (i = NUM_REGEXPS - 2; i >= 0; i--) { |
---|
| 984 | tsdPtr->patterns[i+1] = tsdPtr->patterns[i]; |
---|
| 985 | tsdPtr->patLengths[i+1] = tsdPtr->patLengths[i]; |
---|
| 986 | tsdPtr->regexps[i+1] = tsdPtr->regexps[i]; |
---|
| 987 | } |
---|
| 988 | tsdPtr->patterns[0] = (char *) ckalloc((unsigned) (length+1)); |
---|
| 989 | strcpy(tsdPtr->patterns[0], string); |
---|
| 990 | tsdPtr->patLengths[0] = length; |
---|
| 991 | tsdPtr->regexps[0] = regexpPtr; |
---|
| 992 | |
---|
| 993 | return regexpPtr; |
---|
| 994 | } |
---|
| 995 | |
---|
| 996 | /* |
---|
| 997 | *---------------------------------------------------------------------- |
---|
| 998 | * |
---|
| 999 | * FreeRegexp -- |
---|
| 1000 | * |
---|
| 1001 | * Release the storage associated with a TclRegexp. |
---|
| 1002 | * |
---|
| 1003 | * Results: |
---|
| 1004 | * None. |
---|
| 1005 | * |
---|
| 1006 | * Side effects: |
---|
| 1007 | * None. |
---|
| 1008 | * |
---|
| 1009 | *---------------------------------------------------------------------- |
---|
| 1010 | */ |
---|
| 1011 | |
---|
| 1012 | static void |
---|
| 1013 | FreeRegexp( |
---|
| 1014 | TclRegexp *regexpPtr) /* Compiled regular expression to free. */ |
---|
| 1015 | { |
---|
| 1016 | TclReFree(®expPtr->re); |
---|
| 1017 | if (regexpPtr->globObjPtr) { |
---|
| 1018 | TclDecrRefCount(regexpPtr->globObjPtr); |
---|
| 1019 | } |
---|
| 1020 | if (regexpPtr->matches) { |
---|
| 1021 | ckfree((char *) regexpPtr->matches); |
---|
| 1022 | } |
---|
| 1023 | ckfree((char *) regexpPtr); |
---|
| 1024 | } |
---|
| 1025 | |
---|
| 1026 | /* |
---|
| 1027 | *---------------------------------------------------------------------- |
---|
| 1028 | * |
---|
| 1029 | * FinalizeRegexp -- |
---|
| 1030 | * |
---|
| 1031 | * Release the storage associated with the per-thread regexp cache. |
---|
| 1032 | * |
---|
| 1033 | * Results: |
---|
| 1034 | * None. |
---|
| 1035 | * |
---|
| 1036 | * Side effects: |
---|
| 1037 | * None. |
---|
| 1038 | * |
---|
| 1039 | *---------------------------------------------------------------------- |
---|
| 1040 | */ |
---|
| 1041 | |
---|
| 1042 | static void |
---|
| 1043 | FinalizeRegexp( |
---|
| 1044 | ClientData clientData) /* Not used. */ |
---|
| 1045 | { |
---|
| 1046 | int i; |
---|
| 1047 | TclRegexp *regexpPtr; |
---|
| 1048 | ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey); |
---|
| 1049 | |
---|
| 1050 | for (i = 0; (i < NUM_REGEXPS) && (tsdPtr->patterns[i] != NULL); i++) { |
---|
| 1051 | regexpPtr = tsdPtr->regexps[i]; |
---|
| 1052 | if (--(regexpPtr->refCount) <= 0) { |
---|
| 1053 | FreeRegexp(regexpPtr); |
---|
| 1054 | } |
---|
| 1055 | ckfree(tsdPtr->patterns[i]); |
---|
| 1056 | tsdPtr->patterns[i] = NULL; |
---|
| 1057 | } |
---|
| 1058 | /* |
---|
| 1059 | * We may find ourselves reinitialized if another finalization routine |
---|
| 1060 | * invokes regexps. |
---|
| 1061 | */ |
---|
| 1062 | tsdPtr->initialized = 0; |
---|
| 1063 | } |
---|
| 1064 | |
---|
| 1065 | /* |
---|
| 1066 | * Local Variables: |
---|
| 1067 | * mode: c |
---|
| 1068 | * c-basic-offset: 4 |
---|
| 1069 | * fill-column: 78 |
---|
| 1070 | * End: |
---|
| 1071 | */ |
---|