1 | /* |
---|
2 | * DFA routines |
---|
3 | * This file is #included by regexec.c. |
---|
4 | * |
---|
5 | * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. |
---|
6 | * |
---|
7 | * Development of this software was funded, in part, by Cray Research Inc., |
---|
8 | * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics |
---|
9 | * Corporation, none of whom are responsible for the results. The author |
---|
10 | * thanks all of them. |
---|
11 | * |
---|
12 | * Redistribution and use in source and binary forms -- with or without |
---|
13 | * modification -- are permitted for any purpose, provided that |
---|
14 | * redistributions in source form retain this entire copyright notice and |
---|
15 | * indicate the origin and nature of any modifications. |
---|
16 | * |
---|
17 | * I'd appreciate being given credit for this package in the documentation |
---|
18 | * of software which uses it, but that is not a requirement. |
---|
19 | * |
---|
20 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, |
---|
21 | * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY |
---|
22 | * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL |
---|
23 | * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
---|
24 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
---|
25 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; |
---|
26 | * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, |
---|
27 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR |
---|
28 | * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF |
---|
29 | * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
---|
30 | * |
---|
31 | */ |
---|
32 | |
---|
33 | /* |
---|
34 | - longest - longest-preferred matching engine |
---|
35 | ^ static chr *longest(struct vars *, struct dfa *, chr *, chr *, int *); |
---|
36 | */ |
---|
37 | static chr * /* endpoint, or NULL */ |
---|
38 | longest( |
---|
39 | struct vars *v, /* used only for debug and exec flags */ |
---|
40 | struct dfa *d, |
---|
41 | chr *start, /* where the match should start */ |
---|
42 | chr *stop, /* match must end at or before here */ |
---|
43 | int *hitstopp) /* record whether hit v->stop, if non-NULL */ |
---|
44 | { |
---|
45 | chr *cp; |
---|
46 | chr *realstop = (stop == v->stop) ? stop : stop + 1; |
---|
47 | color co; |
---|
48 | struct sset *css; |
---|
49 | struct sset *ss; |
---|
50 | chr *post; |
---|
51 | int i; |
---|
52 | struct colormap *cm = d->cm; |
---|
53 | |
---|
54 | /* |
---|
55 | * Initialize. |
---|
56 | */ |
---|
57 | |
---|
58 | css = initialize(v, d, start); |
---|
59 | cp = start; |
---|
60 | if (hitstopp != NULL) { |
---|
61 | *hitstopp = 0; |
---|
62 | } |
---|
63 | |
---|
64 | /* |
---|
65 | * Startup. |
---|
66 | */ |
---|
67 | |
---|
68 | FDEBUG(("+++ startup +++\n")); |
---|
69 | if (cp == v->start) { |
---|
70 | co = d->cnfa->bos[(v->eflags®_NOTBOL) ? 0 : 1]; |
---|
71 | FDEBUG(("color %ld\n", (long)co)); |
---|
72 | } else { |
---|
73 | co = GETCOLOR(cm, *(cp - 1)); |
---|
74 | FDEBUG(("char %c, color %ld\n", (char)*(cp-1), (long)co)); |
---|
75 | } |
---|
76 | css = miss(v, d, css, co, cp, start); |
---|
77 | if (css == NULL) { |
---|
78 | return NULL; |
---|
79 | } |
---|
80 | css->lastseen = cp; |
---|
81 | |
---|
82 | /* |
---|
83 | * Main loop. |
---|
84 | */ |
---|
85 | |
---|
86 | if (v->eflags®_FTRACE) { |
---|
87 | while (cp < realstop) { |
---|
88 | FDEBUG(("+++ at c%d +++\n", css - d->ssets)); |
---|
89 | co = GETCOLOR(cm, *cp); |
---|
90 | FDEBUG(("char %c, color %ld\n", (char)*cp, (long)co)); |
---|
91 | ss = css->outs[co]; |
---|
92 | if (ss == NULL) { |
---|
93 | ss = miss(v, d, css, co, cp+1, start); |
---|
94 | if (ss == NULL) { |
---|
95 | break; /* NOTE BREAK OUT */ |
---|
96 | } |
---|
97 | } |
---|
98 | cp++; |
---|
99 | ss->lastseen = cp; |
---|
100 | css = ss; |
---|
101 | } |
---|
102 | } else { |
---|
103 | while (cp < realstop) { |
---|
104 | co = GETCOLOR(cm, *cp); |
---|
105 | ss = css->outs[co]; |
---|
106 | if (ss == NULL) { |
---|
107 | ss = miss(v, d, css, co, cp+1, start); |
---|
108 | if (ss == NULL) { |
---|
109 | break; /* NOTE BREAK OUT */ |
---|
110 | } |
---|
111 | } |
---|
112 | cp++; |
---|
113 | ss->lastseen = cp; |
---|
114 | css = ss; |
---|
115 | } |
---|
116 | } |
---|
117 | |
---|
118 | /* |
---|
119 | * Shutdown. |
---|
120 | */ |
---|
121 | |
---|
122 | FDEBUG(("+++ shutdown at c%d +++\n", css - d->ssets)); |
---|
123 | if (cp == v->stop && stop == v->stop) { |
---|
124 | if (hitstopp != NULL) { |
---|
125 | *hitstopp = 1; |
---|
126 | } |
---|
127 | co = d->cnfa->eos[(v->eflags®_NOTEOL) ? 0 : 1]; |
---|
128 | FDEBUG(("color %ld\n", (long)co)); |
---|
129 | ss = miss(v, d, css, co, cp, start); |
---|
130 | |
---|
131 | /* |
---|
132 | * Special case: match ended at eol? |
---|
133 | */ |
---|
134 | |
---|
135 | if (ss != NULL && (ss->flags&POSTSTATE)) { |
---|
136 | return cp; |
---|
137 | } else if (ss != NULL) { |
---|
138 | ss->lastseen = cp; /* to be tidy */ |
---|
139 | } |
---|
140 | } |
---|
141 | |
---|
142 | /* |
---|
143 | * Find last match, if any. |
---|
144 | */ |
---|
145 | |
---|
146 | post = d->lastpost; |
---|
147 | for (ss = d->ssets, i = d->nssused; i > 0; ss++, i--) { |
---|
148 | if ((ss->flags&POSTSTATE) && (post != ss->lastseen) && |
---|
149 | (post == NULL || post < ss->lastseen)) { |
---|
150 | post = ss->lastseen; |
---|
151 | } |
---|
152 | } |
---|
153 | if (post != NULL) { /* found one */ |
---|
154 | return post - 1; |
---|
155 | } |
---|
156 | |
---|
157 | return NULL; |
---|
158 | } |
---|
159 | |
---|
160 | /* |
---|
161 | - shortest - shortest-preferred matching engine |
---|
162 | ^ static chr *shortest(struct vars *, struct dfa *, chr *, chr *, chr *, |
---|
163 | ^ chr **, int *); |
---|
164 | */ |
---|
165 | static chr * /* endpoint, or NULL */ |
---|
166 | shortest( |
---|
167 | struct vars *v, |
---|
168 | struct dfa *d, |
---|
169 | chr *start, /* where the match should start */ |
---|
170 | chr *min, /* match must end at or after here */ |
---|
171 | chr *max, /* match must end at or before here */ |
---|
172 | chr **coldp, /* store coldstart pointer here, if nonNULL */ |
---|
173 | int *hitstopp) /* record whether hit v->stop, if non-NULL */ |
---|
174 | { |
---|
175 | chr *cp; |
---|
176 | chr *realmin = (min == v->stop) ? min : min + 1; |
---|
177 | chr *realmax = (max == v->stop) ? max : max + 1; |
---|
178 | color co; |
---|
179 | struct sset *css; |
---|
180 | struct sset *ss; |
---|
181 | struct colormap *cm = d->cm; |
---|
182 | |
---|
183 | /* |
---|
184 | * Initialize. |
---|
185 | */ |
---|
186 | |
---|
187 | css = initialize(v, d, start); |
---|
188 | cp = start; |
---|
189 | if (hitstopp != NULL) { |
---|
190 | *hitstopp = 0; |
---|
191 | } |
---|
192 | |
---|
193 | /* |
---|
194 | * Startup. |
---|
195 | */ |
---|
196 | |
---|
197 | FDEBUG(("--- startup ---\n")); |
---|
198 | if (cp == v->start) { |
---|
199 | co = d->cnfa->bos[(v->eflags®_NOTBOL) ? 0 : 1]; |
---|
200 | FDEBUG(("color %ld\n", (long)co)); |
---|
201 | } else { |
---|
202 | co = GETCOLOR(cm, *(cp - 1)); |
---|
203 | FDEBUG(("char %c, color %ld\n", (char)*(cp-1), (long)co)); |
---|
204 | } |
---|
205 | css = miss(v, d, css, co, cp, start); |
---|
206 | if (css == NULL) { |
---|
207 | return NULL; |
---|
208 | } |
---|
209 | css->lastseen = cp; |
---|
210 | ss = css; |
---|
211 | |
---|
212 | /* |
---|
213 | * Main loop. |
---|
214 | */ |
---|
215 | |
---|
216 | if (v->eflags®_FTRACE) { |
---|
217 | while (cp < realmax) { |
---|
218 | FDEBUG(("--- at c%d ---\n", css - d->ssets)); |
---|
219 | co = GETCOLOR(cm, *cp); |
---|
220 | FDEBUG(("char %c, color %ld\n", (char)*cp, (long)co)); |
---|
221 | ss = css->outs[co]; |
---|
222 | if (ss == NULL) { |
---|
223 | ss = miss(v, d, css, co, cp+1, start); |
---|
224 | if (ss == NULL) { |
---|
225 | break; /* NOTE BREAK OUT */ |
---|
226 | } |
---|
227 | } |
---|
228 | cp++; |
---|
229 | ss->lastseen = cp; |
---|
230 | css = ss; |
---|
231 | if ((ss->flags&POSTSTATE) && cp >= realmin) { |
---|
232 | break; /* NOTE BREAK OUT */ |
---|
233 | } |
---|
234 | } |
---|
235 | } else { |
---|
236 | while (cp < realmax) { |
---|
237 | co = GETCOLOR(cm, *cp); |
---|
238 | ss = css->outs[co]; |
---|
239 | if (ss == NULL) { |
---|
240 | ss = miss(v, d, css, co, cp+1, start); |
---|
241 | if (ss == NULL) { |
---|
242 | break; /* NOTE BREAK OUT */ |
---|
243 | } |
---|
244 | } |
---|
245 | cp++; |
---|
246 | ss->lastseen = cp; |
---|
247 | css = ss; |
---|
248 | if ((ss->flags&POSTSTATE) && cp >= realmin) { |
---|
249 | break; /* NOTE BREAK OUT */ |
---|
250 | } |
---|
251 | } |
---|
252 | } |
---|
253 | |
---|
254 | if (ss == NULL) { |
---|
255 | return NULL; |
---|
256 | } |
---|
257 | |
---|
258 | if (coldp != NULL) { /* report last no-progress state set, if any */ |
---|
259 | *coldp = lastcold(v, d); |
---|
260 | } |
---|
261 | |
---|
262 | if ((ss->flags&POSTSTATE) && cp > min) { |
---|
263 | assert(cp >= realmin); |
---|
264 | cp--; |
---|
265 | } else if (cp == v->stop && max == v->stop) { |
---|
266 | co = d->cnfa->eos[(v->eflags®_NOTEOL) ? 0 : 1]; |
---|
267 | FDEBUG(("color %ld\n", (long)co)); |
---|
268 | ss = miss(v, d, css, co, cp, start); |
---|
269 | |
---|
270 | /* |
---|
271 | * Match might have ended at eol. |
---|
272 | */ |
---|
273 | |
---|
274 | if ((ss == NULL || !(ss->flags&POSTSTATE)) && hitstopp != NULL) { |
---|
275 | *hitstopp = 1; |
---|
276 | } |
---|
277 | } |
---|
278 | |
---|
279 | if (ss == NULL || !(ss->flags&POSTSTATE)) { |
---|
280 | return NULL; |
---|
281 | } |
---|
282 | |
---|
283 | return cp; |
---|
284 | } |
---|
285 | |
---|
286 | /* |
---|
287 | - lastcold - determine last point at which no progress had been made |
---|
288 | ^ static chr *lastcold(struct vars *, struct dfa *); |
---|
289 | */ |
---|
290 | static chr * /* endpoint, or NULL */ |
---|
291 | lastcold( |
---|
292 | struct vars *v, |
---|
293 | struct dfa *d) |
---|
294 | { |
---|
295 | struct sset *ss; |
---|
296 | chr *nopr; |
---|
297 | int i; |
---|
298 | |
---|
299 | nopr = d->lastnopr; |
---|
300 | if (nopr == NULL) { |
---|
301 | nopr = v->start; |
---|
302 | } |
---|
303 | for (ss = d->ssets, i = d->nssused; i > 0; ss++, i--) { |
---|
304 | if ((ss->flags&NOPROGRESS) && nopr < ss->lastseen) { |
---|
305 | nopr = ss->lastseen; |
---|
306 | } |
---|
307 | } |
---|
308 | return nopr; |
---|
309 | } |
---|
310 | |
---|
311 | /* |
---|
312 | - newdfa - set up a fresh DFA |
---|
313 | ^ static struct dfa *newdfa(struct vars *, struct cnfa *, |
---|
314 | ^ struct colormap *, struct smalldfa *); |
---|
315 | */ |
---|
316 | static struct dfa * |
---|
317 | newdfa( |
---|
318 | struct vars *v, |
---|
319 | struct cnfa *cnfa, |
---|
320 | struct colormap *cm, |
---|
321 | struct smalldfa *small) /* preallocated space, may be NULL */ |
---|
322 | { |
---|
323 | struct dfa *d; |
---|
324 | size_t nss = cnfa->nstates * 2; |
---|
325 | int wordsper = (cnfa->nstates + UBITS - 1) / UBITS; |
---|
326 | struct smalldfa *smallwas = small; |
---|
327 | |
---|
328 | assert(cnfa != NULL && cnfa->nstates != 0); |
---|
329 | |
---|
330 | if (nss <= FEWSTATES && cnfa->ncolors <= FEWCOLORS) { |
---|
331 | assert(wordsper == 1); |
---|
332 | if (small == NULL) { |
---|
333 | small = (struct smalldfa *) MALLOC(sizeof(struct smalldfa)); |
---|
334 | if (small == NULL) { |
---|
335 | ERR(REG_ESPACE); |
---|
336 | return NULL; |
---|
337 | } |
---|
338 | } |
---|
339 | d = &small->dfa; |
---|
340 | d->ssets = small->ssets; |
---|
341 | d->statesarea = small->statesarea; |
---|
342 | d->work = &d->statesarea[nss]; |
---|
343 | d->outsarea = small->outsarea; |
---|
344 | d->incarea = small->incarea; |
---|
345 | d->cptsmalloced = 0; |
---|
346 | d->mallocarea = (smallwas == NULL) ? (char *)small : NULL; |
---|
347 | } else { |
---|
348 | d = (struct dfa *)MALLOC(sizeof(struct dfa)); |
---|
349 | if (d == NULL) { |
---|
350 | ERR(REG_ESPACE); |
---|
351 | return NULL; |
---|
352 | } |
---|
353 | d->ssets = (struct sset *)MALLOC(nss * sizeof(struct sset)); |
---|
354 | d->statesarea = (unsigned *) |
---|
355 | MALLOC((nss+WORK) * wordsper * sizeof(unsigned)); |
---|
356 | d->work = &d->statesarea[nss * wordsper]; |
---|
357 | d->outsarea = (struct sset **) |
---|
358 | MALLOC(nss * cnfa->ncolors * sizeof(struct sset *)); |
---|
359 | d->incarea = (struct arcp *) |
---|
360 | MALLOC(nss * cnfa->ncolors * sizeof(struct arcp)); |
---|
361 | d->cptsmalloced = 1; |
---|
362 | d->mallocarea = (char *)d; |
---|
363 | if (d->ssets == NULL || d->statesarea == NULL || |
---|
364 | d->outsarea == NULL || d->incarea == NULL) { |
---|
365 | freedfa(d); |
---|
366 | ERR(REG_ESPACE); |
---|
367 | return NULL; |
---|
368 | } |
---|
369 | } |
---|
370 | |
---|
371 | d->nssets = (v->eflags®_SMALL) ? 7 : nss; |
---|
372 | d->nssused = 0; |
---|
373 | d->nstates = cnfa->nstates; |
---|
374 | d->ncolors = cnfa->ncolors; |
---|
375 | d->wordsper = wordsper; |
---|
376 | d->cnfa = cnfa; |
---|
377 | d->cm = cm; |
---|
378 | d->lastpost = NULL; |
---|
379 | d->lastnopr = NULL; |
---|
380 | d->search = d->ssets; |
---|
381 | |
---|
382 | /* |
---|
383 | * Initialization of sset fields is done as needed. |
---|
384 | */ |
---|
385 | |
---|
386 | return d; |
---|
387 | } |
---|
388 | |
---|
389 | /* |
---|
390 | - freedfa - free a DFA |
---|
391 | ^ static VOID freedfa(struct dfa *); |
---|
392 | */ |
---|
393 | static void |
---|
394 | freedfa( |
---|
395 | struct dfa *d) |
---|
396 | { |
---|
397 | if (d->cptsmalloced) { |
---|
398 | if (d->ssets != NULL) { |
---|
399 | FREE(d->ssets); |
---|
400 | } |
---|
401 | if (d->statesarea != NULL) { |
---|
402 | FREE(d->statesarea); |
---|
403 | } |
---|
404 | if (d->outsarea != NULL) { |
---|
405 | FREE(d->outsarea); |
---|
406 | } |
---|
407 | if (d->incarea != NULL) { |
---|
408 | FREE(d->incarea); |
---|
409 | } |
---|
410 | } |
---|
411 | |
---|
412 | if (d->mallocarea != NULL) { |
---|
413 | FREE(d->mallocarea); |
---|
414 | } |
---|
415 | } |
---|
416 | |
---|
417 | /* |
---|
418 | - hash - construct a hash code for a bitvector |
---|
419 | * There are probably better ways, but they're more expensive. |
---|
420 | ^ static unsigned hash(unsigned *, int); |
---|
421 | */ |
---|
422 | static unsigned |
---|
423 | hash( |
---|
424 | unsigned *uv, |
---|
425 | int n) |
---|
426 | { |
---|
427 | int i; |
---|
428 | unsigned h; |
---|
429 | |
---|
430 | h = 0; |
---|
431 | for (i = 0; i < n; i++) { |
---|
432 | h ^= uv[i]; |
---|
433 | } |
---|
434 | return h; |
---|
435 | } |
---|
436 | |
---|
437 | /* |
---|
438 | - initialize - hand-craft a cache entry for startup, otherwise get ready |
---|
439 | ^ static struct sset *initialize(struct vars *, struct dfa *, chr *); |
---|
440 | */ |
---|
441 | static struct sset * |
---|
442 | initialize( |
---|
443 | struct vars *v, /* used only for debug flags */ |
---|
444 | struct dfa *d, |
---|
445 | chr *start) |
---|
446 | { |
---|
447 | struct sset *ss; |
---|
448 | int i; |
---|
449 | |
---|
450 | /* |
---|
451 | * Is previous one still there? |
---|
452 | */ |
---|
453 | |
---|
454 | if (d->nssused > 0 && (d->ssets[0].flags&STARTER)) { |
---|
455 | ss = &d->ssets[0]; |
---|
456 | } else { /* no, must (re)build it */ |
---|
457 | ss = getvacant(v, d, start, start); |
---|
458 | for (i = 0; i < d->wordsper; i++) { |
---|
459 | ss->states[i] = 0; |
---|
460 | } |
---|
461 | BSET(ss->states, d->cnfa->pre); |
---|
462 | ss->hash = HASH(ss->states, d->wordsper); |
---|
463 | assert(d->cnfa->pre != d->cnfa->post); |
---|
464 | ss->flags = STARTER|LOCKED|NOPROGRESS; |
---|
465 | |
---|
466 | /* |
---|
467 | * lastseen dealt with below |
---|
468 | */ |
---|
469 | } |
---|
470 | |
---|
471 | for (i = 0; i < d->nssused; i++) { |
---|
472 | d->ssets[i].lastseen = NULL; |
---|
473 | } |
---|
474 | ss->lastseen = start; /* maybe untrue, but harmless */ |
---|
475 | d->lastpost = NULL; |
---|
476 | d->lastnopr = NULL; |
---|
477 | return ss; |
---|
478 | } |
---|
479 | |
---|
480 | /* |
---|
481 | - miss - handle a cache miss |
---|
482 | ^ static struct sset *miss(struct vars *, struct dfa *, struct sset *, |
---|
483 | ^ pcolor, chr *, chr *); |
---|
484 | */ |
---|
485 | static struct sset * /* NULL if goes to empty set */ |
---|
486 | miss( |
---|
487 | struct vars *v, /* used only for debug flags */ |
---|
488 | struct dfa *d, |
---|
489 | struct sset *css, |
---|
490 | pcolor co, |
---|
491 | chr *cp, /* next chr */ |
---|
492 | chr *start) /* where the attempt got started */ |
---|
493 | { |
---|
494 | struct cnfa *cnfa = d->cnfa; |
---|
495 | int i; |
---|
496 | unsigned h; |
---|
497 | struct carc *ca; |
---|
498 | struct sset *p; |
---|
499 | int ispost; |
---|
500 | int noprogress; |
---|
501 | int gotstate; |
---|
502 | int dolacons; |
---|
503 | int sawlacons; |
---|
504 | |
---|
505 | /* |
---|
506 | * For convenience, we can be called even if it might not be a miss. |
---|
507 | */ |
---|
508 | |
---|
509 | if (css->outs[co] != NULL) { |
---|
510 | FDEBUG(("hit\n")); |
---|
511 | return css->outs[co]; |
---|
512 | } |
---|
513 | FDEBUG(("miss\n")); |
---|
514 | |
---|
515 | /* |
---|
516 | * First, what set of states would we end up in? |
---|
517 | */ |
---|
518 | |
---|
519 | for (i = 0; i < d->wordsper; i++) { |
---|
520 | d->work[i] = 0; |
---|
521 | } |
---|
522 | ispost = 0; |
---|
523 | noprogress = 1; |
---|
524 | gotstate = 0; |
---|
525 | for (i = 0; i < d->nstates; i++) { |
---|
526 | if (ISBSET(css->states, i)) { |
---|
527 | for (ca = cnfa->states[i]+1; ca->co != COLORLESS; ca++) { |
---|
528 | if (ca->co == co) { |
---|
529 | BSET(d->work, ca->to); |
---|
530 | gotstate = 1; |
---|
531 | if (ca->to == cnfa->post) { |
---|
532 | ispost = 1; |
---|
533 | } |
---|
534 | if (!cnfa->states[ca->to]->co) { |
---|
535 | noprogress = 0; |
---|
536 | } |
---|
537 | FDEBUG(("%d -> %d\n", i, ca->to)); |
---|
538 | } |
---|
539 | } |
---|
540 | } |
---|
541 | } |
---|
542 | dolacons = (gotstate) ? (cnfa->flags&HASLACONS) : 0; |
---|
543 | sawlacons = 0; |
---|
544 | while (dolacons) { /* transitive closure */ |
---|
545 | dolacons = 0; |
---|
546 | for (i = 0; i < d->nstates; i++) { |
---|
547 | if (ISBSET(d->work, i)) { |
---|
548 | for (ca = cnfa->states[i]+1; ca->co != COLORLESS; ca++) { |
---|
549 | if (ca->co <= cnfa->ncolors) { |
---|
550 | continue; /* NOTE CONTINUE */ |
---|
551 | } |
---|
552 | sawlacons = 1; |
---|
553 | if (ISBSET(d->work, ca->to)) { |
---|
554 | continue; /* NOTE CONTINUE */ |
---|
555 | } |
---|
556 | if (!lacon(v, cnfa, cp, ca->co)) { |
---|
557 | continue; /* NOTE CONTINUE */ |
---|
558 | } |
---|
559 | BSET(d->work, ca->to); |
---|
560 | dolacons = 1; |
---|
561 | if (ca->to == cnfa->post) { |
---|
562 | ispost = 1; |
---|
563 | } |
---|
564 | if (!cnfa->states[ca->to]->co) { |
---|
565 | noprogress = 0; |
---|
566 | } |
---|
567 | FDEBUG(("%d :> %d\n", i, ca->to)); |
---|
568 | } |
---|
569 | } |
---|
570 | } |
---|
571 | } |
---|
572 | if (!gotstate) { |
---|
573 | return NULL; |
---|
574 | } |
---|
575 | h = HASH(d->work, d->wordsper); |
---|
576 | |
---|
577 | /* |
---|
578 | * Next, is that in the cache? |
---|
579 | */ |
---|
580 | |
---|
581 | for (p = d->ssets, i = d->nssused; i > 0; p++, i--) { |
---|
582 | if (HIT(h, d->work, p, d->wordsper)) { |
---|
583 | FDEBUG(("cached c%d\n", p - d->ssets)); |
---|
584 | break; /* NOTE BREAK OUT */ |
---|
585 | } |
---|
586 | } |
---|
587 | if (i == 0) { /* nope, need a new cache entry */ |
---|
588 | p = getvacant(v, d, cp, start); |
---|
589 | assert(p != css); |
---|
590 | for (i = 0; i < d->wordsper; i++) { |
---|
591 | p->states[i] = d->work[i]; |
---|
592 | } |
---|
593 | p->hash = h; |
---|
594 | p->flags = (ispost) ? POSTSTATE : 0; |
---|
595 | if (noprogress) { |
---|
596 | p->flags |= NOPROGRESS; |
---|
597 | } |
---|
598 | |
---|
599 | /* |
---|
600 | * lastseen to be dealt with by caller |
---|
601 | */ |
---|
602 | } |
---|
603 | |
---|
604 | if (!sawlacons) { /* lookahead conds. always cache miss */ |
---|
605 | FDEBUG(("c%d[%d]->c%d\n", css - d->ssets, co, p - d->ssets)); |
---|
606 | css->outs[co] = p; |
---|
607 | css->inchain[co] = p->ins; |
---|
608 | p->ins.ss = css; |
---|
609 | p->ins.co = (color)co; |
---|
610 | } |
---|
611 | return p; |
---|
612 | } |
---|
613 | |
---|
614 | /* |
---|
615 | - lacon - lookahead-constraint checker for miss() |
---|
616 | ^ static int lacon(struct vars *, struct cnfa *, chr *, pcolor); |
---|
617 | */ |
---|
618 | static int /* predicate: constraint satisfied? */ |
---|
619 | lacon( |
---|
620 | struct vars *v, |
---|
621 | struct cnfa *pcnfa, /* parent cnfa */ |
---|
622 | chr *cp, |
---|
623 | pcolor co) /* "color" of the lookahead constraint */ |
---|
624 | { |
---|
625 | int n; |
---|
626 | struct subre *sub; |
---|
627 | struct dfa *d; |
---|
628 | struct smalldfa sd; |
---|
629 | chr *end; |
---|
630 | |
---|
631 | n = co - pcnfa->ncolors; |
---|
632 | assert(n < v->g->nlacons && v->g->lacons != NULL); |
---|
633 | FDEBUG(("=== testing lacon %d\n", n)); |
---|
634 | sub = &v->g->lacons[n]; |
---|
635 | d = newdfa(v, &sub->cnfa, &v->g->cmap, &sd); |
---|
636 | if (d == NULL) { |
---|
637 | ERR(REG_ESPACE); |
---|
638 | return 0; |
---|
639 | } |
---|
640 | end = longest(v, d, cp, v->stop, (int *)NULL); |
---|
641 | freedfa(d); |
---|
642 | FDEBUG(("=== lacon %d match %d\n", n, (end != NULL))); |
---|
643 | return (sub->subno) ? (end != NULL) : (end == NULL); |
---|
644 | } |
---|
645 | |
---|
646 | /* |
---|
647 | - getvacant - get a vacant state set |
---|
648 | * This routine clears out the inarcs and outarcs, but does not otherwise |
---|
649 | * clear the innards of the state set -- that's up to the caller. |
---|
650 | ^ static struct sset *getvacant(struct vars *, struct dfa *, chr *, chr *); |
---|
651 | */ |
---|
652 | static struct sset * |
---|
653 | getvacant( |
---|
654 | struct vars *v, /* used only for debug flags */ |
---|
655 | struct dfa *d, |
---|
656 | chr *cp, |
---|
657 | chr *start) |
---|
658 | { |
---|
659 | int i; |
---|
660 | struct sset *ss; |
---|
661 | struct sset *p; |
---|
662 | struct arcp ap; |
---|
663 | struct arcp lastap = {NULL, 0}; /* silence gcc 4 warning */ |
---|
664 | color co; |
---|
665 | |
---|
666 | ss = pickss(v, d, cp, start); |
---|
667 | assert(!(ss->flags&LOCKED)); |
---|
668 | |
---|
669 | /* |
---|
670 | * Clear out its inarcs, including self-referential ones. |
---|
671 | */ |
---|
672 | |
---|
673 | ap = ss->ins; |
---|
674 | while ((p = ap.ss) != NULL) { |
---|
675 | co = ap.co; |
---|
676 | FDEBUG(("zapping c%d's %ld outarc\n", p - d->ssets, (long)co)); |
---|
677 | p->outs[co] = NULL; |
---|
678 | ap = p->inchain[co]; |
---|
679 | p->inchain[co].ss = NULL; /* paranoia */ |
---|
680 | } |
---|
681 | ss->ins.ss = NULL; |
---|
682 | |
---|
683 | /* |
---|
684 | * Take it off the inarc chains of the ssets reached by its outarcs. |
---|
685 | */ |
---|
686 | |
---|
687 | for (i = 0; i < d->ncolors; i++) { |
---|
688 | p = ss->outs[i]; |
---|
689 | assert(p != ss); /* not self-referential */ |
---|
690 | if (p == NULL) { |
---|
691 | continue; /* NOTE CONTINUE */ |
---|
692 | } |
---|
693 | FDEBUG(("del outarc %d from c%d's in chn\n", i, p - d->ssets)); |
---|
694 | if (p->ins.ss == ss && p->ins.co == i) { |
---|
695 | p->ins = ss->inchain[i]; |
---|
696 | } else { |
---|
697 | assert(p->ins.ss != NULL); |
---|
698 | for (ap = p->ins; ap.ss != NULL && |
---|
699 | !(ap.ss == ss && ap.co == i); |
---|
700 | ap = ap.ss->inchain[ap.co]) { |
---|
701 | lastap = ap; |
---|
702 | } |
---|
703 | assert(ap.ss != NULL); |
---|
704 | lastap.ss->inchain[lastap.co] = ss->inchain[i]; |
---|
705 | } |
---|
706 | ss->outs[i] = NULL; |
---|
707 | ss->inchain[i].ss = NULL; |
---|
708 | } |
---|
709 | |
---|
710 | /* |
---|
711 | * If ss was a success state, may need to remember location. |
---|
712 | */ |
---|
713 | |
---|
714 | if ((ss->flags&POSTSTATE) && ss->lastseen != d->lastpost && |
---|
715 | (d->lastpost == NULL || d->lastpost < ss->lastseen)) { |
---|
716 | d->lastpost = ss->lastseen; |
---|
717 | } |
---|
718 | |
---|
719 | /* |
---|
720 | * Likewise for a no-progress state. |
---|
721 | */ |
---|
722 | |
---|
723 | if ((ss->flags&NOPROGRESS) && ss->lastseen != d->lastnopr && |
---|
724 | (d->lastnopr == NULL || d->lastnopr < ss->lastseen)) { |
---|
725 | d->lastnopr = ss->lastseen; |
---|
726 | } |
---|
727 | |
---|
728 | return ss; |
---|
729 | } |
---|
730 | |
---|
731 | /* |
---|
732 | - pickss - pick the next stateset to be used |
---|
733 | ^ static struct sset *pickss(struct vars *, struct dfa *, chr *, chr *); |
---|
734 | */ |
---|
735 | static struct sset * |
---|
736 | pickss( |
---|
737 | struct vars *v, /* used only for debug flags */ |
---|
738 | struct dfa *d, |
---|
739 | chr *cp, |
---|
740 | chr *start) |
---|
741 | { |
---|
742 | int i; |
---|
743 | struct sset *ss; |
---|
744 | struct sset *end; |
---|
745 | chr *ancient; |
---|
746 | |
---|
747 | /* |
---|
748 | * Shortcut for cases where cache isn't full. |
---|
749 | */ |
---|
750 | |
---|
751 | if (d->nssused < d->nssets) { |
---|
752 | i = d->nssused; |
---|
753 | d->nssused++; |
---|
754 | ss = &d->ssets[i]; |
---|
755 | FDEBUG(("new c%d\n", i)); |
---|
756 | |
---|
757 | /* |
---|
758 | * Set up innards. |
---|
759 | */ |
---|
760 | |
---|
761 | ss->states = &d->statesarea[i * d->wordsper]; |
---|
762 | ss->flags = 0; |
---|
763 | ss->ins.ss = NULL; |
---|
764 | ss->ins.co = WHITE; /* give it some value */ |
---|
765 | ss->outs = &d->outsarea[i * d->ncolors]; |
---|
766 | ss->inchain = &d->incarea[i * d->ncolors]; |
---|
767 | for (i = 0; i < d->ncolors; i++) { |
---|
768 | ss->outs[i] = NULL; |
---|
769 | ss->inchain[i].ss = NULL; |
---|
770 | } |
---|
771 | return ss; |
---|
772 | } |
---|
773 | |
---|
774 | /* |
---|
775 | * Look for oldest, or old enough anyway. |
---|
776 | */ |
---|
777 | |
---|
778 | if (cp - start > d->nssets*2/3) { /* oldest 33% are expendable */ |
---|
779 | ancient = cp - d->nssets*2/3; |
---|
780 | } else { |
---|
781 | ancient = start; |
---|
782 | } |
---|
783 | for (ss = d->search, end = &d->ssets[d->nssets]; ss < end; ss++) { |
---|
784 | if ((ss->lastseen == NULL || ss->lastseen < ancient) |
---|
785 | && !(ss->flags&LOCKED)) { |
---|
786 | d->search = ss + 1; |
---|
787 | FDEBUG(("replacing c%d\n", ss - d->ssets)); |
---|
788 | return ss; |
---|
789 | } |
---|
790 | } |
---|
791 | for (ss = d->ssets, end = d->search; ss < end; ss++) { |
---|
792 | if ((ss->lastseen == NULL || ss->lastseen < ancient) |
---|
793 | && !(ss->flags&LOCKED)) { |
---|
794 | d->search = ss + 1; |
---|
795 | FDEBUG(("replacing c%d\n", ss - d->ssets)); |
---|
796 | return ss; |
---|
797 | } |
---|
798 | } |
---|
799 | |
---|
800 | /* |
---|
801 | * Nobody's old enough?!? -- something's really wrong. |
---|
802 | */ |
---|
803 | |
---|
804 | FDEBUG(("can't find victim to replace!\n")); |
---|
805 | assert(NOTREACHED); |
---|
806 | ERR(REG_ASSERT); |
---|
807 | return d->ssets; |
---|
808 | } |
---|
809 | |
---|
810 | /* |
---|
811 | * Local Variables: |
---|
812 | * mode: c |
---|
813 | * c-basic-offset: 4 |
---|
814 | * fill-column: 78 |
---|
815 | * End: |
---|
816 | */ |
---|