Planet

navi

home

PPS

about

screenshots

download

development

forum

Context Navigation

source: downloads/tcl8.5.2/generic/regcomp.c @ 35

Last change on this file since 35 was 25, checked in by landauf, 18 years ago
added tcl to libs
File size: 52.9 KB

Rev	Line
[25]	1	/*
	2	* re_*comp and friends - compile REs
	3	* This file #includes several others (see the bottom).
	4	*
	5	* Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
	6	*
	7	* Development of this software was funded, in part, by Cray Research Inc.,
	8	* UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
	9	* Corporation, none of whom are responsible for the results. The author
	10	* thanks all of them.
	11	*
	12	* Redistribution and use in source and binary forms -- with or without
	13	* modification -- are permitted for any purpose, provided that
	14	* redistributions in source form retain this entire copyright notice and
	15	* indicate the origin and nature of any modifications.
	16	*
	17	* I'd appreciate being given credit for this package in the documentation of
	18	* software which uses it, but that is not a requirement.
	19	*
	20	* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
	21	* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
	22	* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
	23	* HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
	24	* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
	25	* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
	26	* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
	27	* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
	28	* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
	29	* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	30	*
	31	*/
	32
	33	#include "regguts.h"
	34
	35	/*
	36	* forward declarations, up here so forward datatypes etc. are defined early
	37	*/
	38	/* =====^!^===== begin forwards =====^!^===== */
	39	/* automatically gathered by fwd; do not hand-edit */
	40	/* === regcomp.c === */
	41	int compile(regex_t , const chr , size_t, int);
	42	static void moresubs(struct vars *, int);
	43	static int freev(struct vars *, int);
	44	static void makesearch(struct vars , struct nfa );
	45	static struct subre parse(struct vars , int, int, struct state , struct state );
	46	static struct subre parsebranch(struct vars , int, int, struct state , struct state , int);
	47	static void parseqatom(struct vars , int, int, struct state , struct state , struct subre );
	48	static void nonword(struct vars , int, struct state , struct state *);
	49	static void word(struct vars , int, struct state , struct state *);
	50	static int scannum(struct vars *);
	51	static void repeat(struct vars , struct state , struct state *, int, int);
	52	static void bracket(struct vars , struct state , struct state *);
	53	static void cbracket(struct vars , struct state , struct state *);
	54	static void brackpart(struct vars , struct state , struct state *);
	55	static const chr scanplain(struct vars );
	56	static void onechr(struct vars , pchr, struct state , struct state *);
	57	static void dovec(struct vars , struct cvec , struct state , struct state );
	58	static void wordchrs(struct vars *);
	59	static struct subre subre(struct vars , int, int, struct state , struct state );
	60	static void freesubre(struct vars , struct subre );
	61	static void freesrnode(struct vars , struct subre );
	62	static void optst(struct vars , struct subre );
	63	static int numst(struct subre *, int);
	64	static void markst(struct subre *);
	65	static void cleanst(struct vars *);
	66	static long nfatree(struct vars , struct subre , FILE *);
	67	static long nfanode(struct vars , struct subre , FILE *);
	68	static int newlacon(struct vars , struct state , struct state *, int);
	69	static void freelacons(struct subre *, int);
	70	static void rfree(regex_t *);
	71	static void dump(regex_t , FILE );
	72	static void dumpst(struct subre , FILE , int);
	73	static void stdump(struct subre , FILE , int);
	74	static const char stid(struct subre , char *, size_t);
	75	/* === regc_lex.c === */
	76	static void lexstart(struct vars *);
	77	static void prefixes(struct vars *);
	78	static void lexnest(struct vars , const chr , const chr *);
	79	static void lexword(struct vars *);
	80	static int next(struct vars *);
	81	static int lexescape(struct vars *);
	82	static chr lexdigits(struct vars *, int, int, int);
	83	static int brenext(struct vars *, pchr);
	84	static void skip(struct vars *);
	85	static chr newline(NOPARMS);
	86	#ifdef REG_DEBUG
	87	static const chr *ch(NOPARMS);
	88	#endif
	89	static chr chrnamed(struct vars , const chr , const chr *, pchr);
	90	/* === regc_color.c === */
	91	static void initcm(struct vars , struct colormap );
	92	static void freecm(struct colormap *);
	93	static void cmtreefree(struct colormap , union tree , int);
	94	static color setcolor(struct colormap *, pchr, pcolor);
	95	static color maxcolor(struct colormap *);
	96	static color newcolor(struct colormap *);
	97	static void freecolor(struct colormap *, pcolor);
	98	static color pseudocolor(struct colormap *);
	99	static color subcolor(struct colormap *, pchr c);
	100	static color newsub(struct colormap *, pcolor);
	101	static void subrange(struct vars , pchr, pchr, struct state , struct state *);
	102	static void subblock(struct vars , pchr, struct state , struct state *);
	103	static void okcolors(struct nfa , struct colormap );
	104	static void colorchain(struct colormap , struct arc );
	105	static void uncolorchain(struct colormap , struct arc );
	106	static void rainbow(struct nfa , struct colormap , int, pcolor, struct state , struct state );
	107	static void colorcomplement(struct nfa , struct colormap , int, struct state , struct state , struct state *);
	108	#ifdef REG_DEBUG
	109	static void dumpcolors(struct colormap , FILE );
	110	static void fillcheck(struct colormap , union tree , int, FILE *);
	111	static void dumpchr(pchr, FILE *);
	112	#endif
	113	/* === regc_nfa.c === */
	114	static struct nfa newnfa(struct vars , struct colormap , struct nfa );
	115	static void freenfa(struct nfa *);
	116	static struct state newstate(struct nfa );
	117	static struct state newfstate(struct nfa , int flag);
	118	static void dropstate(struct nfa , struct state );
	119	static void freestate(struct nfa , struct state );
	120	static void destroystate(struct nfa , struct state );
	121	static void newarc(struct nfa , int, pcolor, struct state , struct state *);
	122	static struct arc allocarc(struct nfa , struct state *);
	123	static void freearc(struct nfa , struct arc );
	124	static struct arc findarc(struct state , int, pcolor);
	125	static void cparc(struct nfa , struct arc , struct state , struct state );
	126	static void moveins(struct nfa , struct state , struct state *);
	127	static void copyins(struct nfa , struct state , struct state *);
	128	static void moveouts(struct nfa , struct state , struct state *);
	129	static void copyouts(struct nfa , struct state , struct state *);
	130	static void cloneouts(struct nfa , struct state , struct state , struct state , int);
	131	static void delsub(struct nfa , struct state , struct state *);
	132	static void deltraverse(struct nfa , struct state , struct state *);
	133	static void dupnfa(struct nfa , struct state , struct state , struct state , struct state *);
	134	static void duptraverse(struct nfa , struct state , struct state *);
	135	static void cleartraverse(struct nfa , struct state );
	136	static void specialcolors(struct nfa *);
	137	static long optimize(struct nfa , FILE );
	138	static void pullback(struct nfa , FILE );
	139	static int pull(struct nfa , struct arc );
	140	static void pushfwd(struct nfa , FILE );
	141	static int push(struct nfa , struct arc );
	142	#define INCOMPATIBLE 1 /* destroys arc */
	143	#define SATISFIED 2 /* constraint satisfied */
	144	#define COMPATIBLE 3 /* compatible but not satisfied yet */
	145	static int combine(struct arc , struct arc );
	146	static void fixempties(struct nfa , FILE );
	147	static int unempty(struct nfa , struct arc );
	148	static void cleanup(struct nfa *);
	149	static void markreachable(struct nfa , struct state , struct state , struct state );
	150	static void markcanreach(struct nfa , struct state , struct state , struct state );
	151	static long analyze(struct nfa *);
	152	static void compact(struct nfa , struct cnfa );
	153	static void carcsort(struct carc , struct carc );
	154	static void freecnfa(struct cnfa *);
	155	static void dumpnfa(struct nfa , FILE );
	156	#ifdef REG_DEBUG
	157	static void dumpstate(struct state , FILE );
	158	static void dumparcs(struct state , FILE );
	159	static int dumprarcs(struct arc , struct state , FILE *, int);
	160	static void dumparc(struct arc , struct state , FILE *);
	161	#endif
	162	static void dumpcnfa(struct cnfa , FILE );
	163	#ifdef REG_DEBUG
	164	static void dumpcstate(int, struct carc , struct cnfa , FILE *);
	165	#endif
	166	/* === regc_cvec.c === */
	167	static struct cvec clearcvec(struct cvec );
	168	static void addchr(struct cvec *, pchr);
	169	static void addrange(struct cvec *, pchr, pchr);
	170	static struct cvec *newcvec(int, int);
	171	static struct cvec getcvec(struct vars , int, int);
	172	static void freecvec(struct cvec *);
	173	/* === regc_locale.c === */
	174	static celt element(struct vars , const chr , const chr *);
	175	static struct cvec range(struct vars , celt, celt, int);
	176	static int before(celt, celt);
	177	static struct cvec eclass(struct vars , celt, int);
	178	static struct cvec cclass(struct vars , const chr , const chr , int);
	179	static struct cvec allcases(struct vars , pchr);
	180	static int cmp(const chr , const chr , size_t);
	181	static int casecmp(const chr , const chr , size_t);
	182	/* automatically gathered by fwd; do not hand-edit */
	183	/* =====^!^===== end forwards =====^!^===== */
	184
	185	/* internal variables, bundled for easy passing around */
	186	struct vars {
	187	regex_t *re;
	188	const chr now; / scan pointer into string */
	189	const chr stop; / end of string */
	190	const chr savenow; / saved now and stop for "subroutine call" */
	191	const chr *savestop;
	192	int err; /* error code (0 if none) */
	193	int cflags; /* copy of compile flags */
	194	int lasttype; /* type of previous token */
	195	int nexttype; /* type of next token */
	196	chr nextvalue; /* value (if any) of next token */
	197	int lexcon; /* lexical context type (see lex.c) */
	198	int nsubexp; /* subexpression count */
	199	struct subre *subs; / subRE pointer vector */
	200	size_t nsubs; /* length of vector */
	201	struct subre sub10[10]; / initial vector, enough for most */
	202	struct nfa nfa; / the NFA */
	203	struct colormap cm; / character color map */
	204	color nlcolor; /* color of newline */
	205	struct state wordchrs; / state in nfa holding word-char outarcs */
	206	struct subre tree; / subexpression tree */
	207	struct subre treechain; / all tree nodes allocated */
	208	struct subre treefree; / any free tree nodes */
	209	int ntree; /* number of tree nodes */
	210	struct cvec cv; / interface cvec */
	211	struct cvec cv2; / utility cvec */
	212	struct subre lacons; / lookahead-constraint vector */
	213	int nlacons; /* size of lacons */
	214	};
	215
	216	/* parsing macros; most know that `v' is the struct vars pointer */
	217	#define NEXT() (next(v)) /* advance by one token */
	218	#define SEE(t) (v->nexttype == (t)) /* is next token this? */
	219	#define EAT(t) (SEE(t) && next(v)) /* if next is this, swallow it */
	220	#define VISERR(vv) ((vv)->err != 0)/* have we seen an error yet? */
	221	#define ISERR() VISERR(v)
	222	#define VERR(vv,e) \
	223	((vv)->nexttype = EOS, ((vv)->err) ? (vv)->err : ((vv)->err = (e)))
	224	#define ERR(e) VERR(v, e) /* record an error */
	225	#define NOERR() {if (ISERR()) return;} /* if error seen, return */
	226	#define NOERRN() {if (ISERR()) return NULL;} /* NOERR with retval */
	227	#define NOERRZ() {if (ISERR()) return 0;} /* NOERR with retval */
	228	#define INSIST(c, e) ((c) ? 0 : ERR(e)) /* if condition false, error */
	229	#define NOTE(b) (v->re->re_info \|= (b)) /* note visible condition */
	230	#define EMPTYARC(x, y) newarc(v->nfa, EMPTY, 0, x, y)
	231
	232	/* token type codes, some also used as NFA arc types */
	233	#define EMPTY 'n' /* no token present */
	234	#define EOS 'e' /* end of string */
	235	#define PLAIN 'p' /* ordinary character */
	236	#define DIGIT 'd' /* digit (in bound) */
	237	#define BACKREF 'b' /* back reference */
	238	#define COLLEL 'I' /* start of [. */
	239	#define ECLASS 'E' /* start of [= */
	240	#define CCLASS 'C' /* start of [: */
	241	#define END 'X' /* end of [. [= [: */
	242	#define RANGE 'R' /* - within [] which might be range delim. */
	243	#define LACON 'L' /* lookahead constraint subRE */
	244	#define AHEAD 'a' /* color-lookahead arc */
	245	#define BEHIND 'r' /* color-lookbehind arc */
	246	#define WBDRY 'w' /* word boundary constraint */
	247	#define NWBDRY 'W' /* non-word-boundary constraint */
	248	#define SBEGIN 'A' /* beginning of string (even if not BOL) */
	249	#define SEND 'Z' /* end of string (even if not EOL) */
	250	#define PREFER 'P' /* length preference */
	251
	252	/* is an arc colored, and hence on a color chain? */
	253	#define COLORED(a) \
	254	((a)->type == PLAIN \|\| (a)->type == AHEAD \|\| (a)->type == BEHIND)
	255
	256	/* static function list */
	257	static struct fns functions = {
	258	rfree, /* regfree insides */
	259	};
	260
	261	/*
	262	- compile - compile regular expression
	263	^ int compile(regex_t , const chr , size_t, int);
	264	*/
	265	int
	266	compile(
	267	regex_t *re,
	268	const chr *string,
	269	size_t len,
	270	int flags)
	271	{
	272	AllocVars(v);
	273	struct guts *g;
	274	int i;
	275	size_t j;
	276	FILE *debug = (flags&REG_PROGRESS) ? stdout : NULL;
	277	#define CNOERR() { if (ISERR()) return freev(v, v->err); }
	278
	279	/*
	280	* Sanity checks.
	281	*/
	282
	283	if (re == NULL \|\| string == NULL) {
	284	FreeVars(v);
	285	return REG_INVARG;
	286	}
	287	if ((flags&REG_QUOTE) && (flags&(REG_ADVANCED\|REG_EXPANDED\|REG_NEWLINE))) {
	288	FreeVars(v);
	289	return REG_INVARG;
	290	}
	291	if (!(flags&REG_EXTENDED) && (flags&REG_ADVF)) {
	292	FreeVars(v);
	293	return REG_INVARG;
	294	}
	295
	296	/*
	297	* Initial setup (after which freev() is callable).
	298	*/
	299
	300	v->re = re;
	301	v->now = string;
	302	v->stop = v->now + len;
	303	v->savenow = v->savestop = NULL;
	304	v->err = 0;
	305	v->cflags = flags;
	306	v->nsubexp = 0;
	307	v->subs = v->sub10;
	308	v->nsubs = 10;
	309	for (j = 0; j < v->nsubs; j++) {
	310	v->subs[j] = NULL;
	311	}
	312	v->nfa = NULL;
	313	v->cm = NULL;
	314	v->nlcolor = COLORLESS;
	315	v->wordchrs = NULL;
	316	v->tree = NULL;
	317	v->treechain = NULL;
	318	v->treefree = NULL;
	319	v->cv = NULL;
	320	v->cv2 = NULL;
	321	v->lacons = NULL;
	322	v->nlacons = 0;
	323	re->re_magic = REMAGIC;
	324	re->re_info = 0; /* bits get set during parse */
	325	re->re_csize = sizeof(chr);
	326	re->re_guts = NULL;
	327	re->re_fns = VS(&functions);
	328
	329	/*
	330	* More complex setup, malloced things.
	331	*/
	332
	333	re->re_guts = VS(MALLOC(sizeof(struct guts)));
	334	if (re->re_guts == NULL) {
	335	return freev(v, REG_ESPACE);
	336	}
	337	g = (struct guts *) re->re_guts;
	338	g->tree = NULL;
	339	initcm(v, &g->cmap);
	340	v->cm = &g->cmap;
	341	g->lacons = NULL;
	342	g->nlacons = 0;
	343	ZAPCNFA(g->search);
	344	v->nfa = newnfa(v, v->cm, NULL);
	345	CNOERR();
	346	v->cv = newcvec(100, 20);
	347	if (v->cv == NULL) {
	348	return freev(v, REG_ESPACE);
	349	}
	350
	351	/*
	352	* Parsing.
	353	*/
	354
	355	lexstart(v); /* also handles prefixes */
	356	if ((v->cflags&REG_NLSTOP) \|\| (v->cflags&REG_NLANCH)) {
	357	/*
	358	* Assign newline a unique color.
	359	*/
	360
	361	v->nlcolor = subcolor(v->cm, newline());
	362	okcolors(v->nfa, v->cm);
	363	}
	364	CNOERR();
	365	v->tree = parse(v, EOS, PLAIN, v->nfa->init, v->nfa->final);
	366	assert(SEE(EOS)); /* even if error; ISERR() => SEE(EOS) */
	367	CNOERR();
	368	assert(v->tree != NULL);
	369
	370	/*
	371	* Finish setup of nfa and its subre tree.
	372	*/
	373
	374	specialcolors(v->nfa);
	375	CNOERR();
	376	if (debug != NULL) {
	377	fprintf(debug, "\n\n\n========= RAW ==========\n");
	378	dumpnfa(v->nfa, debug);
	379	dumpst(v->tree, debug, 1);
	380	}
	381	optst(v, v->tree);
	382	v->ntree = numst(v->tree, 1);
	383	markst(v->tree);
	384	cleanst(v);
	385	if (debug != NULL) {
	386	fprintf(debug, "\n\n\n========= TREE FIXED ==========\n");
	387	dumpst(v->tree, debug, 1);
	388	}
	389
	390	/*
	391	* Build compacted NFAs for tree and lacons.
	392	*/
	393
	394	re->re_info \|= nfatree(v, v->tree, debug);
	395	CNOERR();
	396	assert(v->nlacons == 0 \|\| v->lacons != NULL);
	397	for (i = 1; i < v->nlacons; i++) {
	398	if (debug != NULL) {
	399	fprintf(debug, "\n\n\n========= LA%d ==========\n", i);
	400	}
	401	nfanode(v, &v->lacons[i], debug);
	402	}
	403	CNOERR();
	404	if (v->tree->flags&SHORTER) {
	405	NOTE(REG_USHORTEST);
	406	}
	407
	408	/*
	409	* Build compacted NFAs for tree, lacons, fast search.
	410	*/
	411
	412	if (debug != NULL) {
	413	fprintf(debug, "\n\n\n========= SEARCH ==========\n");
	414	}
	415
	416	/*
	417	* Can sacrifice main NFA now, so use it as work area.
	418	*/
	419
	420	(DISCARD) optimize(v->nfa, debug);
	421	CNOERR();
	422	makesearch(v, v->nfa);
	423	CNOERR();
	424	compact(v->nfa, &g->search);
	425	CNOERR();
	426
	427	/*
	428	* Looks okay, package it up.
	429	*/
	430
	431	re->re_nsub = v->nsubexp;
	432	v->re = NULL; /* freev no longer frees re */
	433	g->magic = GUTSMAGIC;
	434	g->cflags = v->cflags;
	435	g->info = re->re_info;
	436	g->nsub = re->re_nsub;
	437	g->tree = v->tree;
	438	v->tree = NULL;
	439	g->ntree = v->ntree;
	440	g->compare = (v->cflags&REG_ICASE) ? casecmp : cmp;
	441	g->lacons = v->lacons;
	442	v->lacons = NULL;
	443	g->nlacons = v->nlacons;
	444
	445	if (flags&REG_DUMP) {
	446	dump(re, stdout);
	447	}
	448
	449	assert(v->err == 0);
	450	return freev(v, 0);
	451	}
	452
	453	/*
	454	- moresubs - enlarge subRE vector
	455	^ static void moresubs(struct vars *, int);
	456	*/
	457	static void
	458	moresubs(
	459	struct vars *v,
	460	int wanted) /* want enough room for this one */
	461	{
	462	struct subre **p;
	463	size_t n;
	464
	465	assert(wanted > 0 && (size_t)wanted >= v->nsubs);
	466	n = (size_t)wanted * 3 / 2 + 1;
	467	if (v->subs == v->sub10) {
	468	p = (struct subre *) MALLOC(n sizeof(struct subre *));
	469	if (p != NULL) {
	470	memcpy(p, v->subs, v->nsubs * sizeof(struct subre *));
	471	}
	472	} else {
	473	p = (struct subre *) REALLOC(v->subs, nsizeof(struct subre *));
	474	}
	475	if (p == NULL) {
	476	ERR(REG_ESPACE);
	477	return;
	478	}
	479
	480	v->subs = p;
	481	for (p = &v->subs[v->nsubs]; v->nsubs < n; p++, v->nsubs++) {
	482	*p = NULL;
	483	}
	484	assert(v->nsubs == n);
	485	assert((size_t)wanted < v->nsubs);
	486	}
	487
	488	/*
	489	- freev - free vars struct's substructures where necessary
	490	* Optionally does error-number setting, and always returns error code (if
	491	* any), to make error-handling code terser.
	492	^ static int freev(struct vars *, int);
	493	*/
	494	static int
	495	freev(
	496	struct vars *v,
	497	int err)
	498	{
	499	register int ret;
	500
	501	if (v->re != NULL) {
	502	rfree(v->re);
	503	}
	504	if (v->subs != v->sub10) {
	505	FREE(v->subs);
	506	}
	507	if (v->nfa != NULL) {
	508	freenfa(v->nfa);
	509	}
	510	if (v->tree != NULL) {
	511	freesubre(v, v->tree);
	512	}
	513	if (v->treechain != NULL) {
	514	cleanst(v);
	515	}
	516	if (v->cv != NULL) {
	517	freecvec(v->cv);
	518	}
	519	if (v->cv2 != NULL) {
	520	freecvec(v->cv2);
	521	}
	522	if (v->lacons != NULL) {
	523	freelacons(v->lacons, v->nlacons);
	524	}
	525	ERR(err); /* nop if err==0 */
	526
	527	ret = v->err;
	528	FreeVars(v);
	529	return ret;
	530	}
	531
	532	/*
	533	- makesearch - turn an NFA into a search NFA (implicit prepend of .*?)
	534	* NFA must have been optimize()d already.
	535	^ static void makesearch(struct vars , struct nfa );
	536	*/
	537	static void
	538	makesearch(
	539	struct vars *v,
	540	struct nfa *nfa)
	541	{
	542	struct arc a, b;
	543	struct state *pre = nfa->pre;
	544	struct state s, s2, *slist;
	545
	546	/*
	547	* No loops are needed if it's anchored.
	548	*/
	549
	550	for (a = pre->outs; a != NULL; a = a->outchain) {
	551	assert(a->type == PLAIN);
	552	if (a->co != nfa->bos[0] && a->co != nfa->bos[1]) {
	553	break;
	554	}
	555	}
	556	if (a != NULL) {
	557	/*
	558	* Add implicit .* in front.
	559	*/
	560
	561	rainbow(nfa, v->cm, PLAIN, COLORLESS, pre, pre);
	562
	563	/*
	564	* And ^* and \A* too -- not always necessary, but harmless.
	565	*/
	566
	567	newarc(nfa, PLAIN, nfa->bos[0], pre, pre);
	568	newarc(nfa, PLAIN, nfa->bos[1], pre, pre);
	569	}
	570
	571	/*
	572	* Now here's the subtle part. Because many REs have no lookback
	573	* constraints, often knowing when you were in the pre state tells you
	574	* little; it's the next state(s) that are informative. But some of them
	575	* may have other inarcs, i.e. it may be possible to make actual progress
	576	* and then return to one of them. We must de-optimize such cases,
	577	* splitting each such state into progress and no-progress states.
	578	*/
	579
	580	/*
	581	* First, make a list of the states.
	582	*/
	583
	584	slist = NULL;
	585	for (a=pre->outs ; a!=NULL ; a=a->outchain) {
	586	s = a->to;
	587	for (b=s->ins ; b!=NULL ; b=b->inchain) {
	588	if (b->from != pre) {
	589	break;
	590	}
	591	}
	592	if (b != NULL && s->tmp == NULL) {
	593	/*
	594	* Must be split if not already in the list (fixes bugs 505048,
	595	* 230589, 840258, 504785).
	596	*/
	597
	598	s->tmp = slist;
	599	slist = s;
	600	}
	601	}
	602
	603	/*
	604	* Do the splits.
	605	*/
	606
	607	for (s=slist ; s!=NULL ; s=s2) {
	608	s2 = newstate(nfa);
	609
	610	copyouts(nfa, s, s2);
	611	for (a=s->ins ; a!=NULL ; a=b) {
	612	b = a->inchain;
	613
	614	if (a->from != pre) {
	615	cparc(nfa, a, a->from, s2);
	616	freearc(nfa, a);
	617	}
	618	}
	619	s2 = s->tmp;
	620	s->tmp = NULL; /* clean up while we're at it */
	621	}
	622	}
	623
	624	/*
	625	- parse - parse an RE
	626	* This is actually just the top level, which parses a bunch of branches tied
	627	* together with '\|'. They appear in the tree as the left children of a chain
	628	* of '\|' subres.
	629	^ static struct subre parse(struct vars , int, int, struct state *,
	630	^ struct state *);
	631	*/
	632	static struct subre *
	633	parse(
	634	struct vars *v,
	635	int stopper, /* EOS or ')' */
	636	int type, /* LACON (lookahead subRE) or PLAIN */
	637	struct state init, / initial state */
	638	struct state final) / final state */
	639	{
	640	struct state left, right; /* scaffolding for branch */
	641	struct subre branches; / top level */
	642	struct subre branch; / current branch */
	643	struct subre t; / temporary */
	644	int firstbranch; /* is this the first branch? */
	645
	646	assert(stopper == ')' \|\| stopper == EOS);
	647
	648	branches = subre(v, '\|', LONGER, init, final);
	649	NOERRN();
	650	branch = branches;
	651	firstbranch = 1;
	652	do { /* a branch */
	653	if (!firstbranch) {
	654	/*
	655	* Need a place to hang the branch.
	656	*/
	657
	658	branch->right = subre(v, '\|', LONGER, init, final);
	659	NOERRN();
	660	branch = branch->right;
	661	}
	662	firstbranch = 0;
	663	left = newstate(v->nfa);
	664	right = newstate(v->nfa);
	665	NOERRN();
	666	EMPTYARC(init, left);
	667	EMPTYARC(right, final);
	668	NOERRN();
	669	branch->left = parsebranch(v, stopper, type, left, right, 0);
	670	NOERRN();
	671	branch->flags \|= UP(branch->flags \| branch->left->flags);
	672	if ((branch->flags &~ branches->flags) != 0) { /* new flags */
	673	for (t = branches; t != branch; t = t->right) {
	674	t->flags \|= branch->flags;
	675	}
	676	}
	677	} while (EAT('\|'));
	678	assert(SEE(stopper) \|\| SEE(EOS));
	679
	680	if (!SEE(stopper)) {
	681	assert(stopper == ')' && SEE(EOS));
	682	ERR(REG_EPAREN);
	683	}
	684
	685	/*
	686	* Optimize out simple cases.
	687	*/
	688
	689	if (branch == branches) { /* only one branch */
	690	assert(branch->right == NULL);
	691	t = branch->left;
	692	branch->left = NULL;
	693	freesubre(v, branches);
	694	branches = t;
	695	} else if (!MESSY(branches->flags)) { /* no interesting innards */
	696	freesubre(v, branches->left);
	697	branches->left = NULL;
	698	freesubre(v, branches->right);
	699	branches->right = NULL;
	700	branches->op = '=';
	701	}
	702
	703	return branches;
	704	}
	705
	706	/*
	707	- parsebranch - parse one branch of an RE
	708	* This mostly manages concatenation, working closely with parseqatom().
	709	* Concatenated things are bundled up as much as possible, with separate
	710	* ',' nodes introduced only when necessary due to substructure.
	711	^ static struct subre parsebranch(struct vars , int, int, struct state *,
	712	^ struct state *, int);
	713	*/
	714	static struct subre *
	715	parsebranch(
	716	struct vars *v,
	717	int stopper, /* EOS or ')' */
	718	int type, /* LACON (lookahead subRE) or PLAIN */
	719	struct state left, / leftmost state */
	720	struct state right, / rightmost state */
	721	int partial) /* is this only part of a branch? */
	722	{
	723	struct state lp; / left end of current construct */
	724	int seencontent; /* is there anything in this branch yet? */
	725	struct subre *t;
	726
	727	lp = left;
	728	seencontent = 0;
	729	t = subre(v, '=', 0, left, right); /* op '=' is tentative */
	730	NOERRN();
	731	while (!SEE('\|') && !SEE(stopper) && !SEE(EOS)) {
	732	if (seencontent) { /* implicit concat operator */
	733	lp = newstate(v->nfa);
	734	NOERRN();
	735	moveins(v->nfa, right, lp);
	736	}
	737	seencontent = 1;
	738
	739	/* NB, recursion in parseqatom() may swallow rest of branch */
	740	parseqatom(v, stopper, type, lp, right, t);
	741	}
	742
	743	if (!seencontent) { /* empty branch */
	744	if (!partial) {
	745	NOTE(REG_UUNSPEC);
	746	}
	747	assert(lp == left);
	748	EMPTYARC(left, right);
	749	}
	750
	751	return t;
	752	}
	753
	754	/*
	755	- parseqatom - parse one quantified atom or constraint of an RE
	756	* The bookkeeping near the end cooperates very closely with parsebranch(); in
	757	* particular, it contains a recursion that can involve parsing the rest of
	758	* the branch, making this function's name somewhat inaccurate.
	759	^ static void parseqatom(struct vars , int, int, struct state ,
	760	^ struct state , struct subre );
	761	*/
	762	static void
	763	parseqatom(
	764	struct vars *v,
	765	int stopper, /* EOS or ')' */
	766	int type, /* LACON (lookahead subRE) or PLAIN */
	767	struct state lp, / left state to hang it on */
	768	struct state rp, / right state to hang it on */
	769	struct subre top) / subtree top */
	770	{
	771	struct state s; / temporaries for new states */
	772	struct state *s2;
	773	#define ARCV(t, val) newarc(v->nfa, t, val, lp, rp)
	774	int m, n;
	775	struct subre atom; / atom's subtree */
	776	struct subre *t;
	777	int cap; /* capturing parens? */
	778	int pos; /* positive lookahead? */
	779	int subno; /* capturing-parens or backref number */
	780	int atomtype;
	781	int qprefer; /* quantifier short/long preference */
	782	int f;
	783	struct subre *atomp; / where the pointer to atom is */
	784
	785	/*
	786	* Initial bookkeeping.
	787	*/
	788
	789	atom = NULL;
	790	assert(lp->nouts == 0); /* must string new code */
	791	assert(rp->nins == 0); /* between lp and rp */
	792	subno = 0; /* just to shut lint up */
	793
	794	/*
	795	* An atom or constraint...
	796	*/
	797
	798	atomtype = v->nexttype;
	799	switch (atomtype) {
	800	/* first, constraints, which end by returning */
	801	case '^':
	802	ARCV('^', 1);
	803	if (v->cflags&REG_NLANCH) {
	804	ARCV(BEHIND, v->nlcolor);
	805	}
	806	NEXT();
	807	return;
	808	case '$':
	809	ARCV('$', 1);
	810	if (v->cflags&REG_NLANCH) {
	811	ARCV(AHEAD, v->nlcolor);
	812	}
	813	NEXT();
	814	return;
	815	case SBEGIN:
	816	ARCV('^', 1); /* BOL */
	817	ARCV('^', 0); /* or BOS */
	818	NEXT();
	819	return;
	820	case SEND:
	821	ARCV('$', 1); /* EOL */
	822	ARCV('$', 0); /* or EOS */
	823	NEXT();
	824	return;
	825	case '<':
	826	wordchrs(v); /* does NEXT() */
	827	s = newstate(v->nfa);
	828	NOERR();
	829	nonword(v, BEHIND, lp, s);
	830	word(v, AHEAD, s, rp);
	831	return;
	832	case '>':
	833	wordchrs(v); /* does NEXT() */
	834	s = newstate(v->nfa);
	835	NOERR();
	836	word(v, BEHIND, lp, s);
	837	nonword(v, AHEAD, s, rp);
	838	return;
	839	case WBDRY:
	840	wordchrs(v); /* does NEXT() */
	841	s = newstate(v->nfa);
	842	NOERR();
	843	nonword(v, BEHIND, lp, s);
	844	word(v, AHEAD, s, rp);
	845	s = newstate(v->nfa);
	846	NOERR();
	847	word(v, BEHIND, lp, s);
	848	nonword(v, AHEAD, s, rp);
	849	return;
	850	case NWBDRY:
	851	wordchrs(v); /* does NEXT() */
	852	s = newstate(v->nfa);
	853	NOERR();
	854	word(v, BEHIND, lp, s);
	855	word(v, AHEAD, s, rp);
	856	s = newstate(v->nfa);
	857	NOERR();
	858	nonword(v, BEHIND, lp, s);
	859	nonword(v, AHEAD, s, rp);
	860	return;
	861	case LACON: /* lookahead constraint */
	862	pos = v->nextvalue;
	863	NEXT();
	864	s = newstate(v->nfa);
	865	s2 = newstate(v->nfa);
	866	NOERR();
	867	t = parse(v, ')', LACON, s, s2);
	868	freesubre(v, t); /* internal structure irrelevant */
	869	assert(SEE(')') \|\| ISERR());
	870	NEXT();
	871	n = newlacon(v, s, s2, pos);
	872	NOERR();
	873	ARCV(LACON, n);
	874	return;
	875
	876	/*
	877	* Then errors, to get them out of the way.
	878	*/
	879
	880	case '*':
	881	case '+':
	882	case '?':
	883	case '{':
	884	ERR(REG_BADRPT);
	885	return;
	886	default:
	887	ERR(REG_ASSERT);
	888	return;
	889
	890	/*
	891	* Then plain characters, and minor variants on that theme.
	892	*/
	893
	894	case ')': /* unbalanced paren */
	895	if ((v->cflags&REG_ADVANCED) != REG_EXTENDED) {
	896	ERR(REG_EPAREN);
	897	return;
	898	}
	899
	900	/*
	901	* Legal in EREs due to specification botch.
	902	*/
	903
	904	NOTE(REG_UPBOTCH);
	905	/* fallthrough into case PLAIN */
	906	case PLAIN:
	907	onechr(v, v->nextvalue, lp, rp);
	908	okcolors(v->nfa, v->cm);
	909	NOERR();
	910	NEXT();
	911	break;
	912	case '[':
	913	if (v->nextvalue == 1) {
	914	bracket(v, lp, rp);
	915	} else {
	916	cbracket(v, lp, rp);
	917	}
	918	assert(SEE(']') \|\| ISERR());
	919	NEXT();
	920	break;
	921	case '.':
	922	rainbow(v->nfa, v->cm, PLAIN,
	923	(v->cflags&REG_NLSTOP) ? v->nlcolor : COLORLESS, lp, rp);
	924	NEXT();
	925	break;
	926
	927	/*
	928	* And finally the ugly stuff.
	929	*/
	930
	931	case '(': /* value flags as capturing or non */
	932	cap = (type == LACON) ? 0 : v->nextvalue;
	933	if (cap) {
	934	v->nsubexp++;
	935	subno = v->nsubexp;
	936	if ((size_t)subno >= v->nsubs) {
	937	moresubs(v, subno);
	938	}
	939	assert((size_t)subno < v->nsubs);
	940	} else {
	941	atomtype = PLAIN; /* something that's not '(' */
	942	}
	943	NEXT();
	944
	945	/*
	946	* Need new endpoints because tree will contain pointers.
	947	*/
	948
	949	s = newstate(v->nfa);
	950	s2 = newstate(v->nfa);
	951	NOERR();
	952	EMPTYARC(lp, s);
	953	EMPTYARC(s2, rp);
	954	NOERR();
	955	atom = parse(v, ')', PLAIN, s, s2);
	956	assert(SEE(')') \|\| ISERR());
	957	NEXT();
	958	NOERR();
	959	if (cap) {
	960	v->subs[subno] = atom;
	961	t = subre(v, '(', atom->flags\|CAP, lp, rp);
	962	NOERR();
	963	t->subno = subno;
	964	t->left = atom;
	965	atom = t;
	966	}
	967
	968	/*
	969	* Postpone everything else pending possible {0}.
	970	*/
	971
	972	break;
	973	case BACKREF: /* the Feature From The Black Lagoon */
	974	INSIST(type != LACON, REG_ESUBREG);
	975	INSIST(v->nextvalue < v->nsubs, REG_ESUBREG);
	976	INSIST(v->subs[v->nextvalue] != NULL, REG_ESUBREG);
	977	NOERR();
	978	assert(v->nextvalue > 0);
	979	atom = subre(v, 'b', BACKR, lp, rp);
	980	subno = v->nextvalue;
	981	atom->subno = subno;
	982	EMPTYARC(lp, rp); /* temporarily, so there's something */
	983	NEXT();
	984	break;
	985	}
	986
	987	/*
	988	* ...and an atom may be followed by a quantifier.
	989	*/
	990
	991	switch (v->nexttype) {
	992	case '*':
	993	m = 0;
	994	n = INFINITY;
	995	qprefer = (v->nextvalue) ? LONGER : SHORTER;
	996	NEXT();
	997	break;
	998	case '+':
	999	m = 1;
	1000	n = INFINITY;
	1001	qprefer = (v->nextvalue) ? LONGER : SHORTER;
	1002	NEXT();
	1003	break;
	1004	case '?':
	1005	m = 0;
	1006	n = 1;
	1007	qprefer = (v->nextvalue) ? LONGER : SHORTER;
	1008	NEXT();
	1009	break;
	1010	case '{':
	1011	NEXT();
	1012	m = scannum(v);
	1013	if (EAT(',')) {
	1014	if (SEE(DIGIT)) {
	1015	n = scannum(v);
	1016	} else {
	1017	n = INFINITY;
	1018	}
	1019	if (m > n) {
	1020	ERR(REG_BADBR);
	1021	return;
	1022	}
	1023
	1024	/*
	1025	* {m,n} exercises preference, even if it's {m,m}
	1026	*/
	1027
	1028	qprefer = (v->nextvalue) ? LONGER : SHORTER;
	1029	} else {
	1030	n = m;
	1031	/*
	1032	* {m} passes operand's preference through.
	1033	*/
	1034
	1035	qprefer = 0;
	1036	}
	1037	if (!SEE('}')) { /* catches errors too */
	1038	ERR(REG_BADBR);
	1039	return;
	1040	}
	1041	NEXT();
	1042	break;
	1043	default: /* no quantifier */
	1044	m = n = 1;
	1045	qprefer = 0;
	1046	break;
	1047	}
	1048
	1049	/*
	1050	* Annoying special case: {0} or {0,0} cancels everything.
	1051	*/
	1052
	1053	if (m == 0 && n == 0) {
	1054	if (atom != NULL) {
	1055	freesubre(v, atom);
	1056	}
	1057	if (atomtype == '(') {
	1058	v->subs[subno] = NULL;
	1059	}
	1060	delsub(v->nfa, lp, rp);
	1061	EMPTYARC(lp, rp);
	1062	return;
	1063	}
	1064
	1065	/*
	1066	* If not a messy case, avoid hard part.
	1067	*/
	1068
	1069	assert(!MESSY(top->flags));
	1070	f = top->flags \| qprefer \| ((atom != NULL) ? atom->flags : 0);
	1071	if (atomtype != '(' && atomtype != BACKREF && !MESSY(UP(f))) {
	1072	if (!(m == 1 && n == 1)) {
	1073	repeat(v, lp, rp, m, n);
	1074	}
	1075	if (atom != NULL) {
	1076	freesubre(v, atom);
	1077	}
	1078	top->flags = f;
	1079	return;
	1080	}
	1081
	1082	/*
	1083	* hard part: something messy
	1084	* That is, capturing parens, back reference, short/long clash, or an atom
	1085	* with substructure containing one of those.
	1086	*/
	1087
	1088	/*
	1089	* Now we'll need a subre for the contents even if they're boring.
	1090	*/
	1091
	1092	if (atom == NULL) {
	1093	atom = subre(v, '=', 0, lp, rp);
	1094	NOERR();
	1095	}
	1096
	1097	/*
	1098	* Prepare a general-purpose state skeleton.
	1099	*
	1100	* ---> [s] ---prefix---> [begin] ---atom---> [end] ----rest---> [rp]
	1101	* / /
	1102	* [lp] ----> [s2] ----bypass---------------------
	1103	*
	1104	* where bypass is an empty, and prefix is some repetitions of atom
	1105	*/
	1106
	1107	s = newstate(v->nfa); /* first, new endpoints for the atom */
	1108	s2 = newstate(v->nfa);
	1109	NOERR();
	1110	moveouts(v->nfa, lp, s);
	1111	moveins(v->nfa, rp, s2);
	1112	NOERR();
	1113	atom->begin = s;
	1114	atom->end = s2;
	1115	s = newstate(v->nfa); /* and spots for prefix and bypass */
	1116	s2 = newstate(v->nfa);
	1117	NOERR();
	1118	EMPTYARC(lp, s);
	1119	EMPTYARC(lp, s2);
	1120	NOERR();
	1121
	1122	/*
	1123	* Break remaining subRE into x{...} and what follows.
	1124	*/
	1125
	1126	t = subre(v, '.', COMBINE(qprefer, atom->flags), lp, rp);
	1127	t->left = atom;
	1128	atomp = &t->left;
	1129
	1130	/*
	1131	* Here we should recurse... but we must postpone that to the end.
	1132	*/
	1133
	1134	/*
	1135	* Split top into prefix and remaining.
	1136	*/
	1137
	1138	assert(top->op == '=' && top->left == NULL && top->right == NULL);
	1139	top->left = subre(v, '=', top->flags, top->begin, lp);
	1140	top->op = '.';
	1141	top->right = t;
	1142
	1143	/*
	1144	* If it's a backref, now is the time to replicate the subNFA.
	1145	*/
	1146
	1147	if (atomtype == BACKREF) {
	1148	assert(atom->begin->nouts == 1); /* just the EMPTY */
	1149	delsub(v->nfa, atom->begin, atom->end);
	1150	assert(v->subs[subno] != NULL);
	1151
	1152	/*
	1153	* And here's why the recursion got postponed: it must wait until the
	1154	* skeleton is filled in, because it may hit a backref that wants to
	1155	* copy the filled-in skeleton.
	1156	*/
	1157
	1158	dupnfa(v->nfa, v->subs[subno]->begin, v->subs[subno]->end,
	1159	atom->begin, atom->end);
	1160	NOERR();
	1161	}
	1162
	1163	/*
	1164	* It's quantifier time; first, turn x{0,...} into x{1,...}\|empty
	1165	*/
	1166
	1167	if (m == 0) {
	1168	EMPTYARC(s2, atom->end);/* the bypass */
	1169	assert(PREF(qprefer) != 0);
	1170	f = COMBINE(qprefer, atom->flags);
	1171	t = subre(v, '\|', f, lp, atom->end);
	1172	NOERR();
	1173	t->left = atom;
	1174	t->right = subre(v, '\|', PREF(f), s2, atom->end);
	1175	NOERR();
	1176	t->right->left = subre(v, '=', 0, s2, atom->end);
	1177	NOERR();
	1178	*atomp = t;
	1179	atomp = &t->left;
	1180	m = 1;
	1181	}
	1182
	1183	/*
	1184	* Deal with the rest of the quantifier.
	1185	*/
	1186
	1187	if (atomtype == BACKREF) {
	1188	/*
	1189	* Special case: backrefs have internal quantifiers.
	1190	*/
	1191
	1192	EMPTYARC(s, atom->begin); /* empty prefix */
	1193
	1194	/*
	1195	* Just stuff everything into atom.
	1196	*/
	1197
	1198	repeat(v, atom->begin, atom->end, m, n);
	1199	atom->min = (short) m;
	1200	atom->max = (short) n;
	1201	atom->flags \|= COMBINE(qprefer, atom->flags);
	1202	} else if (m == 1 && n == 1) {
	1203	/*
	1204	* No/vacuous quantifier: done.
	1205	*/
	1206
	1207	EMPTYARC(s, atom->begin); /* empty prefix */
	1208	} else {
	1209	/*
	1210	* Turn x{m,n} into x{m-1,n-1}x, with capturing parens in only second
	1211	* x
	1212	*/
	1213
	1214	dupnfa(v->nfa, atom->begin, atom->end, s, atom->begin);
	1215	assert(m >= 1 && m != INFINITY && n >= 1);
	1216	repeat(v, s, atom->begin, m-1, (n == INFINITY) ? n : n-1);
	1217	f = COMBINE(qprefer, atom->flags);
	1218	t = subre(v, '.', f, s, atom->end); /* prefix and atom */
	1219	NOERR();
	1220	t->left = subre(v, '=', PREF(f), s, atom->begin);
	1221	NOERR();
	1222	t->right = atom;
	1223	*atomp = t;
	1224	}
	1225
	1226	/*
	1227	* And finally, look after that postponed recursion.
	1228	*/
	1229
	1230	t = top->right;
	1231	if (!(SEE('\|') \|\| SEE(stopper) \|\| SEE(EOS))) {
	1232	t->right = parsebranch(v, stopper, type, atom->end, rp, 1);
	1233	} else {
	1234	EMPTYARC(atom->end, rp);
	1235	t->right = subre(v, '=', 0, atom->end, rp);
	1236	}
	1237	assert(SEE('\|') \|\| SEE(stopper) \|\| SEE(EOS));
	1238	t->flags \|= COMBINE(t->flags, t->right->flags);
	1239	top->flags \|= COMBINE(top->flags, t->flags);
	1240	}
	1241
	1242	/*
	1243	- nonword - generate arcs for non-word-character ahead or behind
	1244	^ static void nonword(struct vars , int, struct state , struct state *);
	1245	*/
	1246	static void
	1247	nonword(
	1248	struct vars *v,
	1249	int dir, /* AHEAD or BEHIND */
	1250	struct state *lp,
	1251	struct state *rp)
	1252	{
	1253	int anchor = (dir == AHEAD) ? '$' : '^';
	1254
	1255	assert(dir == AHEAD \|\| dir == BEHIND);
	1256	newarc(v->nfa, anchor, 1, lp, rp);
	1257	newarc(v->nfa, anchor, 0, lp, rp);
	1258	colorcomplement(v->nfa, v->cm, dir, v->wordchrs, lp, rp);
	1259	/* (no need for special attention to \n) */
	1260	}
	1261
	1262	/*
	1263	- word - generate arcs for word character ahead or behind
	1264	^ static void word(struct vars , int, struct state , struct state *);
	1265	*/
	1266	static void
	1267	word(
	1268	struct vars *v,
	1269	int dir, /* AHEAD or BEHIND */
	1270	struct state *lp,
	1271	struct state *rp)
	1272	{
	1273	assert(dir == AHEAD \|\| dir == BEHIND);
	1274	cloneouts(v->nfa, v->wordchrs, lp, rp, dir);
	1275	/* (no need for special attention to \n) */
	1276	}
	1277
	1278	/*
	1279	- scannum - scan a number
	1280	^ static int scannum(struct vars *);
	1281	*/
	1282	static int /* value, <= DUPMAX */
	1283	scannum(
	1284	struct vars *v)
	1285	{
	1286	int n = 0;
	1287
	1288	while (SEE(DIGIT) && n < DUPMAX) {
	1289	n = n*10 + v->nextvalue;
	1290	NEXT();
	1291	}
	1292	if (SEE(DIGIT) \|\| n > DUPMAX) {
	1293	ERR(REG_BADBR);
	1294	return 0;
	1295	}
	1296	return n;
	1297	}
	1298
	1299	/*
	1300	- repeat - replicate subNFA for quantifiers
	1301	* The duplication sequences used here are chosen carefully so that any
	1302	* pointers starting out pointing into the subexpression end up pointing into
	1303	* the last occurrence. (Note that it may not be strung between the same left
	1304	* and right end states, however!) This used to be important for the subRE
	1305	* tree, although the important bits are now handled by the in-line code in
	1306	* parse(), and when this is called, it doesn't matter any more.
	1307	^ static void repeat(struct vars , struct state , struct state *, int, int);
	1308	*/
	1309	static void
	1310	repeat(
	1311	struct vars *v,
	1312	struct state *lp,
	1313	struct state *rp,
	1314	int m,
	1315	int n)
	1316	{
	1317	#define SOME 2
	1318	#define INF 3
	1319	#define PAIR(x, y) ((x)*4 + (y))
	1320	#define REDUCE(x) ( ((x) == INFINITY) ? INF : (((x) > 1) ? SOME : (x)) )
	1321	const int rm = REDUCE(m);
	1322	const int rn = REDUCE(n);
	1323	struct state s, s2;
	1324
	1325	switch (PAIR(rm, rn)) {
	1326	case PAIR(0, 0): /* empty string */
	1327	delsub(v->nfa, lp, rp);
	1328	EMPTYARC(lp, rp);
	1329	break;
	1330	case PAIR(0, 1): /* do as x\| */
	1331	EMPTYARC(lp, rp);
	1332	break;
	1333	case PAIR(0, SOME): /* do as x{1,n}\| */
	1334	repeat(v, lp, rp, 1, n);
	1335	NOERR();
	1336	EMPTYARC(lp, rp);
	1337	break;
	1338	case PAIR(0, INF): /* loop x around */
	1339	s = newstate(v->nfa);
	1340	NOERR();
	1341	moveouts(v->nfa, lp, s);
	1342	moveins(v->nfa, rp, s);
	1343	EMPTYARC(lp, s);
	1344	EMPTYARC(s, rp);
	1345	break;
	1346	case PAIR(1, 1): /* no action required */
	1347	break;
	1348	case PAIR(1, SOME): /* do as x{0,n-1}x = (x{1,n-1}\|)x */
	1349	s = newstate(v->nfa);
	1350	NOERR();
	1351	moveouts(v->nfa, lp, s);
	1352	dupnfa(v->nfa, s, rp, lp, s);
	1353	NOERR();
	1354	repeat(v, lp, s, 1, n-1);
	1355	NOERR();
	1356	EMPTYARC(lp, s);
	1357	break;
	1358	case PAIR(1, INF): /* add loopback arc */
	1359	s = newstate(v->nfa);
	1360	s2 = newstate(v->nfa);
	1361	NOERR();
	1362	moveouts(v->nfa, lp, s);
	1363	moveins(v->nfa, rp, s2);
	1364	EMPTYARC(lp, s);
	1365	EMPTYARC(s2, rp);
	1366	EMPTYARC(s2, s);
	1367	break;
	1368	case PAIR(SOME, SOME): /* do as x{m-1,n-1}x */
	1369	s = newstate(v->nfa);
	1370	NOERR();
	1371	moveouts(v->nfa, lp, s);
	1372	dupnfa(v->nfa, s, rp, lp, s);
	1373	NOERR();
	1374	repeat(v, lp, s, m-1, n-1);
	1375	break;
	1376	case PAIR(SOME, INF): /* do as x{m-1,}x */
	1377	s = newstate(v->nfa);
	1378	NOERR();
	1379	moveouts(v->nfa, lp, s);
	1380	dupnfa(v->nfa, s, rp, lp, s);
	1381	NOERR();
	1382	repeat(v, lp, s, m-1, n);
	1383	break;
	1384	default:
	1385	ERR(REG_ASSERT);
	1386	break;
	1387	}
	1388	}
	1389
	1390	/*
	1391	- bracket - handle non-complemented bracket expression
	1392	* Also called from cbracket for complemented bracket expressions.
	1393	^ static void bracket(struct vars , struct state , struct state *);
	1394	*/
	1395	static void
	1396	bracket(
	1397	struct vars *v,
	1398	struct state *lp,
	1399	struct state *rp)
	1400	{
	1401	assert(SEE('['));
	1402	NEXT();
	1403	while (!SEE(']') && !SEE(EOS)) {
	1404	brackpart(v, lp, rp);
	1405	}
	1406	assert(SEE(']') \|\| ISERR());
	1407	okcolors(v->nfa, v->cm);
	1408	}
	1409
	1410	/*
	1411	- cbracket - handle complemented bracket expression
	1412	* We do it by calling bracket() with dummy endpoints, and then complementing
	1413	* the result. The alternative would be to invoke rainbow(), and then delete
	1414	* arcs as the b.e. is seen... but that gets messy.
	1415	^ static void cbracket(struct vars , struct state , struct state *);
	1416	*/
	1417	static void
	1418	cbracket(
	1419	struct vars *v,
	1420	struct state *lp,
	1421	struct state *rp)
	1422	{
	1423	struct state *left = newstate(v->nfa);
	1424	struct state *right = newstate(v->nfa);
	1425
	1426	NOERR();
	1427	bracket(v, left, right);
	1428	if (v->cflags&REG_NLSTOP) {
	1429	newarc(v->nfa, PLAIN, v->nlcolor, left, right);
	1430	}
	1431	NOERR();
	1432
	1433	assert(lp->nouts == 0); /* all outarcs will be ours */
	1434
	1435	/*
	1436	* Easy part of complementing, and all there is to do since the MCCE code
	1437	* was removed.
	1438	*/
	1439
	1440	colorcomplement(v->nfa, v->cm, PLAIN, left, lp, rp);
	1441	NOERR();
	1442	dropstate(v->nfa, left);
	1443	assert(right->nins == 0);
	1444	freestate(v->nfa, right);
	1445	return;
	1446	}
	1447
	1448	/*
	1449	- brackpart - handle one item (or range) within a bracket expression
	1450	^ static void brackpart(struct vars , struct state , struct state *);
	1451	*/
	1452	static void
	1453	brackpart(
	1454	struct vars *v,
	1455	struct state *lp,
	1456	struct state *rp)
	1457	{
	1458	celt startc, endc;
	1459	struct cvec *cv;
	1460	const chr startp, endp;
	1461	chr c[1];
	1462
	1463	/*
	1464	* Parse something, get rid of special cases, take shortcuts.
	1465	*/
	1466
	1467	switch (v->nexttype) {
	1468	case RANGE: /* a-b-c or other botch */
	1469	ERR(REG_ERANGE);
	1470	return;
	1471	break;
	1472	case PLAIN:
	1473	c[0] = v->nextvalue;
	1474	NEXT();
	1475
	1476	/*
	1477	* Shortcut for ordinary chr (not range).
	1478	*/
	1479
	1480	if (!SEE(RANGE)) {
	1481	onechr(v, c[0], lp, rp);
	1482	return;
	1483	}
	1484	startc = element(v, c, c+1);
	1485	NOERR();
	1486	break;
	1487	case COLLEL:
	1488	startp = v->now;
	1489	endp = scanplain(v);
	1490	INSIST(startp < endp, REG_ECOLLATE);
	1491	NOERR();
	1492	startc = element(v, startp, endp);
	1493	NOERR();
	1494	break;
	1495	case ECLASS:
	1496	startp = v->now;
	1497	endp = scanplain(v);
	1498	INSIST(startp < endp, REG_ECOLLATE);
	1499	NOERR();
	1500	startc = element(v, startp, endp);
	1501	NOERR();
	1502	cv = eclass(v, startc, (v->cflags&REG_ICASE));
	1503	NOERR();
	1504	dovec(v, cv, lp, rp);
	1505	return;
	1506	break;
	1507	case CCLASS:
	1508	startp = v->now;
	1509	endp = scanplain(v);
	1510	INSIST(startp < endp, REG_ECTYPE);
	1511	NOERR();
	1512	cv = cclass(v, startp, endp, (v->cflags&REG_ICASE));
	1513	NOERR();
	1514	dovec(v, cv, lp, rp);
	1515	return;
	1516	break;
	1517	default:
	1518	ERR(REG_ASSERT);
	1519	return;
	1520	break;
	1521	}
	1522
	1523	if (SEE(RANGE)) {
	1524	NEXT();
	1525	switch (v->nexttype) {
	1526	case PLAIN:
	1527	case RANGE:
	1528	c[0] = v->nextvalue;
	1529	NEXT();
	1530	endc = element(v, c, c+1);
	1531	NOERR();
	1532	break;
	1533	case COLLEL:
	1534	startp = v->now;
	1535	endp = scanplain(v);
	1536	INSIST(startp < endp, REG_ECOLLATE);
	1537	NOERR();
	1538	endc = element(v, startp, endp);
	1539	NOERR();
	1540	break;
	1541	default:
	1542	ERR(REG_ERANGE);
	1543	return;
	1544	break;
	1545	}
	1546	} else {
	1547	endc = startc;
	1548	}
	1549
	1550	/*
	1551	* Ranges are unportable. Actually, standard C does guarantee that digits
	1552	* are contiguous, but making that an exception is just too complicated.
	1553	*/
	1554
	1555	if (startc != endc) {
	1556	NOTE(REG_UUNPORT);
	1557	}
	1558	cv = range(v, startc, endc, (v->cflags&REG_ICASE));
	1559	NOERR();
	1560	dovec(v, cv, lp, rp);
	1561	}
	1562
	1563	/*
	1564	- scanplain - scan PLAIN contents of [. etc.
	1565	* Certain bits of trickery in lex.c know that this code does not try to look
	1566	* past the final bracket of the [. etc.
	1567	^ static const chr scanplain(struct vars );
	1568	*/
	1569	static const chr * /* just after end of sequence */
	1570	scanplain(
	1571	struct vars *v)
	1572	{
	1573	const chr *endp;
	1574
	1575	assert(SEE(COLLEL) \|\| SEE(ECLASS) \|\| SEE(CCLASS));
	1576	NEXT();
	1577
	1578	endp = v->now;
	1579	while (SEE(PLAIN)) {
	1580	endp = v->now;
	1581	NEXT();
	1582	}
	1583
	1584	assert(SEE(END) \|\| ISERR());
	1585	NEXT();
	1586
	1587	return endp;
	1588	}
	1589
	1590	/*
	1591	- onechr - fill in arcs for a plain character, and possible case complements
	1592	* This is mostly a shortcut for efficient handling of the common case.
	1593	^ static void onechr(struct vars , pchr, struct state , struct state *);
	1594	*/
	1595	static void
	1596	onechr(
	1597	struct vars *v,
	1598	pchr c,
	1599	struct state *lp,
	1600	struct state *rp)
	1601	{
	1602	if (!(v->cflags&REG_ICASE)) {
	1603	newarc(v->nfa, PLAIN, subcolor(v->cm, c), lp, rp);
	1604	return;
	1605	}
	1606
	1607	/*
	1608	* Rats, need general case anyway...
	1609	*/
	1610
	1611	dovec(v, allcases(v, c), lp, rp);
	1612	}
	1613
	1614	/*
	1615	- dovec - fill in arcs for each element of a cvec
	1616	^ static void dovec(struct vars , struct cvec , struct state *,
	1617	^ struct state *);
	1618	*/
	1619	static void
	1620	dovec(
	1621	struct vars *v,
	1622	struct cvec *cv,
	1623	struct state *lp,
	1624	struct state *rp)
	1625	{
	1626	chr ch, from, to;
	1627	const chr *p;
	1628	int i;
	1629
	1630	for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--) {
	1631	ch = *p;
	1632	newarc(v->nfa, PLAIN, subcolor(v->cm, ch), lp, rp);
	1633	}
	1634
	1635	for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--) {
	1636	from = *p;
	1637	to = *(p+1);
	1638	if (from <= to) {
	1639	subrange(v, from, to, lp, rp);
	1640	}
	1641	}
	1642
	1643	}
	1644
	1645	/*
	1646	- wordchrs - set up word-chr list for word-boundary stuff, if needed
	1647	* The list is kept as a bunch of arcs between two dummy states; it's disposed
	1648	* of by the unreachable-states sweep in NFA optimization. Does NEXT(). Must
	1649	* not be called from any unusual lexical context. This should be reconciled
	1650	* with the \w etc. handling in lex.c, and should be cleaned up to reduce
	1651	* dependencies on input scanning.
	1652	^ static void wordchrs(struct vars *);
	1653	*/
	1654	static void
	1655	wordchrs(
	1656	struct vars *v)
	1657	{
	1658	struct state left, right;
	1659
	1660	if (v->wordchrs != NULL) {
	1661	NEXT(); /* for consistency */
	1662	return;
	1663	}
	1664
	1665	left = newstate(v->nfa);
	1666	right = newstate(v->nfa);
	1667	NOERR();
	1668
	1669	/*
	1670	* Fine point: implemented with [::], and lexer will set REG_ULOCALE.
	1671	*/
	1672
	1673	lexword(v);
	1674	NEXT();
	1675	assert(v->savenow != NULL && SEE('['));
	1676	bracket(v, left, right);
	1677	assert((v->savenow != NULL && SEE(']')) \|\| ISERR());
	1678	NEXT();
	1679	NOERR();
	1680	v->wordchrs = left;
	1681	}
	1682
	1683	/*
	1684	- subre - allocate a subre
	1685	^ static struct subre subre(struct vars , int, int, struct state *,
	1686	^ struct state *);
	1687	*/
	1688	static struct subre *
	1689	subre(
	1690	struct vars *v,
	1691	int op,
	1692	int flags,
	1693	struct state *begin,
	1694	struct state *end)
	1695	{
	1696	struct subre *ret = v->treefree;
	1697
	1698	if (ret != NULL) {
	1699	v->treefree = ret->left;
	1700	} else {
	1701	ret = (struct subre *) MALLOC(sizeof(struct subre));
	1702	if (ret == NULL) {
	1703	ERR(REG_ESPACE);
	1704	return NULL;
	1705	}
	1706	ret->chain = v->treechain;
	1707	v->treechain = ret;
	1708	}
	1709
	1710	assert(strchr("\|.b(=", op) != NULL);
	1711
	1712	ret->op = op;
	1713	ret->flags = flags;
	1714	ret->retry = 0;
	1715	ret->subno = 0;
	1716	ret->min = ret->max = 1;
	1717	ret->left = NULL;
	1718	ret->right = NULL;
	1719	ret->begin = begin;
	1720	ret->end = end;
	1721	ZAPCNFA(ret->cnfa);
	1722
	1723	return ret;
	1724	}
	1725
	1726	/*
	1727	- freesubre - free a subRE subtree
	1728	^ static void freesubre(struct vars , struct subre );
	1729	*/
	1730	static void
	1731	freesubre(
	1732	struct vars v, / might be NULL */
	1733	struct subre *sr)
	1734	{
	1735	if (sr == NULL) {
	1736	return;
	1737	}
	1738
	1739	if (sr->left != NULL) {
	1740	freesubre(v, sr->left);
	1741	}
	1742	if (sr->right != NULL) {
	1743	freesubre(v, sr->right);
	1744	}
	1745
	1746	freesrnode(v, sr);
	1747	}
	1748
	1749	/*
	1750	- freesrnode - free one node in a subRE subtree
	1751	^ static void freesrnode(struct vars , struct subre );
	1752	*/
	1753	static void
	1754	freesrnode(
	1755	struct vars v, / might be NULL */
	1756	struct subre *sr)
	1757	{
	1758	if (sr == NULL) {
	1759	return;
	1760	}
	1761
	1762	if (!NULLCNFA(sr->cnfa)) {
	1763	freecnfa(&sr->cnfa);
	1764	}
	1765	sr->flags = 0;
	1766
	1767	if (v != NULL) {
	1768	sr->left = v->treefree;
	1769	v->treefree = sr;
	1770	} else {
	1771	FREE(sr);
	1772	}
	1773	}
	1774
	1775	/*
	1776	- optst - optimize a subRE subtree
	1777	^ static void optst(struct vars , struct subre );
	1778	*/
	1779	static void
	1780	optst(
	1781	struct vars *v,
	1782	struct subre *t)
	1783	{
	1784	/*
	1785	* DGP (2007-11-13): I assume it was the programmer's intent to eventually
	1786	* come back and add code to optimize subRE trees, but the routine coded
	1787	* just spends effort traversing the tree and doing nothing. We can do
	1788	* nothing with less effort.
	1789	*/
	1790
	1791	return;
	1792	}
	1793
	1794	/*
	1795	- numst - number tree nodes (assigning retry indexes)
	1796	^ static int numst(struct subre *, int);
	1797	*/
	1798	static int /* next number */
	1799	numst(
	1800	struct subre *t,
	1801	int start) /* starting point for subtree numbers */
	1802	{
	1803	int i;
	1804
	1805	assert(t != NULL);
	1806
	1807	i = start;
	1808	t->retry = (short) i++;
	1809	if (t->left != NULL) {
	1810	i = numst(t->left, i);
	1811	}
	1812	if (t->right != NULL) {
	1813	i = numst(t->right, i);
	1814	}
	1815	return i;
	1816	}
	1817
	1818	/*
	1819	- markst - mark tree nodes as INUSE
	1820	^ static void markst(struct subre *);
	1821	*/
	1822	static void
	1823	markst(
	1824	struct subre *t)
	1825	{
	1826	assert(t != NULL);
	1827
	1828	t->flags \|= INUSE;
	1829	if (t->left != NULL) {
	1830	markst(t->left);
	1831	}
	1832	if (t->right != NULL) {
	1833	markst(t->right);
	1834	}
	1835	}
	1836
	1837	/*
	1838	- cleanst - free any tree nodes not marked INUSE
	1839	^ static void cleanst(struct vars *);
	1840	*/
	1841	static void
	1842	cleanst(
	1843	struct vars *v)
	1844	{
	1845	struct subre *t;
	1846	struct subre *next;
	1847
	1848	for (t = v->treechain; t != NULL; t = next) {
	1849	next = t->chain;
	1850	if (!(t->flags&INUSE)) {
	1851	FREE(t);
	1852	}
	1853	}
	1854	v->treechain = NULL;
	1855	v->treefree = NULL; /* just on general principles */
	1856	}
	1857
	1858	/*
	1859	- nfatree - turn a subRE subtree into a tree of compacted NFAs
	1860	^ static long nfatree(struct vars , struct subre , FILE *);
	1861	*/
	1862	static long /* optimize results from top node */
	1863	nfatree(
	1864	struct vars *v,
	1865	struct subre *t,
	1866	FILE f) / for debug output */
	1867	{
	1868	assert(t != NULL && t->begin != NULL);
	1869
	1870	if (t->left != NULL) {
	1871	(DISCARD) nfatree(v, t->left, f);
	1872	}
	1873	if (t->right != NULL) {
	1874	(DISCARD) nfatree(v, t->right, f);
	1875	}
	1876
	1877	return nfanode(v, t, f);
	1878	}
	1879
	1880	/*
	1881	- nfanode - do one NFA for nfatree
	1882	^ static long nfanode(struct vars , struct subre , FILE *);
	1883	*/
	1884	static long /* optimize results */
	1885	nfanode(
	1886	struct vars *v,
	1887	struct subre *t,
	1888	FILE f) / for debug output */
	1889	{
	1890	struct nfa *nfa;
	1891	long ret = 0;
	1892	char idbuf[50];
	1893
	1894	assert(t->begin != NULL);
	1895
	1896	if (f != NULL) {
	1897	fprintf(f, "\n\n\n========= TREE NODE %s ==========\n",
	1898	stid(t, idbuf, sizeof(idbuf)));
	1899	}
	1900	nfa = newnfa(v, v->cm, v->nfa);
	1901	NOERRZ();
	1902	dupnfa(nfa, t->begin, t->end, nfa->init, nfa->final);
	1903	if (!ISERR()) {
	1904	specialcolors(nfa);
	1905	ret = optimize(nfa, f);
	1906	}
	1907	if (!ISERR()) {
	1908	compact(nfa, &t->cnfa);
	1909	}
	1910
	1911	freenfa(nfa);
	1912	return ret;
	1913	}
	1914
	1915	/*
	1916	- newlacon - allocate a lookahead-constraint subRE
	1917	^ static int newlacon(struct vars , struct state , struct state *, int);
	1918	*/
	1919	static int /* lacon number */
	1920	newlacon(
	1921	struct vars *v,
	1922	struct state *begin,
	1923	struct state *end,
	1924	int pos)
	1925	{
	1926	struct subre *sub;
	1927	int n;
	1928
	1929	if (v->nlacons == 0) {
	1930	v->lacons = (struct subre ) MALLOC(2 sizeof(struct subre));
	1931	n = 1; /* skip 0th */
	1932	v->nlacons = 2;
	1933	} else {
	1934	v->lacons = (struct subre *) REALLOC(v->lacons,
	1935	(v->nlacons+1)*sizeof(struct subre));
	1936	n = v->nlacons++;
	1937	}
	1938
	1939	if (v->lacons == NULL) {
	1940	ERR(REG_ESPACE);
	1941	return 0;
	1942	}
	1943
	1944	sub = &v->lacons[n];
	1945	sub->begin = begin;
	1946	sub->end = end;
	1947	sub->subno = pos;
	1948	ZAPCNFA(sub->cnfa);
	1949	return n;
	1950	}
	1951
	1952	/*
	1953	- freelacons - free lookahead-constraint subRE vector
	1954	^ static void freelacons(struct subre *, int);
	1955	*/
	1956	static void
	1957	freelacons(
	1958	struct subre *subs,
	1959	int n)
	1960	{
	1961	struct subre *sub;
	1962	int i;
	1963
	1964	assert(n > 0);
	1965	for (sub=subs+1, i=n-1; i>0; sub++, i--) { /* no 0th */
	1966	if (!NULLCNFA(sub->cnfa)) {
	1967	freecnfa(&sub->cnfa);
	1968	}
	1969	}
	1970	FREE(subs);
	1971	}
	1972
	1973	/*
	1974	- rfree - free a whole RE (insides of regfree)
	1975	^ static void rfree(regex_t *);
	1976	*/
	1977	static void
	1978	rfree(
	1979	regex_t *re)
	1980	{
	1981	struct guts *g;
	1982
	1983	if (re == NULL \|\| re->re_magic != REMAGIC) {
	1984	return;
	1985	}
	1986
	1987	re->re_magic = 0; /* invalidate RE */
	1988	g = (struct guts *) re->re_guts;
	1989	re->re_guts = NULL;
	1990	re->re_fns = NULL;
	1991	g->magic = 0;
	1992	freecm(&g->cmap);
	1993	if (g->tree != NULL) {
	1994	freesubre(NULL, g->tree);
	1995	}
	1996	if (g->lacons != NULL) {
	1997	freelacons(g->lacons, g->nlacons);
	1998	}
	1999	if (!NULLCNFA(g->search)) {
	2000	freecnfa(&g->search);
	2001	}
	2002	FREE(g);
	2003	}
	2004
	2005	/*
	2006	- dump - dump an RE in human-readable form
	2007	^ static void dump(regex_t , FILE );
	2008	*/
	2009	static void
	2010	dump(
	2011	regex_t *re,
	2012	FILE *f)
	2013	{
	2014	#ifdef REG_DEBUG
	2015	struct guts *g;
	2016	int i;
	2017
	2018	if (re->re_magic != REMAGIC) {
	2019	fprintf(f, "bad magic number (0x%x not 0x%x)\n",
	2020	re->re_magic, REMAGIC);
	2021	}
	2022	if (re->re_guts == NULL) {
	2023	fprintf(f, "NULL guts!!!\n");
	2024	return;
	2025	}
	2026	g = (struct guts *) re->re_guts;
	2027	if (g->magic != GUTSMAGIC) {
	2028	fprintf(f, "bad guts magic number (0x%x not 0x%x)\n",
	2029	g->magic, GUTSMAGIC);
	2030	}
	2031
	2032	fprintf(f, "\n\n\n========= DUMP ==========\n");
	2033	fprintf(f, "nsub %d, info 0%lo, csize %d, ntree %d\n",
	2034	re->re_nsub, re->re_info, re->re_csize, g->ntree);
	2035
	2036	dumpcolors(&g->cmap, f);
	2037	if (!NULLCNFA(g->search)) {
	2038	printf("\nsearch:\n");
	2039	dumpcnfa(&g->search, f);
	2040	}
	2041	for (i = 1; i < g->nlacons; i++) {
	2042	fprintf(f, "\nla%d (%s):\n", i,
	2043	(g->lacons[i].subno) ? "positive" : "negative");
	2044	dumpcnfa(&g->lacons[i].cnfa, f);
	2045	}
	2046	fprintf(f, "\n");
	2047	dumpst(g->tree, f, 0);
	2048	#endif
	2049	}
	2050
	2051	/*
	2052	- dumpst - dump a subRE tree
	2053	^ static void dumpst(struct subre , FILE , int);
	2054	*/
	2055	static void
	2056	dumpst(
	2057	struct subre *t,
	2058	FILE *f,
	2059	int nfapresent) /* is the original NFA still around? */
	2060	{
	2061	if (t == NULL) {
	2062	fprintf(f, "null tree\n");
	2063	} else {
	2064	stdump(t, f, nfapresent);
	2065	}
	2066	fflush(f);
	2067	}
	2068
	2069	/*
	2070	- stdump - recursive guts of dumpst
	2071	^ static void stdump(struct subre , FILE , int);
	2072	*/
	2073	static void
	2074	stdump(
	2075	struct subre *t,
	2076	FILE *f,
	2077	int nfapresent) /* is the original NFA still around? */
	2078	{
	2079	char idbuf[50];
	2080
	2081	fprintf(f, "%s. `%c'", stid(t, idbuf, sizeof(idbuf)), t->op);
	2082	if (t->flags&LONGER) {
	2083	fprintf(f, " longest");
	2084	}
	2085	if (t->flags&SHORTER) {
	2086	fprintf(f, " shortest");
	2087	}
	2088	if (t->flags&MIXED) {
	2089	fprintf(f, " hasmixed");
	2090	}
	2091	if (t->flags&CAP) {
	2092	fprintf(f, " hascapture");
	2093	}
	2094	if (t->flags&BACKR) {
	2095	fprintf(f, " hasbackref");
	2096	}
	2097	if (!(t->flags&INUSE)) {
	2098	fprintf(f, " UNUSED");
	2099	}
	2100	if (t->subno != 0) {
	2101	fprintf(f, " (#%d)", t->subno);
	2102	}
	2103	if (t->min != 1 \|\| t->max != 1) {
	2104	fprintf(f, " {%d,", t->min);
	2105	if (t->max != INFINITY) {
	2106	fprintf(f, "%d", t->max);
	2107	}
	2108	fprintf(f, "}");
	2109	}
	2110	if (nfapresent) {
	2111	fprintf(f, " %ld-%ld", (long)t->begin->no, (long)t->end->no);
	2112	}
	2113	if (t->left != NULL) {
	2114	fprintf(f, " L:%s", stid(t->left, idbuf, sizeof(idbuf)));
	2115	}
	2116	if (t->right != NULL) {
	2117	fprintf(f, " R:%s", stid(t->right, idbuf, sizeof(idbuf)));
	2118	}
	2119	if (!NULLCNFA(t->cnfa)) {
	2120	fprintf(f, "\n");
	2121	dumpcnfa(&t->cnfa, f);
	2122	}
	2123	fprintf(f, "\n");
	2124	if (t->left != NULL) {
	2125	stdump(t->left, f, nfapresent);
	2126	}
	2127	if (t->right != NULL) {
	2128	stdump(t->right, f, nfapresent);
	2129	}
	2130	}
	2131
	2132	/*
	2133	- stid - identify a subtree node for dumping
	2134	^ static char stid(struct subre , char *, size_t);
	2135	*/
	2136	static const char * /* points to buf or constant string */
	2137	stid(
	2138	struct subre *t,
	2139	char *buf,
	2140	size_t bufsize)
	2141	{
	2142	/*
	2143	* Big enough for hex int or decimal t->retry?
	2144	*/
	2145
	2146	if (bufsize < sizeof(void)2 + 3 \|\| bufsize < sizeof(t->retry)*3 + 1) {
	2147	return "unable";
	2148	}
	2149	if (t->retry != 0) {
	2150	sprintf(buf, "%d", t->retry);
	2151	} else {
	2152	sprintf(buf, "%p", t);
	2153	}
	2154	return buf;
	2155	}
	2156
	2157	#include "regc_lex.c"
	2158	#include "regc_color.c"
	2159	#include "regc_nfa.c"
	2160	#include "regc_cvec.c"
	2161	#include "regc_locale.c"
	2162
	2163	/*
	2164	* Local Variables:
	2165	* mode: c
	2166	* c-basic-offset: 4
	2167	* fill-column: 78
	2168	* End:
	2169	*/

Note: See TracBrowser for help on using the repository browser.

Download in other formats: