File: | llvm/lib/Support/regengine.inc |
Warning: | line 775, column 39 Dereference of null pointer |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /*- | |||
2 | * This code is derived from OpenBSD's libc/regex, original license follows: | |||
3 | * | |||
4 | * Copyright (c) 1992, 1993, 1994 Henry Spencer. | |||
5 | * Copyright (c) 1992, 1993, 1994 | |||
6 | * The Regents of the University of California. All rights reserved. | |||
7 | * | |||
8 | * This code is derived from software contributed to Berkeley by | |||
9 | * Henry Spencer. | |||
10 | * | |||
11 | * Redistribution and use in source and binary forms, with or without | |||
12 | * modification, are permitted provided that the following conditions | |||
13 | * are met: | |||
14 | * 1. Redistributions of source code must retain the above copyright | |||
15 | * notice, this list of conditions and the following disclaimer. | |||
16 | * 2. Redistributions in binary form must reproduce the above copyright | |||
17 | * notice, this list of conditions and the following disclaimer in the | |||
18 | * documentation and/or other materials provided with the distribution. | |||
19 | * 3. Neither the name of the University nor the names of its contributors | |||
20 | * may be used to endorse or promote products derived from this software | |||
21 | * without specific prior written permission. | |||
22 | * | |||
23 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |||
24 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
25 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
26 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |||
27 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
28 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |||
29 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |||
30 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |||
31 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |||
32 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |||
33 | * SUCH DAMAGE. | |||
34 | * | |||
35 | * @(#)regexec.c 8.3 (Berkeley) 3/20/94 | |||
36 | */ | |||
37 | ||||
38 | /* | |||
39 | * the outer shell of llvm_regexec() | |||
40 | * | |||
41 | * This file includes engine.inc *twice*, after muchos fiddling with the | |||
42 | * macros that code uses. This lets the same code operate on two different | |||
43 | * representations for state sets. | |||
44 | */ | |||
45 | #include <sys/types.h> | |||
46 | #include <stdio.h> | |||
47 | #include <stdlib.h> | |||
48 | #include <string.h> | |||
49 | #include <limits.h> | |||
50 | #include <ctype.h> | |||
51 | #include "regex_impl.h" | |||
52 | ||||
53 | #include "regutils.h" | |||
54 | #include "regex2.h" | |||
55 | ||||
56 | /* macros for manipulating states, small version */ | |||
57 | /* FIXME: 'states' is assumed as 'long' on small version. */ | |||
58 | #define states1long long /* for later use in llvm_regexec() decision */ | |||
59 | #define stateschar * states1long | |||
60 | #define CLEAR(v)memset(v, 0, m->g->nstates) ((v) = 0) | |||
61 | #define SET0(v, n)((v)[n] = 0) ((v) &= ~((unsigned long)1 << (n))) | |||
62 | #define SET1(v, n)((v)[n] = 1) ((v) |= (unsigned long)1 << (n)) | |||
63 | #define ISSET(v, n)((v)[n]) (((v) & ((unsigned long)1 << (n))) != 0) | |||
64 | #define ASSIGN(d, s)memmove(d, s, m->g->nstates) ((d) = (s)) | |||
65 | #define EQ(a, b)(memcmp(a, b, m->g->nstates) == 0) ((a) == (b)) | |||
66 | #define STATEVARSlong vn; char *space long dummy /* dummy version */ | |||
67 | #define STATESETUP(m, n){ (m)->space = malloc((n)*(m)->g->nstates); if ((m)-> space == ((void*)0)) return(12); (m)->vn = 0; } /* nothing */ | |||
68 | #define STATETEARDOWN(m){ free((m)->space); } /* nothing */ | |||
69 | #define SETUP(v)((v) = &m->space[m->vn++ * m->g->nstates]) ((v) = 0) | |||
70 | #define onestatelong long | |||
71 | #define INIT(o, n)((o) = (n)) ((o) = (unsigned long)1 << (n)) | |||
72 | #define INC(o)((o)++) ((o) = (unsigned long)(o) << 1) | |||
73 | #define ISSTATEIN(v, o)((v)[o]) (((v) & (o)) != 0) | |||
74 | /* some abbreviations; note that some of these know variable names! */ | |||
75 | /* do "if I'm here, I can also be there" etc without branches */ | |||
76 | #define FWD(dst, src, n)((dst)[here+(n)] |= (src)[here]) ((dst) |= ((unsigned long)(src)&(here)) << (n)) | |||
77 | #define BACK(dst, src, n)((dst)[here-(n)] |= (src)[here]) ((dst) |= ((unsigned long)(src)&(here)) >> (n)) | |||
78 | #define ISSETBACK(v, n)((v)[here - (n)]) (((v) & ((unsigned long)here >> (n))) != 0) | |||
79 | /* function names */ | |||
80 | #define SNAMES /* engine.inc looks after details */ | |||
81 | ||||
82 | #include "regengine.inc" | |||
83 | ||||
84 | /* now undo things */ | |||
85 | #undef stateschar * | |||
86 | #undef CLEAR | |||
87 | #undef SET0 | |||
88 | #undef SET1 | |||
89 | #undef ISSET | |||
90 | #undef ASSIGN | |||
91 | #undef EQ | |||
92 | #undef STATEVARSlong vn; char *space | |||
93 | #undef STATESETUP | |||
94 | #undef STATETEARDOWN | |||
95 | #undef SETUP | |||
96 | #undef onestatelong | |||
97 | #undef INIT | |||
98 | #undef INC | |||
99 | #undef ISSTATEIN | |||
100 | #undef FWD | |||
101 | #undef BACK | |||
102 | #undef ISSETBACK | |||
103 | #undef SNAMES | |||
104 | ||||
105 | /* macros for manipulating states, large version */ | |||
106 | #define stateschar * char * | |||
107 | #define CLEAR(v)memset(v, 0, m->g->nstates) memset(v, 0, m->g->nstates) | |||
108 | #define SET0(v, n)((v)[n] = 0) ((v)[n] = 0) | |||
109 | #define SET1(v, n)((v)[n] = 1) ((v)[n] = 1) | |||
110 | #define ISSET(v, n)((v)[n]) ((v)[n]) | |||
111 | #define ASSIGN(d, s)memmove(d, s, m->g->nstates) memmove(d, s, m->g->nstates) | |||
112 | #define EQ(a, b)(memcmp(a, b, m->g->nstates) == 0) (memcmp(a, b, m->g->nstates) == 0) | |||
113 | #define STATEVARSlong vn; char *space long vn; char *space | |||
114 | #define STATESETUP(m, nv){ (m)->space = malloc((nv)*(m)->g->nstates); if ((m) ->space == ((void*)0)) return(12); (m)->vn = 0; } { (m)->space = malloc((nv)*(m)->g->nstates); \ | |||
115 | if ((m)->space == NULL((void*)0)) return(REG_ESPACE12); \ | |||
116 | (m)->vn = 0; } | |||
117 | #define STATETEARDOWN(m){ free((m)->space); } { free((m)->space); } | |||
118 | #define SETUP(v)((v) = &m->space[m->vn++ * m->g->nstates]) ((v) = &m->space[m->vn++ * m->g->nstates]) | |||
119 | #define onestatelong long | |||
120 | #define INIT(o, n)((o) = (n)) ((o) = (n)) | |||
121 | #define INC(o)((o)++) ((o)++) | |||
122 | #define ISSTATEIN(v, o)((v)[o]) ((v)[o]) | |||
123 | /* some abbreviations; note that some of these know variable names! */ | |||
124 | /* do "if I'm here, I can also be there" etc without branches */ | |||
125 | #define FWD(dst, src, n)((dst)[here+(n)] |= (src)[here]) ((dst)[here+(n)] |= (src)[here]) | |||
126 | #define BACK(dst, src, n)((dst)[here-(n)] |= (src)[here]) ((dst)[here-(n)] |= (src)[here]) | |||
127 | #define ISSETBACK(v, n)((v)[here - (n)]) ((v)[here - (n)]) | |||
128 | /* function names */ | |||
129 | #define LNAMES /* flag */ | |||
130 | ||||
131 | #include "regengine.inc" | |||
132 | ||||
133 | /* | |||
134 | - llvm_regexec - interface for matching | |||
135 | * | |||
136 | * We put this here so we can exploit knowledge of the state representation | |||
137 | * when choosing which matcher to call. Also, by this point the matchers | |||
138 | * have been prototyped. | |||
139 | */ | |||
140 | int /* 0 success, REG_NOMATCH failure */ | |||
141 | llvm_regexec(const llvm_regex_t *preg, const char *string, size_t nmatch, | |||
142 | llvm_regmatch_t pmatch[], int eflags) | |||
143 | { | |||
144 | struct re_guts *g = preg->re_g; | |||
145 | #ifdef REDEBUG | |||
146 | # define GOODFLAGS(f)((f)&(00001|00002|00004)) (f) | |||
147 | #else | |||
148 | # define GOODFLAGS(f)((f)&(00001|00002|00004)) ((f)&(REG_NOTBOL00001|REG_NOTEOL00002|REG_STARTEND00004)) | |||
149 | #endif | |||
150 | ||||
151 | if (preg->re_magic != MAGIC1((('r'^0200)<<8) | 'e') || g->magic != MAGIC2((('R'^0200)<<8)|'E')) | |||
| ||||
152 | return(REG_BADPAT2); | |||
153 | assert(!(g->iflags®EX_BAD))((void) (0)); | |||
154 | if (g->iflags®EX_BAD04) /* backstop for no-debug case */ | |||
155 | return(REG_BADPAT2); | |||
156 | eflags = GOODFLAGS(eflags)((eflags)&(00001|00002|00004)); | |||
157 | ||||
158 | if (g->nstates <= (long)(CHAR_BIT8*sizeof(states1long)) && !(eflags®_LARGE01000)) | |||
159 | return(smatcher(g, string, nmatch, pmatch, eflags)); | |||
160 | else | |||
161 | return(lmatcher(g, string, nmatch, pmatch, eflags)); | |||
162 | } |
1 | /*- | ||||
2 | * This code is derived from OpenBSD's libc/regex, original license follows: | ||||
3 | * | ||||
4 | * Copyright (c) 1992, 1993, 1994 Henry Spencer. | ||||
5 | * Copyright (c) 1992, 1993, 1994 | ||||
6 | * The Regents of the University of California. All rights reserved. | ||||
7 | * | ||||
8 | * This code is derived from software contributed to Berkeley by | ||||
9 | * Henry Spencer. | ||||
10 | * | ||||
11 | * Redistribution and use in source and binary forms, with or without | ||||
12 | * modification, are permitted provided that the following conditions | ||||
13 | * are met: | ||||
14 | * 1. Redistributions of source code must retain the above copyright | ||||
15 | * notice, this list of conditions and the following disclaimer. | ||||
16 | * 2. Redistributions in binary form must reproduce the above copyright | ||||
17 | * notice, this list of conditions and the following disclaimer in the | ||||
18 | * documentation and/or other materials provided with the distribution. | ||||
19 | * 3. Neither the name of the University nor the names of its contributors | ||||
20 | * may be used to endorse or promote products derived from this software | ||||
21 | * without specific prior written permission. | ||||
22 | * | ||||
23 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | ||||
24 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||||
25 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||||
26 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | ||||
27 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||||
28 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | ||||
29 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||||
30 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | ||||
31 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | ||||
32 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | ||||
33 | * SUCH DAMAGE. | ||||
34 | * | ||||
35 | * @(#)engine.c 8.5 (Berkeley) 3/20/94 | ||||
36 | */ | ||||
37 | |||||
38 | /* | ||||
39 | * The matching engine and friends. This file is #included by regexec.c | ||||
40 | * after suitable #defines of a variety of macros used herein, so that | ||||
41 | * different state representations can be used without duplicating masses | ||||
42 | * of code. | ||||
43 | */ | ||||
44 | |||||
45 | #ifdef SNAMES | ||||
46 | #define matcher smatcher | ||||
47 | #define fast sfast | ||||
48 | #define slow sslow | ||||
49 | #define dissect sdissect | ||||
50 | #define backref sbackref | ||||
51 | #define step sstep | ||||
52 | #define print sprint | ||||
53 | #define at sat | ||||
54 | #define match smat | ||||
55 | #define nope snope | ||||
56 | #endif | ||||
57 | #ifdef LNAMES | ||||
58 | #define matcher lmatcher | ||||
59 | #define fast lfast | ||||
60 | #define slow lslow | ||||
61 | #define dissect ldissect | ||||
62 | #define backref lbackref | ||||
63 | #define step lstep | ||||
64 | #define print lprint | ||||
65 | #define at lat | ||||
66 | #define match lmat | ||||
67 | #define nope lnope | ||||
68 | #endif | ||||
69 | |||||
70 | /* another structure passed up and down to avoid zillions of parameters */ | ||||
71 | struct match { | ||||
72 | struct re_guts *g; | ||||
73 | int eflags; | ||||
74 | llvm_regmatch_t *pmatch; /* [nsub+1] (0 element unused) */ | ||||
75 | const char *offp; /* offsets work from here */ | ||||
76 | const char *beginp; /* start of string -- virtual NUL precedes */ | ||||
77 | const char *endp; /* end of string -- virtual NUL here */ | ||||
78 | const char *coldp; /* can be no match starting before here */ | ||||
79 | const char **lastpos; /* [nplus+1] */ | ||||
80 | STATEVARSlong vn; char *space; | ||||
81 | stateschar * st; /* current states */ | ||||
82 | stateschar * fresh; /* states for a fresh start */ | ||||
83 | stateschar * tmp; /* temporary */ | ||||
84 | stateschar * empty; /* empty set of states */ | ||||
85 | }; | ||||
86 | |||||
87 | static int matcher(struct re_guts *, const char *, size_t, | ||||
88 | llvm_regmatch_t[], int); | ||||
89 | static const char *dissect(struct match *, const char *, const char *, sopno, | ||||
90 | sopno); | ||||
91 | static const char *backref(struct match *, const char *, const char *, sopno, | ||||
92 | sopno, sopno, int); | ||||
93 | static const char *fast(struct match *, const char *, const char *, sopno, sopno); | ||||
94 | static const char *slow(struct match *, const char *, const char *, sopno, sopno); | ||||
95 | static stateschar * step(struct re_guts *, sopno, sopno, stateschar *, int, stateschar *); | ||||
96 | #define MAX_RECURSION100 100 | ||||
97 | #define BOL((127 +1)+1) (OUT(127 +1)+1) | ||||
98 | #define EOL(((127 +1)+1)+1) (BOL((127 +1)+1)+1) | ||||
99 | #define BOLEOL(((127 +1)+1)+2) (BOL((127 +1)+1)+2) | ||||
100 | #define NOTHING(((127 +1)+1)+3) (BOL((127 +1)+1)+3) | ||||
101 | #define BOW(((127 +1)+1)+4) (BOL((127 +1)+1)+4) | ||||
102 | #define EOW(((127 +1)+1)+5) (BOL((127 +1)+1)+5) | ||||
103 | #define CODEMAX(((127 +1)+1)+5) (BOL((127 +1)+1)+5) /* highest code used */ | ||||
104 | #define NONCHAR(c)((c) > 127) ((c) > CHAR_MAX127) | ||||
105 | #define NNONCHAR((((127 +1)+1)+5)-127) (CODEMAX(((127 +1)+1)+5)-CHAR_MAX127) | ||||
106 | #ifdef REDEBUG | ||||
107 | static void print(struct match *, char *, stateschar *, int, FILE *); | ||||
108 | #endif | ||||
109 | #ifdef REDEBUG | ||||
110 | static void at(struct match *, char *, char *, char *, sopno, sopno); | ||||
111 | #endif | ||||
112 | #ifdef REDEBUG | ||||
113 | static char *pchar(int); | ||||
114 | #endif | ||||
115 | |||||
116 | #ifdef REDEBUG | ||||
117 | #define SP(t, s, c) print(m, t, s, c, stdoutstdout) | ||||
118 | #define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2) | ||||
119 | #define NOTE(str) { if (m->eflags®_TRACE00400) (void)printf("=%s\n", (str)); } | ||||
120 | static int nope = 0; | ||||
121 | #else | ||||
122 | #define SP(t, s, c) /* nothing */ | ||||
123 | #define AT(t, p1, p2, s1, s2) /* nothing */ | ||||
124 | #define NOTE(s) /* nothing */ | ||||
125 | #endif | ||||
126 | |||||
127 | /* | ||||
128 | - matcher - the actual matching engine | ||||
129 | */ | ||||
130 | static int /* 0 success, REG_NOMATCH failure */ | ||||
131 | matcher(struct re_guts *g, const char *string, size_t nmatch, | ||||
132 | llvm_regmatch_t pmatch[], | ||||
133 | int eflags) | ||||
134 | { | ||||
135 | const char *endp; | ||||
136 | size_t i; | ||||
137 | struct match mv; | ||||
138 | struct match *m = &mv; | ||||
139 | const char *dp; | ||||
140 | const sopno gf = g->firststate+1; /* +1 for OEND */ | ||||
141 | const sopno gl = g->laststate; | ||||
142 | const char *start; | ||||
143 | const char *stop; | ||||
144 | |||||
145 | /* simplify the situation where possible */ | ||||
146 | if (g->cflags®_NOSUB0004) | ||||
147 | nmatch = 0; | ||||
148 | if (eflags®_STARTEND00004) { | ||||
149 | start = string + pmatch[0].rm_so; | ||||
150 | stop = string + pmatch[0].rm_eo; | ||||
151 | } else { | ||||
152 | start = string; | ||||
153 | stop = start + strlen(start); | ||||
154 | } | ||||
155 | if (stop < start) | ||||
156 | return(REG_INVARG16); | ||||
157 | |||||
158 | /* prescreening; this does wonders for this rather slow code */ | ||||
159 | if (g->must != NULL((void*)0)) { | ||||
160 | for (dp = start; dp < stop; dp++) | ||||
161 | if (*dp == g->must[0] && stop - dp >= g->mlen && | ||||
162 | memcmp(dp, g->must, (size_t)g->mlen) == 0) | ||||
163 | break; | ||||
164 | if (dp == stop) /* we didn't find g->must */ | ||||
165 | return(REG_NOMATCH1); | ||||
166 | } | ||||
167 | |||||
168 | /* match struct setup */ | ||||
169 | m->g = g; | ||||
170 | m->eflags = eflags; | ||||
171 | m->pmatch = NULL((void*)0); | ||||
172 | m->lastpos = NULL((void*)0); | ||||
173 | m->offp = string; | ||||
174 | m->beginp = start; | ||||
175 | m->endp = stop; | ||||
176 | STATESETUP(m, 4){ (m)->space = malloc((4)*(m)->g->nstates); if ((m)-> space == ((void*)0)) return(12); (m)->vn = 0; }; | ||||
177 | SETUP(m->st)((m->st) = &m->space[m->vn++ * m->g->nstates ]); | ||||
178 | SETUP(m->fresh)((m->fresh) = &m->space[m->vn++ * m->g->nstates ]); | ||||
179 | SETUP(m->tmp)((m->tmp) = &m->space[m->vn++ * m->g->nstates ]); | ||||
180 | SETUP(m->empty)((m->empty) = &m->space[m->vn++ * m->g->nstates ]); | ||||
181 | CLEAR(m->empty)memset(m->empty, 0, m->g->nstates); | ||||
182 | |||||
183 | /* this loop does only one repetition except for backrefs */ | ||||
184 | for (;;) { | ||||
185 | endp = fast(m, start, stop, gf, gl); | ||||
186 | if (endp
| ||||
187 | free(m->pmatch); | ||||
188 | free((void*)m->lastpos); | ||||
189 | STATETEARDOWN(m){ free((m)->space); }; | ||||
190 | return(REG_NOMATCH1); | ||||
191 | } | ||||
192 | if (nmatch == 0 && !g->backrefs) | ||||
193 | break; /* no further info needed */ | ||||
194 | |||||
195 | /* where? */ | ||||
196 | assert(m->coldp != NULL)((void) (0)); | ||||
197 | for (;;) { | ||||
198 | NOTE("finding start"); | ||||
199 | endp = slow(m, m->coldp, stop, gf, gl); | ||||
200 | if (endp != NULL((void*)0)) | ||||
201 | break; | ||||
202 | assert(m->coldp < m->endp)((void) (0)); | ||||
203 | m->coldp++; | ||||
204 | } | ||||
205 | if (nmatch == 1 && !g->backrefs) | ||||
206 | break; /* no further info needed */ | ||||
207 | |||||
208 | /* oh my, they want the subexpressions... */ | ||||
209 | if (m->pmatch == NULL((void*)0)) | ||||
210 | m->pmatch = (llvm_regmatch_t *)malloc((m->g->nsub + 1) * | ||||
211 | sizeof(llvm_regmatch_t)); | ||||
212 | if (m->pmatch == NULL((void*)0)) { | ||||
213 | STATETEARDOWN(m){ free((m)->space); }; | ||||
214 | return(REG_ESPACE12); | ||||
215 | } | ||||
216 | for (i = 1; i <= m->g->nsub; i++) | ||||
217 | m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1; | ||||
218 | if (!g->backrefs && !(m->eflags®_BACKR02000)) { | ||||
219 | NOTE("dissecting"); | ||||
220 | dp = dissect(m, m->coldp, endp, gf, gl); | ||||
221 | } else { | ||||
222 | if (g->nplus > 0 && m->lastpos == NULL((void*)0)) | ||||
223 | m->lastpos = (const char **)malloc((g->nplus+1) * | ||||
224 | sizeof(char *)); | ||||
225 | if (g->nplus > 0 && m->lastpos == NULL((void*)0)) { | ||||
226 | free(m->pmatch); | ||||
227 | STATETEARDOWN(m){ free((m)->space); }; | ||||
228 | return(REG_ESPACE12); | ||||
229 | } | ||||
230 | NOTE("backref dissect"); | ||||
231 | dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0); | ||||
232 | } | ||||
233 | if (dp != NULL((void*)0)) | ||||
234 | break; | ||||
235 | |||||
236 | /* uh-oh... we couldn't find a subexpression-level match */ | ||||
237 | assert(g->backrefs)((void) (0)); /* must be back references doing it */ | ||||
238 | assert(g->nplus == 0 || m->lastpos != NULL)((void) (0)); | ||||
239 | for (;;) { | ||||
240 | if (dp != NULL((void*)0) || endp <= m->coldp) | ||||
241 | break; /* defeat */ | ||||
242 | NOTE("backoff"); | ||||
243 | endp = slow(m, m->coldp, endp-1, gf, gl); | ||||
244 | if (endp == NULL((void*)0)) | ||||
245 | break; /* defeat */ | ||||
246 | /* try it on a shorter possibility */ | ||||
247 | #ifndef NDEBUG | ||||
248 | for (i = 1; i <= m->g->nsub; i++) { | ||||
249 | assert(m->pmatch[i].rm_so == -1)((void) (0)); | ||||
250 | assert(m->pmatch[i].rm_eo == -1)((void) (0)); | ||||
251 | } | ||||
252 | #endif | ||||
253 | NOTE("backoff dissect"); | ||||
254 | dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0); | ||||
255 | } | ||||
256 | assert(dp == NULL || dp == endp)((void) (0)); | ||||
257 | if (dp != NULL((void*)0)) /* found a shorter one */ | ||||
258 | break; | ||||
259 | |||||
260 | /* despite initial appearances, there is no match here */ | ||||
261 | NOTE("false alarm"); | ||||
262 | if (m->coldp == stop) | ||||
263 | break; | ||||
264 | start = m->coldp + 1; /* recycle starting later */ | ||||
265 | } | ||||
266 | |||||
267 | /* fill in the details if requested */ | ||||
268 | if (nmatch > 0) { | ||||
269 | pmatch[0].rm_so = m->coldp - m->offp; | ||||
270 | pmatch[0].rm_eo = endp - m->offp; | ||||
271 | } | ||||
272 | if (nmatch > 1) { | ||||
273 | assert(m->pmatch != NULL)((void) (0)); | ||||
274 | for (i = 1; i < nmatch; i++) | ||||
275 | if (i <= m->g->nsub) | ||||
276 | pmatch[i] = m->pmatch[i]; | ||||
277 | else { | ||||
278 | pmatch[i].rm_so = -1; | ||||
279 | pmatch[i].rm_eo = -1; | ||||
280 | } | ||||
281 | } | ||||
282 | |||||
283 | if (m->pmatch != NULL((void*)0)) | ||||
284 | free((char *)m->pmatch); | ||||
285 | if (m->lastpos != NULL((void*)0)) | ||||
286 | free((char *)m->lastpos); | ||||
287 | STATETEARDOWN(m){ free((m)->space); }; | ||||
288 | return(0); | ||||
289 | } | ||||
290 | |||||
291 | /* | ||||
292 | - dissect - figure out what matched what, no back references | ||||
293 | */ | ||||
294 | static const char * /* == stop (success) always */ | ||||
295 | dissect(struct match *m, const char *start, const char *stop, sopno startst, | ||||
296 | sopno stopst) | ||||
297 | { | ||||
298 | int i; | ||||
299 | sopno ss; /* start sop of current subRE */ | ||||
300 | sopno es; /* end sop of current subRE */ | ||||
301 | const char *sp; /* start of string matched by it */ | ||||
302 | const char *stp; /* string matched by it cannot pass here */ | ||||
303 | const char *rest; /* start of rest of string */ | ||||
304 | const char *tail; /* string unmatched by rest of RE */ | ||||
305 | sopno ssub; /* start sop of subsubRE */ | ||||
306 | sopno esub; /* end sop of subsubRE */ | ||||
307 | const char *ssp; /* start of string matched by subsubRE */ | ||||
308 | const char *sep; /* end of string matched by subsubRE */ | ||||
309 | const char *oldssp; /* previous ssp */ | ||||
310 | |||||
311 | AT("diss", start, stop, startst, stopst); | ||||
312 | sp = start; | ||||
313 | for (ss = startst; ss < stopst; ss = es) { | ||||
314 | /* identify end of subRE */ | ||||
315 | es = ss; | ||||
316 | switch (OP(m->g->strip[es])((m->g->strip[es])&0xf8000000LU)) { | ||||
317 | case OPLUS_(9LU<<((unsigned)27)): | ||||
318 | case OQUEST_(11LU<<((unsigned)27)): | ||||
319 | es += OPND(m->g->strip[es])((m->g->strip[es])&0x07ffffffLU); | ||||
320 | break; | ||||
321 | case OCH_(15LU<<((unsigned)27)): | ||||
322 | while (OP(m->g->strip[es])((m->g->strip[es])&0xf8000000LU) != O_CH(18LU<<((unsigned)27))) | ||||
323 | es += OPND(m->g->strip[es])((m->g->strip[es])&0x07ffffffLU); | ||||
324 | break; | ||||
325 | } | ||||
326 | es++; | ||||
327 | |||||
328 | /* figure out what it matched */ | ||||
329 | switch (OP(m->g->strip[ss])((m->g->strip[ss])&0xf8000000LU)) { | ||||
330 | case OEND(1LU<<((unsigned)27)): | ||||
331 | assert(nope)((void) (0)); | ||||
332 | break; | ||||
333 | case OCHAR(2LU<<((unsigned)27)): | ||||
334 | sp++; | ||||
335 | break; | ||||
336 | case OBOL(3LU<<((unsigned)27)): | ||||
337 | case OEOL(4LU<<((unsigned)27)): | ||||
338 | case OBOW(19LU<<((unsigned)27)): | ||||
339 | case OEOW(20LU<<((unsigned)27)): | ||||
340 | break; | ||||
341 | case OANY(5LU<<((unsigned)27)): | ||||
342 | case OANYOF(6LU<<((unsigned)27)): | ||||
343 | sp++; | ||||
344 | break; | ||||
345 | case OBACK_(7LU<<((unsigned)27)): | ||||
346 | case O_BACK(8LU<<((unsigned)27)): | ||||
347 | assert(nope)((void) (0)); | ||||
348 | break; | ||||
349 | /* cases where length of match is hard to find */ | ||||
350 | case OQUEST_(11LU<<((unsigned)27)): | ||||
351 | stp = stop; | ||||
352 | for (;;) { | ||||
353 | /* how long could this one be? */ | ||||
354 | rest = slow(m, sp, stp, ss, es); | ||||
355 | assert(rest != NULL)((void) (0)); /* it did match */ | ||||
356 | /* could the rest match the rest? */ | ||||
357 | tail = slow(m, rest, stop, es, stopst); | ||||
358 | if (tail == stop) | ||||
359 | break; /* yes! */ | ||||
360 | /* no -- try a shorter match for this one */ | ||||
361 | stp = rest - 1; | ||||
362 | assert(stp >= sp)((void) (0)); /* it did work */ | ||||
363 | } | ||||
364 | ssub = ss + 1; | ||||
365 | esub = es - 1; | ||||
366 | /* did innards match? */ | ||||
367 | if (slow(m, sp, rest, ssub, esub) != NULL((void*)0)) { | ||||
368 | const char *dp = dissect(m, sp, rest, ssub, esub); | ||||
369 | (void)dp; /* avoid warning if assertions off */ | ||||
370 | assert(dp == rest)((void) (0)); | ||||
371 | } else /* no */ | ||||
372 | assert(sp == rest)((void) (0)); | ||||
373 | sp = rest; | ||||
374 | break; | ||||
375 | case OPLUS_(9LU<<((unsigned)27)): | ||||
376 | stp = stop; | ||||
377 | for (;;) { | ||||
378 | /* how long could this one be? */ | ||||
379 | rest = slow(m, sp, stp, ss, es); | ||||
380 | assert(rest != NULL)((void) (0)); /* it did match */ | ||||
381 | /* could the rest match the rest? */ | ||||
382 | tail = slow(m, rest, stop, es, stopst); | ||||
383 | if (tail == stop) | ||||
384 | break; /* yes! */ | ||||
385 | /* no -- try a shorter match for this one */ | ||||
386 | stp = rest - 1; | ||||
387 | assert(stp >= sp)((void) (0)); /* it did work */ | ||||
388 | } | ||||
389 | ssub = ss + 1; | ||||
390 | esub = es - 1; | ||||
391 | ssp = sp; | ||||
392 | oldssp = ssp; | ||||
393 | for (;;) { /* find last match of innards */ | ||||
394 | sep = slow(m, ssp, rest, ssub, esub); | ||||
395 | if (sep == NULL((void*)0) || sep == ssp) | ||||
396 | break; /* failed or matched null */ | ||||
397 | oldssp = ssp; /* on to next try */ | ||||
398 | ssp = sep; | ||||
399 | } | ||||
400 | if (sep == NULL((void*)0)) { | ||||
401 | /* last successful match */ | ||||
402 | sep = ssp; | ||||
403 | ssp = oldssp; | ||||
404 | } | ||||
405 | assert(sep == rest)((void) (0)); /* must exhaust substring */ | ||||
406 | assert(slow(m, ssp, sep, ssub, esub) == rest)((void) (0)); | ||||
407 | { | ||||
408 | const char *dp = dissect(m, ssp, sep, ssub, esub); | ||||
409 | (void)dp; /* avoid warning if assertions off */ | ||||
410 | assert(dp == sep)((void) (0)); | ||||
411 | } | ||||
412 | sp = rest; | ||||
413 | break; | ||||
414 | case OCH_(15LU<<((unsigned)27)): | ||||
415 | stp = stop; | ||||
416 | for (;;) { | ||||
417 | /* how long could this one be? */ | ||||
418 | rest = slow(m, sp, stp, ss, es); | ||||
419 | assert(rest != NULL)((void) (0)); /* it did match */ | ||||
420 | /* could the rest match the rest? */ | ||||
421 | tail = slow(m, rest, stop, es, stopst); | ||||
422 | if (tail == stop) | ||||
423 | break; /* yes! */ | ||||
424 | /* no -- try a shorter match for this one */ | ||||
425 | stp = rest - 1; | ||||
426 | assert(stp >= sp)((void) (0)); /* it did work */ | ||||
427 | } | ||||
428 | ssub = ss + 1; | ||||
429 | esub = ss + OPND(m->g->strip[ss])((m->g->strip[ss])&0x07ffffffLU) - 1; | ||||
430 | assert(OP(m->g->strip[esub]) == OOR1)((void) (0)); | ||||
431 | for (;;) { /* find first matching branch */ | ||||
432 | if (slow(m, sp, rest, ssub, esub) == rest) | ||||
433 | break; /* it matched all of it */ | ||||
434 | /* that one missed, try next one */ | ||||
435 | assert(OP(m->g->strip[esub]) == OOR1)((void) (0)); | ||||
436 | esub++; | ||||
437 | assert(OP(m->g->strip[esub]) == OOR2)((void) (0)); | ||||
438 | ssub = esub + 1; | ||||
439 | esub += OPND(m->g->strip[esub])((m->g->strip[esub])&0x07ffffffLU); | ||||
440 | if (OP(m->g->strip[esub])((m->g->strip[esub])&0xf8000000LU) == OOR2(17LU<<((unsigned)27))) | ||||
441 | esub--; | ||||
442 | else | ||||
443 | assert(OP(m->g->strip[esub]) == O_CH)((void) (0)); | ||||
444 | } | ||||
445 | { | ||||
446 | const char *dp = dissect(m, sp, rest, ssub, esub); | ||||
447 | (void)dp; /* avoid warning if assertions off */ | ||||
448 | assert(dp == rest)((void) (0)); | ||||
449 | } | ||||
450 | sp = rest; | ||||
451 | break; | ||||
452 | case O_PLUS(10LU<<((unsigned)27)): | ||||
453 | case O_QUEST(12LU<<((unsigned)27)): | ||||
454 | case OOR1(16LU<<((unsigned)27)): | ||||
455 | case OOR2(17LU<<((unsigned)27)): | ||||
456 | case O_CH(18LU<<((unsigned)27)): | ||||
457 | assert(nope)((void) (0)); | ||||
458 | break; | ||||
459 | case OLPAREN(13LU<<((unsigned)27)): | ||||
460 | i = OPND(m->g->strip[ss])((m->g->strip[ss])&0x07ffffffLU); | ||||
461 | assert(0 < i && i <= m->g->nsub)((void) (0)); | ||||
462 | m->pmatch[i].rm_so = sp - m->offp; | ||||
463 | break; | ||||
464 | case ORPAREN(14LU<<((unsigned)27)): | ||||
465 | i = OPND(m->g->strip[ss])((m->g->strip[ss])&0x07ffffffLU); | ||||
466 | assert(0 < i && i <= m->g->nsub)((void) (0)); | ||||
467 | m->pmatch[i].rm_eo = sp - m->offp; | ||||
468 | break; | ||||
469 | default: /* uh oh */ | ||||
470 | assert(nope)((void) (0)); | ||||
471 | break; | ||||
472 | } | ||||
473 | } | ||||
474 | |||||
475 | assert(sp == stop)((void) (0)); | ||||
476 | return(sp); | ||||
477 | } | ||||
478 | |||||
479 | /* | ||||
480 | - backref - figure out what matched what, figuring in back references | ||||
481 | */ | ||||
482 | static const char * /* == stop (success) or NULL (failure) */ | ||||
483 | backref(struct match *m, const char *start, const char *stop, sopno startst, | ||||
484 | sopno stopst, sopno lev, int rec) /* PLUS nesting level */ | ||||
485 | { | ||||
486 | int i; | ||||
487 | sopno ss; /* start sop of current subRE */ | ||||
488 | const char *sp; /* start of string matched by it */ | ||||
489 | sopno ssub; /* start sop of subsubRE */ | ||||
490 | sopno esub; /* end sop of subsubRE */ | ||||
491 | const char *ssp; /* start of string matched by subsubRE */ | ||||
492 | const char *dp; | ||||
493 | size_t len; | ||||
494 | int hard; | ||||
495 | sop s; | ||||
496 | llvm_regoff_t offsave; | ||||
497 | cset *cs; | ||||
498 | |||||
499 | AT("back", start, stop, startst, stopst); | ||||
500 | sp = start; | ||||
501 | |||||
502 | /* get as far as we can with easy stuff */ | ||||
503 | hard = 0; | ||||
504 | for (ss = startst; !hard && ss < stopst; ss++) | ||||
505 | switch (OP(s = m->g->strip[ss])((s = m->g->strip[ss])&0xf8000000LU)) { | ||||
506 | case OCHAR(2LU<<((unsigned)27)): | ||||
507 | if (sp == stop || *sp++ != (char)OPND(s)((s)&0x07ffffffLU)) | ||||
508 | return(NULL((void*)0)); | ||||
509 | break; | ||||
510 | case OANY(5LU<<((unsigned)27)): | ||||
511 | if (sp == stop) | ||||
512 | return(NULL((void*)0)); | ||||
513 | sp++; | ||||
514 | break; | ||||
515 | case OANYOF(6LU<<((unsigned)27)): | ||||
516 | cs = &m->g->sets[OPND(s)((s)&0x07ffffffLU)]; | ||||
517 | if (sp == stop || !CHIN(cs, *sp++)((cs)->ptr[(uch)(*sp++)] & (cs)->mask)) | ||||
518 | return(NULL((void*)0)); | ||||
519 | break; | ||||
520 | case OBOL(3LU<<((unsigned)27)): | ||||
521 | if ( (sp == m->beginp && !(m->eflags®_NOTBOL00001)) || | ||||
522 | (sp < m->endp && *(sp-1) == '\n' && | ||||
523 | (m->g->cflags®_NEWLINE0010)) ) | ||||
524 | { /* yes */ } | ||||
525 | else | ||||
526 | return(NULL((void*)0)); | ||||
527 | break; | ||||
528 | case OEOL(4LU<<((unsigned)27)): | ||||
529 | if ( (sp == m->endp && !(m->eflags®_NOTEOL00002)) || | ||||
530 | (sp < m->endp && *sp == '\n' && | ||||
531 | (m->g->cflags®_NEWLINE0010)) ) | ||||
532 | { /* yes */ } | ||||
533 | else | ||||
534 | return(NULL((void*)0)); | ||||
535 | break; | ||||
536 | case OBOW(19LU<<((unsigned)27)): | ||||
537 | if (( (sp == m->beginp && !(m->eflags®_NOTBOL00001)) || | ||||
538 | (sp < m->endp && *(sp-1) == '\n' && | ||||
539 | (m->g->cflags®_NEWLINE0010)) || | ||||
540 | (sp > m->beginp && | ||||
541 | !ISWORD(*(sp-1))(((*__ctype_b_loc ())[(int) ((*(sp-1)&0xff))] & (unsigned short int) _ISalnum) || (*(sp-1)) == '_')) ) && | ||||
542 | (sp < m->endp && ISWORD(*sp)(((*__ctype_b_loc ())[(int) ((*sp&0xff))] & (unsigned short int) _ISalnum) || (*sp) == '_')) ) | ||||
543 | { /* yes */ } | ||||
544 | else | ||||
545 | return(NULL((void*)0)); | ||||
546 | break; | ||||
547 | case OEOW(20LU<<((unsigned)27)): | ||||
548 | if (( (sp == m->endp && !(m->eflags®_NOTEOL00002)) || | ||||
549 | (sp < m->endp && *sp == '\n' && | ||||
550 | (m->g->cflags®_NEWLINE0010)) || | ||||
551 | (sp < m->endp && !ISWORD(*sp)(((*__ctype_b_loc ())[(int) ((*sp&0xff))] & (unsigned short int) _ISalnum) || (*sp) == '_')) ) && | ||||
552 | (sp > m->beginp && ISWORD(*(sp-1))(((*__ctype_b_loc ())[(int) ((*(sp-1)&0xff))] & (unsigned short int) _ISalnum) || (*(sp-1)) == '_')) ) | ||||
553 | { /* yes */ } | ||||
554 | else | ||||
555 | return(NULL((void*)0)); | ||||
556 | break; | ||||
557 | case O_QUEST(12LU<<((unsigned)27)): | ||||
558 | break; | ||||
559 | case OOR1(16LU<<((unsigned)27)): /* matches null but needs to skip */ | ||||
560 | ss++; | ||||
561 | s = m->g->strip[ss]; | ||||
562 | do { | ||||
563 | assert(OP(s) == OOR2)((void) (0)); | ||||
564 | ss += OPND(s)((s)&0x07ffffffLU); | ||||
565 | } while (OP(s = m->g->strip[ss])((s = m->g->strip[ss])&0xf8000000LU) != O_CH(18LU<<((unsigned)27))); | ||||
566 | /* note that the ss++ gets us past the O_CH */ | ||||
567 | break; | ||||
568 | default: /* have to make a choice */ | ||||
569 | hard = 1; | ||||
570 | break; | ||||
571 | } | ||||
572 | if (!hard) { /* that was it! */ | ||||
573 | if (sp != stop) | ||||
574 | return(NULL((void*)0)); | ||||
575 | return(sp); | ||||
576 | } | ||||
577 | ss--; /* adjust for the for's final increment */ | ||||
578 | |||||
579 | /* the hard stuff */ | ||||
580 | AT("hard", sp, stop, ss, stopst); | ||||
581 | s = m->g->strip[ss]; | ||||
582 | switch (OP(s)((s)&0xf8000000LU)) { | ||||
583 | case OBACK_(7LU<<((unsigned)27)): /* the vilest depths */ | ||||
584 | i = OPND(s)((s)&0x07ffffffLU); | ||||
585 | assert(0 < i && i <= m->g->nsub)((void) (0)); | ||||
586 | if (m->pmatch[i].rm_eo == -1) | ||||
587 | return(NULL((void*)0)); | ||||
588 | assert(m->pmatch[i].rm_so != -1)((void) (0)); | ||||
589 | len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so; | ||||
590 | if (len == 0 && rec++ > MAX_RECURSION100) | ||||
591 | return(NULL((void*)0)); | ||||
592 | assert(stop - m->beginp >= len)((void) (0)); | ||||
593 | if (sp > stop - len) | ||||
594 | return(NULL((void*)0)); /* not enough left to match */ | ||||
595 | ssp = m->offp + m->pmatch[i].rm_so; | ||||
596 | if (memcmp(sp, ssp, len) != 0) | ||||
597 | return(NULL((void*)0)); | ||||
598 | while (m->g->strip[ss] != SOP(O_BACK, i)(((8LU<<((unsigned)27)))|(i))) | ||||
599 | ss++; | ||||
600 | return(backref(m, sp+len, stop, ss+1, stopst, lev, rec)); | ||||
601 | break; | ||||
602 | case OQUEST_(11LU<<((unsigned)27)): /* to null or not */ | ||||
603 | dp = backref(m, sp, stop, ss+1, stopst, lev, rec); | ||||
604 | if (dp != NULL((void*)0)) | ||||
605 | return(dp); /* not */ | ||||
606 | return(backref(m, sp, stop, ss+OPND(s)((s)&0x07ffffffLU)+1, stopst, lev, rec)); | ||||
607 | break; | ||||
608 | case OPLUS_(9LU<<((unsigned)27)): | ||||
609 | assert(m->lastpos != NULL)((void) (0)); | ||||
610 | assert(lev+1 <= m->g->nplus)((void) (0)); | ||||
611 | m->lastpos[lev+1] = sp; | ||||
612 | return(backref(m, sp, stop, ss+1, stopst, lev+1, rec)); | ||||
613 | break; | ||||
614 | case O_PLUS(10LU<<((unsigned)27)): | ||||
615 | if (sp == m->lastpos[lev]) /* last pass matched null */ | ||||
616 | return(backref(m, sp, stop, ss+1, stopst, lev-1, rec)); | ||||
617 | /* try another pass */ | ||||
618 | m->lastpos[lev] = sp; | ||||
619 | dp = backref(m, sp, stop, ss-OPND(s)((s)&0x07ffffffLU)+1, stopst, lev, rec); | ||||
620 | if (dp == NULL((void*)0)) | ||||
621 | return(backref(m, sp, stop, ss+1, stopst, lev-1, rec)); | ||||
622 | else | ||||
623 | return(dp); | ||||
624 | break; | ||||
625 | case OCH_(15LU<<((unsigned)27)): /* find the right one, if any */ | ||||
626 | ssub = ss + 1; | ||||
627 | esub = ss + OPND(s)((s)&0x07ffffffLU) - 1; | ||||
628 | assert(OP(m->g->strip[esub]) == OOR1)((void) (0)); | ||||
629 | for (;;) { /* find first matching branch */ | ||||
630 | dp = backref(m, sp, stop, ssub, esub, lev, rec); | ||||
631 | if (dp != NULL((void*)0)) | ||||
632 | return(dp); | ||||
633 | /* that one missed, try next one */ | ||||
634 | if (OP(m->g->strip[esub])((m->g->strip[esub])&0xf8000000LU) == O_CH(18LU<<((unsigned)27))) | ||||
635 | return(NULL((void*)0)); /* there is none */ | ||||
636 | esub++; | ||||
637 | assert(OP(m->g->strip[esub]) == OOR2)((void) (0)); | ||||
638 | ssub = esub + 1; | ||||
639 | esub += OPND(m->g->strip[esub])((m->g->strip[esub])&0x07ffffffLU); | ||||
640 | if (OP(m->g->strip[esub])((m->g->strip[esub])&0xf8000000LU) == OOR2(17LU<<((unsigned)27))) | ||||
641 | esub--; | ||||
642 | else | ||||
643 | assert(OP(m->g->strip[esub]) == O_CH)((void) (0)); | ||||
644 | } | ||||
645 | break; | ||||
646 | case OLPAREN(13LU<<((unsigned)27)): /* must undo assignment if rest fails */ | ||||
647 | i = OPND(s)((s)&0x07ffffffLU); | ||||
648 | assert(0 < i && i <= m->g->nsub)((void) (0)); | ||||
649 | offsave = m->pmatch[i].rm_so; | ||||
650 | m->pmatch[i].rm_so = sp - m->offp; | ||||
651 | dp = backref(m, sp, stop, ss+1, stopst, lev, rec); | ||||
652 | if (dp != NULL((void*)0)) | ||||
653 | return(dp); | ||||
654 | m->pmatch[i].rm_so = offsave; | ||||
655 | return(NULL((void*)0)); | ||||
656 | break; | ||||
657 | case ORPAREN(14LU<<((unsigned)27)): /* must undo assignment if rest fails */ | ||||
658 | i = OPND(s)((s)&0x07ffffffLU); | ||||
659 | assert(0 < i && i <= m->g->nsub)((void) (0)); | ||||
660 | offsave = m->pmatch[i].rm_eo; | ||||
661 | m->pmatch[i].rm_eo = sp - m->offp; | ||||
662 | dp = backref(m, sp, stop, ss+1, stopst, lev, rec); | ||||
663 | if (dp != NULL((void*)0)) | ||||
664 | return(dp); | ||||
665 | m->pmatch[i].rm_eo = offsave; | ||||
666 | return(NULL((void*)0)); | ||||
667 | break; | ||||
668 | default: /* uh oh */ | ||||
669 | assert(nope)((void) (0)); | ||||
670 | break; | ||||
671 | } | ||||
672 | |||||
673 | /* "can't happen" */ | ||||
674 | assert(nope)((void) (0)); | ||||
675 | /* NOTREACHED */ | ||||
676 | return NULL((void*)0); | ||||
677 | } | ||||
678 | |||||
679 | /* | ||||
680 | - fast - step through the string at top speed | ||||
681 | */ | ||||
682 | static const char * /* where tentative match ended, or NULL */ | ||||
683 | fast(struct match *m, const char *start, const char *stop, sopno startst, | ||||
684 | sopno stopst) | ||||
685 | { | ||||
686 | stateschar * st = m->st; | ||||
687 | stateschar * fresh = m->fresh; | ||||
688 | stateschar * tmp = m->tmp; | ||||
689 | const char *p = start; | ||||
690 | int c = (start
| ||||
691 | int lastc; /* previous c */ | ||||
692 | int flagch; | ||||
693 | int i; | ||||
694 | const char *coldp; /* last p after which no match was underway */ | ||||
695 | |||||
696 | CLEAR(st)memset(st, 0, m->g->nstates); | ||||
697 | SET1(st, startst)((st)[startst] = 1); | ||||
698 | st = step(m->g, startst, stopst, st, NOTHING(((127 +1)+1)+3), st); | ||||
699 | ASSIGN(fresh, st)memmove(fresh, st, m->g->nstates); | ||||
700 | SP("start", st, *p); | ||||
701 | coldp = NULL((void*)0); | ||||
702 | for (;;) { | ||||
703 | /* next character */ | ||||
704 | lastc = c; | ||||
705 | c = (p == m->endp) ? OUT(127 +1) : *p; | ||||
706 | if (EQ(st, fresh)(memcmp(st, fresh, m->g->nstates) == 0)) | ||||
707 | coldp = p; | ||||
708 | |||||
709 | /* is there an EOL and/or BOL between lastc and c? */ | ||||
710 | flagch = '\0'; | ||||
711 | i = 0; | ||||
712 | if ( (lastc == '\n' && m->g->cflags®_NEWLINE0010) || | ||||
713 | (lastc == OUT(127 +1) && !(m->eflags®_NOTBOL00001)) ) { | ||||
714 | flagch = BOL((127 +1)+1); | ||||
715 | i = m->g->nbol; | ||||
716 | } | ||||
717 | if ( (c == '\n' && m->g->cflags®_NEWLINE0010) || | ||||
718 | (c == OUT(127 +1) && !(m->eflags®_NOTEOL00002)) ) { | ||||
719 | flagch = (flagch == BOL((127 +1)+1)) ? BOLEOL(((127 +1)+1)+2) : EOL(((127 +1)+1)+1); | ||||
720 | i += m->g->neol; | ||||
721 | } | ||||
722 | if (i
| ||||
723 | for (; i > 0; i--) | ||||
724 | st = step(m->g, startst, stopst, st, flagch, st); | ||||
725 | SP("boleol", st, c); | ||||
726 | } | ||||
727 | |||||
728 | /* how about a word boundary? */ | ||||
729 | if ( (flagch == BOL((127 +1)+1) || (lastc != OUT(127 +1) && !ISWORD(lastc)(((*__ctype_b_loc ())[(int) ((lastc&0xff))] & (unsigned short int) _ISalnum) || (lastc) == '_'))) && | ||||
730 | (c != OUT(127 +1) && ISWORD(c)(((*__ctype_b_loc ())[(int) ((c&0xff))] & (unsigned short int) _ISalnum) || (c) == '_')) ) { | ||||
731 | flagch = BOW(((127 +1)+1)+4); | ||||
732 | } | ||||
733 | if ( (lastc != OUT(127 +1) && ISWORD(lastc)(((*__ctype_b_loc ())[(int) ((lastc&0xff))] & (unsigned short int) _ISalnum) || (lastc) == '_')) && | ||||
734 | (flagch == EOL(((127 +1)+1)+1) || (c != OUT(127 +1) && !ISWORD(c)(((*__ctype_b_loc ())[(int) ((c&0xff))] & (unsigned short int) _ISalnum) || (c) == '_'))) ) { | ||||
735 | flagch = EOW(((127 +1)+1)+5); | ||||
736 | } | ||||
737 | if (flagch == BOW(((127 +1)+1)+4) || flagch == EOW(((127 +1)+1)+5)) { | ||||
738 | st = step(m->g, startst, stopst, st, flagch, st); | ||||
739 | SP("boweow", st, c); | ||||
740 | } | ||||
741 | |||||
742 | /* are we done? */ | ||||
743 | if (ISSET(st, stopst)((st)[stopst]) || p == stop) | ||||
744 | break; /* NOTE BREAK OUT */ | ||||
745 | |||||
746 | /* no, we must deal with this character */ | ||||
747 | ASSIGN(tmp, st)memmove(tmp, st, m->g->nstates); | ||||
748 | ASSIGN(st, fresh)memmove(st, fresh, m->g->nstates); | ||||
749 | assert(c != OUT)((void) (0)); | ||||
750 | st = step(m->g, startst, stopst, tmp, c, st); | ||||
751 | SP("aft", st, c); | ||||
752 | assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st))((void) (0)); | ||||
753 | p++; | ||||
754 | } | ||||
755 | |||||
756 | assert(coldp != NULL)((void) (0)); | ||||
757 | m->coldp = coldp; | ||||
758 | if (ISSET(st, stopst)((st)[stopst])) | ||||
759 | return(p+1); | ||||
760 | else | ||||
761 | return(NULL((void*)0)); | ||||
762 | } | ||||
763 | |||||
764 | /* | ||||
765 | - slow - step through the string more deliberately | ||||
766 | */ | ||||
767 | static const char * /* where it ended */ | ||||
768 | slow(struct match *m, const char *start, const char *stop, sopno startst, | ||||
769 | sopno stopst) | ||||
770 | { | ||||
771 | stateschar * st = m->st; | ||||
772 | stateschar * empty = m->empty; | ||||
773 | stateschar * tmp = m->tmp; | ||||
774 | const char *p = start; | ||||
775 | int c = (start
| ||||
| |||||
776 | int lastc; /* previous c */ | ||||
777 | int flagch; | ||||
778 | int i; | ||||
779 | const char *matchp; /* last p at which a match ended */ | ||||
780 | |||||
781 | AT("slow", start, stop, startst, stopst); | ||||
782 | CLEAR(st)memset(st, 0, m->g->nstates); | ||||
783 | SET1(st, startst)((st)[startst] = 1); | ||||
784 | SP("sstart", st, *p); | ||||
785 | st = step(m->g, startst, stopst, st, NOTHING(((127 +1)+1)+3), st); | ||||
786 | matchp = NULL((void*)0); | ||||
787 | for (;;) { | ||||
788 | /* next character */ | ||||
789 | lastc = c; | ||||
790 | c = (p == m->endp) ? OUT(127 +1) : *p; | ||||
791 | |||||
792 | /* is there an EOL and/or BOL between lastc and c? */ | ||||
793 | flagch = '\0'; | ||||
794 | i = 0; | ||||
795 | if ( (lastc == '\n' && m->g->cflags®_NEWLINE0010) || | ||||
796 | (lastc == OUT(127 +1) && !(m->eflags®_NOTBOL00001)) ) { | ||||
797 | flagch = BOL((127 +1)+1); | ||||
798 | i = m->g->nbol; | ||||
799 | } | ||||
800 | if ( (c == '\n' && m->g->cflags®_NEWLINE0010) || | ||||
801 | (c == OUT(127 +1) && !(m->eflags®_NOTEOL00002)) ) { | ||||
802 | flagch = (flagch == BOL((127 +1)+1)) ? BOLEOL(((127 +1)+1)+2) : EOL(((127 +1)+1)+1); | ||||
803 | i += m->g->neol; | ||||
804 | } | ||||
805 | if (i != 0) { | ||||
806 | for (; i > 0; i--) | ||||
807 | st = step(m->g, startst, stopst, st, flagch, st); | ||||
808 | SP("sboleol", st, c); | ||||
809 | } | ||||
810 | |||||
811 | /* how about a word boundary? */ | ||||
812 | if ( (flagch == BOL((127 +1)+1) || (lastc != OUT(127 +1) && !ISWORD(lastc)(((*__ctype_b_loc ())[(int) ((lastc&0xff))] & (unsigned short int) _ISalnum) || (lastc) == '_'))) && | ||||
813 | (c != OUT(127 +1) && ISWORD(c)(((*__ctype_b_loc ())[(int) ((c&0xff))] & (unsigned short int) _ISalnum) || (c) == '_')) ) { | ||||
814 | flagch = BOW(((127 +1)+1)+4); | ||||
815 | } | ||||
816 | if ( (lastc != OUT(127 +1) && ISWORD(lastc)(((*__ctype_b_loc ())[(int) ((lastc&0xff))] & (unsigned short int) _ISalnum) || (lastc) == '_')) && | ||||
817 | (flagch == EOL(((127 +1)+1)+1) || (c != OUT(127 +1) && !ISWORD(c)(((*__ctype_b_loc ())[(int) ((c&0xff))] & (unsigned short int) _ISalnum) || (c) == '_'))) ) { | ||||
818 | flagch = EOW(((127 +1)+1)+5); | ||||
819 | } | ||||
820 | if (flagch == BOW(((127 +1)+1)+4) || flagch == EOW(((127 +1)+1)+5)) { | ||||
821 | st = step(m->g, startst, stopst, st, flagch, st); | ||||
822 | SP("sboweow", st, c); | ||||
823 | } | ||||
824 | |||||
825 | /* are we done? */ | ||||
826 | if (ISSET(st, stopst)((st)[stopst])) | ||||
827 | matchp = p; | ||||
828 | if (EQ(st, empty)(memcmp(st, empty, m->g->nstates) == 0) || p == stop) | ||||
829 | break; /* NOTE BREAK OUT */ | ||||
830 | |||||
831 | /* no, we must deal with this character */ | ||||
832 | ASSIGN(tmp, st)memmove(tmp, st, m->g->nstates); | ||||
833 | ASSIGN(st, empty)memmove(st, empty, m->g->nstates); | ||||
834 | assert(c != OUT)((void) (0)); | ||||
835 | st = step(m->g, startst, stopst, tmp, c, st); | ||||
836 | SP("saft", st, c); | ||||
837 | assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st))((void) (0)); | ||||
838 | p++; | ||||
839 | } | ||||
840 | |||||
841 | return(matchp); | ||||
842 | } | ||||
843 | |||||
844 | |||||
845 | /* | ||||
846 | - step - map set of states reachable before char to set reachable after | ||||
847 | */ | ||||
848 | static stateschar * | ||||
849 | step(struct re_guts *g, | ||||
850 | sopno start, /* start state within strip */ | ||||
851 | sopno stop, /* state after stop state within strip */ | ||||
852 | stateschar * bef, /* states reachable before */ | ||||
853 | int ch, /* character or NONCHAR code */ | ||||
854 | stateschar * aft) /* states already known reachable after */ | ||||
855 | { | ||||
856 | cset *cs; | ||||
857 | sop s; | ||||
858 | sopno pc; | ||||
859 | onestatelong here; /* note, macros know this name */ | ||||
860 | sopno look; | ||||
861 | int i; | ||||
862 | |||||
863 | for (pc = start, INIT(here, pc)((here) = (pc)); pc != stop; pc++, INC(here)((here)++)) { | ||||
864 | s = g->strip[pc]; | ||||
865 | switch (OP(s)((s)&0xf8000000LU)) { | ||||
866 | case OEND(1LU<<((unsigned)27)): | ||||
867 | assert(pc == stop-1)((void) (0)); | ||||
868 | break; | ||||
869 | case OCHAR(2LU<<((unsigned)27)): | ||||
870 | /* only characters can match */ | ||||
871 | assert(!NONCHAR(ch) || ch != (char)OPND(s))((void) (0)); | ||||
872 | if (ch == (char)OPND(s)((s)&0x07ffffffLU)) | ||||
873 | FWD(aft, bef, 1)((aft)[here+(1)] |= (bef)[here]); | ||||
874 | break; | ||||
875 | case OBOL(3LU<<((unsigned)27)): | ||||
876 | if (ch == BOL((127 +1)+1) || ch == BOLEOL(((127 +1)+1)+2)) | ||||
877 | FWD(aft, bef, 1)((aft)[here+(1)] |= (bef)[here]); | ||||
878 | break; | ||||
879 | case OEOL(4LU<<((unsigned)27)): | ||||
880 | if (ch == EOL(((127 +1)+1)+1) || ch == BOLEOL(((127 +1)+1)+2)) | ||||
881 | FWD(aft, bef, 1)((aft)[here+(1)] |= (bef)[here]); | ||||
882 | break; | ||||
883 | case OBOW(19LU<<((unsigned)27)): | ||||
884 | if (ch == BOW(((127 +1)+1)+4)) | ||||
885 | FWD(aft, bef, 1)((aft)[here+(1)] |= (bef)[here]); | ||||
886 | break; | ||||
887 | case OEOW(20LU<<((unsigned)27)): | ||||
888 | if (ch == EOW(((127 +1)+1)+5)) | ||||
889 | FWD(aft, bef, 1)((aft)[here+(1)] |= (bef)[here]); | ||||
890 | break; | ||||
891 | case OANY(5LU<<((unsigned)27)): | ||||
892 | if (!NONCHAR(ch)((ch) > 127)) | ||||
893 | FWD(aft, bef, 1)((aft)[here+(1)] |= (bef)[here]); | ||||
894 | break; | ||||
895 | case OANYOF(6LU<<((unsigned)27)): | ||||
896 | cs = &g->sets[OPND(s)((s)&0x07ffffffLU)]; | ||||
897 | if (!NONCHAR(ch)((ch) > 127) && CHIN(cs, ch)((cs)->ptr[(uch)(ch)] & (cs)->mask)) | ||||
898 | FWD(aft, bef, 1)((aft)[here+(1)] |= (bef)[here]); | ||||
899 | break; | ||||
900 | case OBACK_(7LU<<((unsigned)27)): /* ignored here */ | ||||
901 | case O_BACK(8LU<<((unsigned)27)): | ||||
902 | FWD(aft, aft, 1)((aft)[here+(1)] |= (aft)[here]); | ||||
903 | break; | ||||
904 | case OPLUS_(9LU<<((unsigned)27)): /* forward, this is just an empty */ | ||||
905 | FWD(aft, aft, 1)((aft)[here+(1)] |= (aft)[here]); | ||||
906 | break; | ||||
907 | case O_PLUS(10LU<<((unsigned)27)): /* both forward and back */ | ||||
908 | FWD(aft, aft, 1)((aft)[here+(1)] |= (aft)[here]); | ||||
909 | i = ISSETBACK(aft, OPND(s))((aft)[here - (((s)&0x07ffffffLU))]); | ||||
910 | BACK(aft, aft, OPND(s))((aft)[here-(((s)&0x07ffffffLU))] |= (aft)[here]); | ||||
911 | if (!i && ISSETBACK(aft, OPND(s))((aft)[here - (((s)&0x07ffffffLU))])) { | ||||
912 | /* oho, must reconsider loop body */ | ||||
913 | pc -= OPND(s)((s)&0x07ffffffLU) + 1; | ||||
914 | INIT(here, pc)((here) = (pc)); | ||||
915 | } | ||||
916 | break; | ||||
917 | case OQUEST_(11LU<<((unsigned)27)): /* two branches, both forward */ | ||||
918 | FWD(aft, aft, 1)((aft)[here+(1)] |= (aft)[here]); | ||||
919 | FWD(aft, aft, OPND(s))((aft)[here+(((s)&0x07ffffffLU))] |= (aft)[here]); | ||||
920 | break; | ||||
921 | case O_QUEST(12LU<<((unsigned)27)): /* just an empty */ | ||||
922 | FWD(aft, aft, 1)((aft)[here+(1)] |= (aft)[here]); | ||||
923 | break; | ||||
924 | case OLPAREN(13LU<<((unsigned)27)): /* not significant here */ | ||||
925 | case ORPAREN(14LU<<((unsigned)27)): | ||||
926 | FWD(aft, aft, 1)((aft)[here+(1)] |= (aft)[here]); | ||||
927 | break; | ||||
928 | case OCH_(15LU<<((unsigned)27)): /* mark the first two branches */ | ||||
929 | FWD(aft, aft, 1)((aft)[here+(1)] |= (aft)[here]); | ||||
930 | assert(OP(g->strip[pc+OPND(s)]) == OOR2)((void) (0)); | ||||
931 | FWD(aft, aft, OPND(s))((aft)[here+(((s)&0x07ffffffLU))] |= (aft)[here]); | ||||
932 | break; | ||||
933 | case OOR1(16LU<<((unsigned)27)): /* done a branch, find the O_CH */ | ||||
934 | if (ISSTATEIN(aft, here)((aft)[here])) { | ||||
935 | for (look = 1; | ||||
936 | OP(s = g->strip[pc+look])((s = g->strip[pc+look])&0xf8000000LU) != O_CH(18LU<<((unsigned)27)); | ||||
937 | look += OPND(s)((s)&0x07ffffffLU)) | ||||
938 | assert(OP(s) == OOR2)((void) (0)); | ||||
939 | FWD(aft, aft, look)((aft)[here+(look)] |= (aft)[here]); | ||||
940 | } | ||||
941 | break; | ||||
942 | case OOR2(17LU<<((unsigned)27)): /* propagate OCH_'s marking */ | ||||
943 | FWD(aft, aft, 1)((aft)[here+(1)] |= (aft)[here]); | ||||
944 | if (OP(g->strip[pc+OPND(s)])((g->strip[pc+((s)&0x07ffffffLU)])&0xf8000000LU) != O_CH(18LU<<((unsigned)27))) { | ||||
945 | assert(OP(g->strip[pc+OPND(s)]) == OOR2)((void) (0)); | ||||
946 | FWD(aft, aft, OPND(s))((aft)[here+(((s)&0x07ffffffLU))] |= (aft)[here]); | ||||
947 | } | ||||
948 | break; | ||||
949 | case O_CH(18LU<<((unsigned)27)): /* just empty */ | ||||
950 | FWD(aft, aft, 1)((aft)[here+(1)] |= (aft)[here]); | ||||
951 | break; | ||||
952 | default: /* ooooops... */ | ||||
953 | assert(nope)((void) (0)); | ||||
954 | break; | ||||
955 | } | ||||
956 | } | ||||
957 | |||||
958 | return(aft); | ||||
959 | } | ||||
960 | |||||
961 | #ifdef REDEBUG | ||||
962 | /* | ||||
963 | - print - print a set of states | ||||
964 | */ | ||||
965 | static void | ||||
966 | print(struct match *m, char *caption, stateschar * st, int ch, FILE *d) | ||||
967 | { | ||||
968 | struct re_guts *g = m->g; | ||||
969 | int i; | ||||
970 | int first = 1; | ||||
971 | |||||
972 | if (!(m->eflags®_TRACE00400)) | ||||
973 | return; | ||||
974 | |||||
975 | (void)fprintf(d, "%s", caption); | ||||
976 | if (ch != '\0') | ||||
977 | (void)fprintf(d, " %s", pchar(ch)); | ||||
978 | for (i = 0; i < g->nstates; i++) | ||||
979 | if (ISSET(st, i)((st)[i])) { | ||||
980 | (void)fprintf(d, "%s%d", (first) ? "\t" : ", ", i); | ||||
981 | first = 0; | ||||
982 | } | ||||
983 | (void)fprintf(d, "\n"); | ||||
984 | } | ||||
985 | |||||
986 | /* | ||||
987 | - at - print current situation | ||||
988 | */ | ||||
989 | static void | ||||
990 | at(struct match *m, char *title, char *start, char *stop, sopno startst, | ||||
991 | sopno stopst) | ||||
992 | { | ||||
993 | if (!(m->eflags®_TRACE00400)) | ||||
994 | return; | ||||
995 | |||||
996 | (void)printf("%s %s-", title, pchar(*start)); | ||||
997 | (void)printf("%s ", pchar(*stop)); | ||||
998 | (void)printf("%ld-%ld\n", (long)startst, (long)stopst); | ||||
999 | } | ||||
1000 | |||||
1001 | #ifndef PCHARDONE | ||||
1002 | #define PCHARDONE /* never again */ | ||||
1003 | /* | ||||
1004 | - pchar - make a character printable | ||||
1005 | * | ||||
1006 | * Is this identical to regchar() over in debug.c? Well, yes. But a | ||||
1007 | * duplicate here avoids having a debugging-capable regexec.o tied to | ||||
1008 | * a matching debug.o, and this is convenient. It all disappears in | ||||
1009 | * the non-debug compilation anyway, so it doesn't matter much. | ||||
1010 | */ | ||||
1011 | static char * /* -> representation */ | ||||
1012 | pchar(int ch) | ||||
1013 | { | ||||
1014 | static char pbuf[10]; | ||||
1015 | |||||
1016 | if (isPrint(ch) || ch == ' ') | ||||
1017 | (void)snprintf(pbuf, sizeof pbuf, "%c", ch); | ||||
1018 | else | ||||
1019 | (void)snprintf(pbuf, sizeof pbuf, "\\%o", ch); | ||||
1020 | return(pbuf); | ||||
1021 | } | ||||
1022 | #endif | ||||
1023 | #endif | ||||
1024 | |||||
1025 | #undef matcher | ||||
1026 | #undef fast | ||||
1027 | #undef slow | ||||
1028 | #undef dissect | ||||
1029 | #undef backref | ||||
1030 | #undef step | ||||
1031 | #undef print | ||||
1032 | #undef at | ||||
1033 | #undef match | ||||
1034 | #undef nope |