Bug Summary

File:lib/Support/regcomp.c
Warning:line 1534, column 10
Dereference of null pointer

Annotated Source Code

1/*-
2 * This code is derived from OpenBSD's libc/regex, original license follows:
3 *
4 * Copyright (c) 1992, 1993, 1994 Henry Spencer.
5 * Copyright (c) 1992, 1993, 1994
6 * The Regents of the University of California. All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Henry Spencer.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 * @(#)regcomp.c 8.5 (Berkeley) 3/20/94
36 */
37
38#include <sys/types.h>
39#include <stdio.h>
40#include <string.h>
41#include <ctype.h>
42#include <limits.h>
43#include <stdlib.h>
44#include "regex_impl.h"
45
46#include "regutils.h"
47#include "regex2.h"
48
49#include "regcclass.h"
50#include "regcname.h"
51
52#include "llvm/Config/config.h"
53#if HAVE_STDINT_H1
54#include <stdint.h>
55#else
56/* Pessimistically bound memory use */
57#define SIZE_MAX(18446744073709551615UL) UINT_MAX(2147483647 *2U +1U)
58#endif
59
60/*
61 * parse structure, passed up and down to avoid global variables and
62 * other clumsinesses
63 */
64struct parse {
65 char *next; /* next character in RE */
66 char *end; /* end of string (-> NUL normally) */
67 int error; /* has an error been seen? */
68 sop *strip; /* malloced strip */
69 sopno ssize; /* malloced strip size (allocated) */
70 sopno slen; /* malloced strip length (used) */
71 int ncsalloc; /* number of csets allocated */
72 struct re_guts *g;
73# define NPAREN10 10 /* we need to remember () 1-9 for back refs */
74 sopno pbegin[NPAREN10]; /* -> ( ([0] unused) */
75 sopno pend[NPAREN10]; /* -> ) ([0] unused) */
76};
77
78static void p_ere(struct parse *, int);
79static void p_ere_exp(struct parse *);
80static void p_str(struct parse *);
81static void p_bre(struct parse *, int, int);
82static int p_simp_re(struct parse *, int);
83static int p_count(struct parse *);
84static void p_bracket(struct parse *);
85static void p_b_term(struct parse *, cset *);
86static void p_b_cclass(struct parse *, cset *);
87static void p_b_eclass(struct parse *, cset *);
88static char p_b_symbol(struct parse *);
89static char p_b_coll_elem(struct parse *, int);
90static char othercase(int);
91static void bothcases(struct parse *, int);
92static void ordinary(struct parse *, int);
93static void nonnewline(struct parse *);
94static void repeat(struct parse *, sopno, int, int);
95static int seterr(struct parse *, int);
96static cset *allocset(struct parse *);
97static void freeset(struct parse *, cset *);
98static int freezeset(struct parse *, cset *);
99static int firstch(struct parse *, cset *);
100static int nch(struct parse *, cset *);
101static void mcadd(struct parse *, cset *, const char *);
102static void mcinvert(struct parse *, cset *);
103static void mccase(struct parse *, cset *);
104static int isinsets(struct re_guts *, int);
105static int samesets(struct re_guts *, int, int);
106static void categorize(struct parse *, struct re_guts *);
107static sopno dupl(struct parse *, sopno, sopno);
108static void doemit(struct parse *, sop, size_t);
109static void doinsert(struct parse *, sop, size_t, sopno);
110static void dofwd(struct parse *, sopno, sop);
111static void enlarge(struct parse *, sopno);
112static void stripsnug(struct parse *, struct re_guts *);
113static void findmust(struct parse *, struct re_guts *);
114static sopno pluscount(struct parse *, struct re_guts *);
115
116static char nuls[10]; /* place to point scanner in event of error */
117
118/*
119 * macros for use with parse structure
120 * BEWARE: these know that the parse structure is named `p' !!!
121 */
122#define PEEK()(*p->next) (*p->next)
123#define PEEK2()(*(p->next+1)) (*(p->next+1))
124#define MORE()(p->next < p->end) (p->next < p->end)
125#define MORE2()(p->next+1 < p->end) (p->next+1 < p->end)
126#define SEE(c)((p->next < p->end) && (*p->next) == (c)) (MORE()(p->next < p->end) && PEEK()(*p->next) == (c))
127#define SEETWO(a, b)((p->next < p->end) && (p->next+1 < p->
end) && (*p->next) == (a) && (*(p->next
+1)) == (b))
(MORE()(p->next < p->end) && MORE2()(p->next+1 < p->end) && PEEK()(*p->next) == (a) && PEEK2()(*(p->next+1)) == (b))
128#define EAT(c)((((p->next < p->end) && (*p->next) == (c
))) ? ((p->next++), 1) : 0)
((SEE(c)((p->next < p->end) && (*p->next) == (c))) ? (NEXT()(p->next++), 1) : 0)
129#define EATTWO(a, b)((((p->next < p->end) && (p->next+1 < p
->end) && (*p->next) == (a) && (*(p->
next+1)) == (b))) ? ((p->next += 2), 1) : 0)
((SEETWO(a, b)((p->next < p->end) && (p->next+1 < p->
end) && (*p->next) == (a) && (*(p->next
+1)) == (b))
) ? (NEXT2()(p->next += 2), 1) : 0)
130#define NEXT()(p->next++) (p->next++)
131#define NEXT2()(p->next += 2) (p->next += 2)
132#define NEXTn(n)(p->next += (n)) (p->next += (n))
133#define GETNEXT()(*p->next++) (*p->next++)
134#define SETERROR(e)seterr(p, (e)) seterr(p, (e))
135#define REQUIRE(co, e)(void)((co) || seterr(p, (e))) (void)((co) || SETERROR(e)seterr(p, (e)))
136#define MUSTSEE(c, e)((void)(((p->next < p->end) && (*p->next)
== (c)) || seterr(p, (e))))
(REQUIRE(MORE() && PEEK() == (c), e)(void)(((p->next < p->end) && (*p->next) ==
(c)) || seterr(p, (e)))
)
137#define MUSTEAT(c, e)((void)(((p->next < p->end) && (*p->next++
) == (c)) || seterr(p, (e))))
(REQUIRE(MORE() && GETNEXT() == (c), e)(void)(((p->next < p->end) && (*p->next++
) == (c)) || seterr(p, (e)))
)
138#define MUSTNOTSEE(c, e)((void)((!(p->next < p->end) || (*p->next) != (c)
) || seterr(p, (e))))
(REQUIRE(!MORE() || PEEK() != (c), e)(void)((!(p->next < p->end) || (*p->next) != (c))
|| seterr(p, (e)))
)
139#define EMIT(op, sopnd)doemit(p, (sop)(op), (size_t)(sopnd)) doemit(p, (sop)(op), (size_t)(sopnd))
140#define INSERT(op, pos)doinsert(p, (sop)(op), (p->slen)-(pos)+1, pos) doinsert(p, (sop)(op), HERE()(p->slen)-(pos)+1, pos)
141#define AHEAD(pos)dofwd(p, pos, (p->slen)-(pos)) dofwd(p, pos, HERE()(p->slen)-(pos))
142#define ASTERN(sop, pos)doemit(p, (sop)(sop), (size_t)((p->slen)-pos)) EMIT(sop, HERE()-pos)doemit(p, (sop)(sop), (size_t)((p->slen)-pos))
143#define HERE()(p->slen) (p->slen)
144#define THERE()(p->slen - 1) (p->slen - 1)
145#define THERETHERE()(p->slen - 2) (p->slen - 2)
146#define DROP(n)(p->slen -= (n)) (p->slen -= (n))
147
148#ifdef _POSIX2_RE_DUP_MAX255
149#define DUPMAX255 _POSIX2_RE_DUP_MAX255
150#else
151#define DUPMAX255 255
152#endif
153#define INFINITY(255 + 1) (DUPMAX255 + 1)
154
155#ifndef NDEBUG
156static int never0 = 0; /* for use in asserts; shuts lint up */
157#else
158#define never0 0 /* some <assert.h>s have bugs too */
159#endif
160
161/*
162 - llvm_regcomp - interface for parser and compilation
163 */
164int /* 0 success, otherwise REG_something */
165llvm_regcomp(llvm_regex_t *preg, const char *pattern, int cflags)
166{
167 struct parse pa;
168 struct re_guts *g;
169 struct parse *p = &pa;
170 int i;
171 size_t len;
172#ifdef REDEBUG
173# define GOODFLAGS(f)((f)&~0200) (f)
174#else
175# define GOODFLAGS(f)((f)&~0200) ((f)&~REG_DUMP0200)
176#endif
177
178 cflags = GOODFLAGS(cflags)((cflags)&~0200);
179 if ((cflags&REG_EXTENDED0001) && (cflags&REG_NOSPEC0020))
180 return(REG_INVARG16);
181
182 if (cflags&REG_PEND0040) {
183 if (preg->re_endp < pattern)
184 return(REG_INVARG16);
185 len = preg->re_endp - pattern;
186 } else
187 len = strlen((const char *)pattern);
188
189 /* do the mallocs early so failure handling is easy */
190 g = (struct re_guts *)malloc(sizeof(struct re_guts) +
191 (NC(127 - (-127 -1) + 1)-1)*sizeof(cat_t));
192 if (g == NULL((void*)0))
193 return(REG_ESPACE12);
194 p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */
195 p->strip = (sop *)calloc(p->ssize, sizeof(sop));
196 p->slen = 0;
197 if (p->strip == NULL((void*)0)) {
198 free((char *)g);
199 return(REG_ESPACE12);
200 }
201
202 /* set things up */
203 p->g = g;
204 p->next = (char *)pattern; /* convenience; we do not modify it */
205 p->end = p->next + len;
206 p->error = 0;
207 p->ncsalloc = 0;
208 for (i = 0; i < NPAREN10; i++) {
209 p->pbegin[i] = 0;
210 p->pend[i] = 0;
211 }
212 g->csetsize = NC(127 - (-127 -1) + 1);
213 g->sets = NULL((void*)0);
214 g->setbits = NULL((void*)0);
215 g->ncsets = 0;
216 g->cflags = cflags;
217 g->iflags = 0;
218 g->nbol = 0;
219 g->neol = 0;
220 g->must = NULL((void*)0);
221 g->mlen = 0;
222 g->nsub = 0;
223 g->ncategories = 1; /* category 0 is "everything else" */
224 g->categories = &g->catspace[-(CHAR_MIN(-127 -1))];
225 (void) memset((char *)g->catspace, 0, NC(127 - (-127 -1) + 1)*sizeof(cat_t));
226 g->backrefs = 0;
227
228 /* do it */
229 EMIT(OEND, 0)doemit(p, (sop)((1LU<<((unsigned)27))), (size_t)(0));
230 g->firststate = THERE()(p->slen - 1);
231 if (cflags&REG_EXTENDED0001)
232 p_ere(p, OUT(127 +1));
233 else if (cflags&REG_NOSPEC0020)
234 p_str(p);
235 else
236 p_bre(p, OUT(127 +1), OUT(127 +1));
237 EMIT(OEND, 0)doemit(p, (sop)((1LU<<((unsigned)27))), (size_t)(0));
238 g->laststate = THERE()(p->slen - 1);
239
240 /* tidy up loose ends and fill things in */
241 categorize(p, g);
242 stripsnug(p, g);
243 findmust(p, g);
244 g->nplus = pluscount(p, g);
245 g->magic = MAGIC2((('R'^0200)<<8)|'E');
246 preg->re_nsub = g->nsub;
247 preg->re_g = g;
248 preg->re_magic = MAGIC1((('r'^0200)<<8) | 'e');
249#ifndef REDEBUG
250 /* not debugging, so can't rely on the assert() in llvm_regexec() */
251 if (g->iflags&REGEX_BAD04)
252 SETERROR(REG_ASSERT)seterr(p, (15));
253#endif
254
255 /* win or lose, we're done */
256 if (p->error != 0) /* lose */
257 llvm_regfree(preg);
258 return(p->error);
259}
260
261/*
262 - p_ere - ERE parser top level, concatenation and alternation
263 */
264static void
265p_ere(struct parse *p, int stop) /* character this ERE should end at */
266{
267 char c;
268 sopno prevback = 0;
269 sopno prevfwd = 0;
270 sopno conc;
271 int first = 1; /* is this the first alternative? */
272
273 for (;;) {
274 /* do a bunch of concatenated expressions */
275 conc = HERE()(p->slen);
276 while (MORE()(p->next < p->end) && (c = PEEK()(*p->next)) != '|' && c != stop)
277 p_ere_exp(p);
278 REQUIRE(HERE() != conc, REG_EMPTY)(void)(((p->slen) != conc) || seterr(p, (14))); /* require nonempty */
279
280 if (!EAT('|')((((p->next < p->end) && (*p->next) == ('|'
))) ? ((p->next++), 1) : 0)
)
281 break; /* NOTE BREAK OUT */
282
283 if (first) {
284 INSERT(OCH_, conc)doinsert(p, (sop)((15LU<<((unsigned)27))), (p->slen)
-(conc)+1, conc)
; /* offset is wrong */
285 prevfwd = conc;
286 prevback = conc;
287 first = 0;
288 }
289 ASTERN(OOR1, prevback)doemit(p, (sop)((16LU<<((unsigned)27))), (size_t)((p->
slen)-prevback))
;
290 prevback = THERE()(p->slen - 1);
291 AHEAD(prevfwd)dofwd(p, prevfwd, (p->slen)-(prevfwd)); /* fix previous offset */
292 prevfwd = HERE()(p->slen);
293 EMIT(OOR2, 0)doemit(p, (sop)((17LU<<((unsigned)27))), (size_t)(0)); /* offset is very wrong */
294 }
295
296 if (!first) { /* tail-end fixups */
297 AHEAD(prevfwd)dofwd(p, prevfwd, (p->slen)-(prevfwd));
298 ASTERN(O_CH, prevback)doemit(p, (sop)((18LU<<((unsigned)27))), (size_t)((p->
slen)-prevback))
;
299 }
300
301 assert(!MORE() || SEE(stop))((void) (0));
302}
303
304/*
305 - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
306 */
307static void
308p_ere_exp(struct parse *p)
309{
310 char c;
311 sopno pos;
312 int count;
313 int count2;
314 int backrefnum;
315 sopno subno;
316 int wascaret = 0;
317
318 assert(MORE())((void) (0)); /* caller should have ensured this */
319 c = GETNEXT()(*p->next++);
320
321 pos = HERE()(p->slen);
322 switch (c) {
323 case '(':
324 REQUIRE(MORE(), REG_EPAREN)(void)(((p->next < p->end)) || seterr(p, (8)));
325 p->g->nsub++;
326 subno = p->g->nsub;
327 if (subno < NPAREN10)
328 p->pbegin[subno] = HERE()(p->slen);
329 EMIT(OLPAREN, subno)doemit(p, (sop)((13LU<<((unsigned)27))), (size_t)(subno
))
;
330 if (!SEE(')')((p->next < p->end) && (*p->next) == (')'
))
)
331 p_ere(p, ')');
332 if (subno < NPAREN10) {
333 p->pend[subno] = HERE()(p->slen);
334 assert(p->pend[subno] != 0)((void) (0));
335 }
336 EMIT(ORPAREN, subno)doemit(p, (sop)((14LU<<((unsigned)27))), (size_t)(subno
))
;
337 MUSTEAT(')', REG_EPAREN)((void)(((p->next < p->end) && (*p->next++
) == (')')) || seterr(p, (8))))
;
338 break;
339#ifndef POSIX_MISTAKE
340 case ')': /* happens only if no current unmatched ( */
341 /*
342 * You may ask, why the ifndef? Because I didn't notice
343 * this until slightly too late for 1003.2, and none of the
344 * other 1003.2 regular-expression reviewers noticed it at
345 * all. So an unmatched ) is legal POSIX, at least until
346 * we can get it fixed.
347 */
348 SETERROR(REG_EPAREN)seterr(p, (8));
349 break;
350#endif
351 case '^':
352 EMIT(OBOL, 0)doemit(p, (sop)((3LU<<((unsigned)27))), (size_t)(0));
353 p->g->iflags |= USEBOL01;
354 p->g->nbol++;
355 wascaret = 1;
356 break;
357 case '$':
358 EMIT(OEOL, 0)doemit(p, (sop)((4LU<<((unsigned)27))), (size_t)(0));
359 p->g->iflags |= USEEOL02;
360 p->g->neol++;
361 break;
362 case '|':
363 SETERROR(REG_EMPTY)seterr(p, (14));
364 break;
365 case '*':
366 case '+':
367 case '?':
368 SETERROR(REG_BADRPT)seterr(p, (13));
369 break;
370 case '.':
371 if (p->g->cflags&REG_NEWLINE0010)
372 nonnewline(p);
373 else
374 EMIT(OANY, 0)doemit(p, (sop)((5LU<<((unsigned)27))), (size_t)(0));
375 break;
376 case '[':
377 p_bracket(p);
378 break;
379 case '\\':
380 REQUIRE(MORE(), REG_EESCAPE)(void)(((p->next < p->end)) || seterr(p, (5)));
381 c = GETNEXT()(*p->next++);
382 if (c >= '1' && c <= '9') {
383 /* \[0-9] is taken to be a back-reference to a previously specified
384 * matching group. backrefnum will hold the number. The matching
385 * group must exist (i.e. if \4 is found there must have been at
386 * least 4 matching groups specified in the pattern previously).
387 */
388 backrefnum = c - '0';
389 if (p->pend[backrefnum] == 0) {
390 SETERROR(REG_ESUBREG)seterr(p, (6));
391 break;
392 }
393
394 /* Make sure everything checks out and emit the sequence
395 * that marks a back-reference to the parse structure.
396 */
397 assert(backrefnum <= p->g->nsub)((void) (0));
398 EMIT(OBACK_, backrefnum)doemit(p, (sop)((7LU<<((unsigned)27))), (size_t)(backrefnum
))
;
399 assert(p->pbegin[backrefnum] != 0)((void) (0));
400 assert(OP(p->strip[p->pbegin[backrefnum]]) != OLPAREN)((void) (0));
401 assert(OP(p->strip[p->pend[backrefnum]]) != ORPAREN)((void) (0));
402 (void) dupl(p, p->pbegin[backrefnum]+1, p->pend[backrefnum]);
403 EMIT(O_BACK, backrefnum)doemit(p, (sop)((8LU<<((unsigned)27))), (size_t)(backrefnum
))
;
404 p->g->backrefs = 1;
405 } else {
406 /* Other chars are simply themselves when escaped with a backslash.
407 */
408 ordinary(p, c);
409 }
410 break;
411 case '{': /* okay as ordinary except if digit follows */
412 REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT)(void)((!(p->next < p->end) || !((*__ctype_b_loc ())
[(int) (((uch)(*p->next)))] & (unsigned short int) _ISdigit
)) || seterr(p, (13)))
;
413 /* FALLTHROUGH */
414 default:
415 ordinary(p, c);
416 break;
417 }
418
419 if (!MORE()(p->next < p->end))
420 return;
421 c = PEEK()(*p->next);
422 /* we call { a repetition if followed by a digit */
423 if (!( c == '*' || c == '+' || c == '?' ||
424 (c == '{' && MORE2()(p->next+1 < p->end) && isdigit((uch)PEEK2())((*__ctype_b_loc ())[(int) (((uch)(*(p->next+1))))] & (
unsigned short int) _ISdigit)
) ))
425 return; /* no repetition, we're done */
426 NEXT()(p->next++);
427
428 REQUIRE(!wascaret, REG_BADRPT)(void)((!wascaret) || seterr(p, (13)));
429 switch (c) {
430 case '*': /* implemented as +? */
431 /* this case does not require the (y|) trick, noKLUDGE */
432 INSERT(OPLUS_, pos)doinsert(p, (sop)((9LU<<((unsigned)27))), (p->slen)-
(pos)+1, pos)
;
433 ASTERN(O_PLUS, pos)doemit(p, (sop)((10LU<<((unsigned)27))), (size_t)((p->
slen)-pos))
;
434 INSERT(OQUEST_, pos)doinsert(p, (sop)((11LU<<((unsigned)27))), (p->slen)
-(pos)+1, pos)
;
435 ASTERN(O_QUEST, pos)doemit(p, (sop)((12LU<<((unsigned)27))), (size_t)((p->
slen)-pos))
;
436 break;
437 case '+':
438 INSERT(OPLUS_, pos)doinsert(p, (sop)((9LU<<((unsigned)27))), (p->slen)-
(pos)+1, pos)
;
439 ASTERN(O_PLUS, pos)doemit(p, (sop)((10LU<<((unsigned)27))), (size_t)((p->
slen)-pos))
;
440 break;
441 case '?':
442 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
443 INSERT(OCH_, pos)doinsert(p, (sop)((15LU<<((unsigned)27))), (p->slen)
-(pos)+1, pos)
; /* offset slightly wrong */
444 ASTERN(OOR1, pos)doemit(p, (sop)((16LU<<((unsigned)27))), (size_t)((p->
slen)-pos))
; /* this one's right */
445 AHEAD(pos)dofwd(p, pos, (p->slen)-(pos)); /* fix the OCH_ */
446 EMIT(OOR2, 0)doemit(p, (sop)((17LU<<((unsigned)27))), (size_t)(0)); /* offset very wrong... */
447 AHEAD(THERE())dofwd(p, (p->slen - 1), (p->slen)-((p->slen - 1))); /* ...so fix it */
448 ASTERN(O_CH, THERETHERE())doemit(p, (sop)((18LU<<((unsigned)27))), (size_t)((p->
slen)-(p->slen - 2)))
;
449 break;
450 case '{':
451 count = p_count(p);
452 if (EAT(',')((((p->next < p->end) && (*p->next) == (','
))) ? ((p->next++), 1) : 0)
) {
453 if (isdigit((uch)PEEK())((*__ctype_b_loc ())[(int) (((uch)(*p->next)))] & (unsigned
short int) _ISdigit)
) {
454 count2 = p_count(p);
455 REQUIRE(count <= count2, REG_BADBR)(void)((count <= count2) || seterr(p, (10)));
456 } else /* single number with comma */
457 count2 = INFINITY(255 + 1);
458 } else /* just a single number */
459 count2 = count;
460 repeat(p, pos, count, count2);
461 if (!EAT('}')((((p->next < p->end) && (*p->next) == ('}'
))) ? ((p->next++), 1) : 0)
) { /* error heuristics */
462 while (MORE()(p->next < p->end) && PEEK()(*p->next) != '}')
463 NEXT()(p->next++);
464 REQUIRE(MORE(), REG_EBRACE)(void)(((p->next < p->end)) || seterr(p, (9)));
465 SETERROR(REG_BADBR)seterr(p, (10));
466 }
467 break;
468 }
469
470 if (!MORE()(p->next < p->end))
471 return;
472 c = PEEK()(*p->next);
473 if (!( c == '*' || c == '+' || c == '?' ||
474 (c == '{' && MORE2()(p->next+1 < p->end) && isdigit((uch)PEEK2())((*__ctype_b_loc ())[(int) (((uch)(*(p->next+1))))] & (
unsigned short int) _ISdigit)
) ) )
475 return;
476 SETERROR(REG_BADRPT)seterr(p, (13));
477}
478
479/*
480 - p_str - string (no metacharacters) "parser"
481 */
482static void
483p_str(struct parse *p)
484{
485 REQUIRE(MORE(), REG_EMPTY)(void)(((p->next < p->end)) || seterr(p, (14)));
486 while (MORE()(p->next < p->end))
487 ordinary(p, GETNEXT()(*p->next++));
488}
489
490/*
491 - p_bre - BRE parser top level, anchoring and concatenation
492 * Giving end1 as OUT essentially eliminates the end1/end2 check.
493 *
494 * This implementation is a bit of a kludge, in that a trailing $ is first
495 * taken as an ordinary character and then revised to be an anchor. The
496 * only undesirable side effect is that '$' gets included as a character
497 * category in such cases. This is fairly harmless; not worth fixing.
498 * The amount of lookahead needed to avoid this kludge is excessive.
499 */
500static void
501p_bre(struct parse *p,
502 int end1, /* first terminating character */
503 int end2) /* second terminating character */
504{
505 sopno start = HERE()(p->slen);
506 int first = 1; /* first subexpression? */
507 int wasdollar = 0;
508
509 if (EAT('^')((((p->next < p->end) && (*p->next) == ('^'
))) ? ((p->next++), 1) : 0)
) {
510 EMIT(OBOL, 0)doemit(p, (sop)((3LU<<((unsigned)27))), (size_t)(0));
511 p->g->iflags |= USEBOL01;
512 p->g->nbol++;
513 }
514 while (MORE()(p->next < p->end) && !SEETWO(end1, end2)((p->next < p->end) && (p->next+1 < p->
end) && (*p->next) == (end1) && (*(p->next
+1)) == (end2))
) {
515 wasdollar = p_simp_re(p, first);
516 first = 0;
517 }
518 if (wasdollar) { /* oops, that was a trailing anchor */
519 DROP(1)(p->slen -= (1));
520 EMIT(OEOL, 0)doemit(p, (sop)((4LU<<((unsigned)27))), (size_t)(0));
521 p->g->iflags |= USEEOL02;
522 p->g->neol++;
523 }
524
525 REQUIRE(HERE() != start, REG_EMPTY)(void)(((p->slen) != start) || seterr(p, (14))); /* require nonempty */
526}
527
528/*
529 - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
530 */
531static int /* was the simple RE an unbackslashed $? */
532p_simp_re(struct parse *p,
533 int starordinary) /* is a leading * an ordinary character? */
534{
535 int c;
536 int count;
537 int count2;
538 sopno pos;
539 int i;
540 sopno subno;
541# define BACKSL(1<<8) (1<<CHAR_BIT8)
542
543 pos = HERE()(p->slen); /* repetition op, if any, covers from here */
544
545 assert(MORE())((void) (0)); /* caller should have ensured this */
546 c = GETNEXT()(*p->next++);
547 if (c == '\\') {
548 REQUIRE(MORE(), REG_EESCAPE)(void)(((p->next < p->end)) || seterr(p, (5)));
549 c = BACKSL(1<<8) | GETNEXT()(*p->next++);
550 }
551 switch (c) {
552 case '.':
553 if (p->g->cflags&REG_NEWLINE0010)
554 nonnewline(p);
555 else
556 EMIT(OANY, 0)doemit(p, (sop)((5LU<<((unsigned)27))), (size_t)(0));
557 break;
558 case '[':
559 p_bracket(p);
560 break;
561 case BACKSL(1<<8)|'{':
562 SETERROR(REG_BADRPT)seterr(p, (13));
563 break;
564 case BACKSL(1<<8)|'(':
565 p->g->nsub++;
566 subno = p->g->nsub;
567 if (subno < NPAREN10)
568 p->pbegin[subno] = HERE()(p->slen);
569 EMIT(OLPAREN, subno)doemit(p, (sop)((13LU<<((unsigned)27))), (size_t)(subno
))
;
570 /* the MORE here is an error heuristic */
571 if (MORE()(p->next < p->end) && !SEETWO('\\', ')')((p->next < p->end) && (p->next+1 < p->
end) && (*p->next) == ('\\') && (*(p->next
+1)) == (')'))
)
572 p_bre(p, '\\', ')');
573 if (subno < NPAREN10) {
574 p->pend[subno] = HERE()(p->slen);
575 assert(p->pend[subno] != 0)((void) (0));
576 }
577 EMIT(ORPAREN, subno)doemit(p, (sop)((14LU<<((unsigned)27))), (size_t)(subno
))
;
578 REQUIRE(EATTWO('\\', ')'), REG_EPAREN)(void)((((((p->next < p->end) && (p->next
+1 < p->end) && (*p->next) == ('\\') &&
(*(p->next+1)) == (')'))) ? ((p->next += 2), 1) : 0)) ||
seterr(p, (8)))
;
579 break;
580 case BACKSL(1<<8)|')': /* should not get here -- must be user */
581 case BACKSL(1<<8)|'}':
582 SETERROR(REG_EPAREN)seterr(p, (8));
583 break;
584 case BACKSL(1<<8)|'1':
585 case BACKSL(1<<8)|'2':
586 case BACKSL(1<<8)|'3':
587 case BACKSL(1<<8)|'4':
588 case BACKSL(1<<8)|'5':
589 case BACKSL(1<<8)|'6':
590 case BACKSL(1<<8)|'7':
591 case BACKSL(1<<8)|'8':
592 case BACKSL(1<<8)|'9':
593 i = (c&~BACKSL(1<<8)) - '0';
594 assert(i < NPAREN)((void) (0));
595 if (p->pend[i] != 0) {
596 assert(i <= p->g->nsub)((void) (0));
597 EMIT(OBACK_, i)doemit(p, (sop)((7LU<<((unsigned)27))), (size_t)(i));
598 assert(p->pbegin[i] != 0)((void) (0));
599 assert(OP(p->strip[p->pbegin[i]]) == OLPAREN)((void) (0));
600 assert(OP(p->strip[p->pend[i]]) == ORPAREN)((void) (0));
601 (void) dupl(p, p->pbegin[i]+1, p->pend[i]);
602 EMIT(O_BACK, i)doemit(p, (sop)((8LU<<((unsigned)27))), (size_t)(i));
603 } else
604 SETERROR(REG_ESUBREG)seterr(p, (6));
605 p->g->backrefs = 1;
606 break;
607 case '*':
608 REQUIRE(starordinary, REG_BADRPT)(void)((starordinary) || seterr(p, (13)));
609 /* FALLTHROUGH */
610 default:
611 ordinary(p, (char)c);
612 break;
613 }
614
615 if (EAT('*')((((p->next < p->end) && (*p->next) == ('*'
))) ? ((p->next++), 1) : 0)
) { /* implemented as +? */
616 /* this case does not require the (y|) trick, noKLUDGE */
617 INSERT(OPLUS_, pos)doinsert(p, (sop)((9LU<<((unsigned)27))), (p->slen)-
(pos)+1, pos)
;
618 ASTERN(O_PLUS, pos)doemit(p, (sop)((10LU<<((unsigned)27))), (size_t)((p->
slen)-pos))
;
619 INSERT(OQUEST_, pos)doinsert(p, (sop)((11LU<<((unsigned)27))), (p->slen)
-(pos)+1, pos)
;
620 ASTERN(O_QUEST, pos)doemit(p, (sop)((12LU<<((unsigned)27))), (size_t)((p->
slen)-pos))
;
621 } else if (EATTWO('\\', '{')((((p->next < p->end) && (p->next+1 < p
->end) && (*p->next) == ('\\') && (*(p->
next+1)) == ('{'))) ? ((p->next += 2), 1) : 0)
) {
622 count = p_count(p);
623 if (EAT(',')((((p->next < p->end) && (*p->next) == (','
))) ? ((p->next++), 1) : 0)
) {
624 if (MORE()(p->next < p->end) && isdigit((uch)PEEK())((*__ctype_b_loc ())[(int) (((uch)(*p->next)))] & (unsigned
short int) _ISdigit)
) {
625 count2 = p_count(p);
626 REQUIRE(count <= count2, REG_BADBR)(void)((count <= count2) || seterr(p, (10)));
627 } else /* single number with comma */
628 count2 = INFINITY(255 + 1);
629 } else /* just a single number */
630 count2 = count;
631 repeat(p, pos, count, count2);
632 if (!EATTWO('\\', '}')((((p->next < p->end) && (p->next+1 < p
->end) && (*p->next) == ('\\') && (*(p->
next+1)) == ('}'))) ? ((p->next += 2), 1) : 0)
) { /* error heuristics */
633 while (MORE()(p->next < p->end) && !SEETWO('\\', '}')((p->next < p->end) && (p->next+1 < p->
end) && (*p->next) == ('\\') && (*(p->next
+1)) == ('}'))
)
634 NEXT()(p->next++);
635 REQUIRE(MORE(), REG_EBRACE)(void)(((p->next < p->end)) || seterr(p, (9)));
636 SETERROR(REG_BADBR)seterr(p, (10));
637 }
638 } else if (c == '$') /* $ (but not \$) ends it */
639 return(1);
640
641 return(0);
642}
643
644/*
645 - p_count - parse a repetition count
646 */
647static int /* the value */
648p_count(struct parse *p)
649{
650 int count = 0;
651 int ndigits = 0;
652
653 while (MORE()(p->next < p->end) && isdigit((uch)PEEK())((*__ctype_b_loc ())[(int) (((uch)(*p->next)))] & (unsigned
short int) _ISdigit)
&& count <= DUPMAX255) {
654 count = count*10 + (GETNEXT()(*p->next++) - '0');
655 ndigits++;
656 }
657
658 REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR)(void)((ndigits > 0 && count <= 255) || seterr(
p, (10)))
;
659 return(count);
660}
661
662/*
663 - p_bracket - parse a bracketed character list
664 *
665 * Note a significant property of this code: if the allocset() did SETERROR,
666 * no set operations are done.
667 */
668static void
669p_bracket(struct parse *p)
670{
671 cset *cs;
672 int invert = 0;
673
674 /* Dept of Truly Sickening Special-Case Kludges */
675 if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6)(__extension__ (__builtin_constant_p (6) && ((__builtin_constant_p
(p->next) && strlen (p->next) < ((size_t) (
6))) || (__builtin_constant_p ("[:<:]]") && strlen
("[:<:]]") < ((size_t) (6)))) ? __extension__ ({ size_t
__s1_len, __s2_len; (__builtin_constant_p (p->next) &&
__builtin_constant_p ("[:<:]]") && (__s1_len = __builtin_strlen
(p->next), __s2_len = __builtin_strlen ("[:<:]]"), (!(
(size_t)(const void *)((p->next) + 1) - (size_t)(const void
*)(p->next) == 1) || __s1_len >= 4) && (!((size_t
)(const void *)(("[:<:]]") + 1) - (size_t)(const void *)("[:<:]]"
) == 1) || __s2_len >= 4)) ? __builtin_strcmp (p->next,
"[:<:]]") : (__builtin_constant_p (p->next) &&
((size_t)(const void *)((p->next) + 1) - (size_t)(const void
*)(p->next) == 1) && (__s1_len = __builtin_strlen
(p->next), __s1_len < 4) ? (__builtin_constant_p ("[:<:]]"
) && ((size_t)(const void *)(("[:<:]]") + 1) - (size_t
)(const void *)("[:<:]]") == 1) ? __builtin_strcmp (p->
next, "[:<:]]") : (__extension__ ({ const unsigned char *__s2
= (const unsigned char *) (const char *) ("[:<:]]"); int __result
= (((const unsigned char *) (const char *) (p->next))[0] -
__s2[0]); if (__s1_len > 0 && __result == 0) { __result
= (((const unsigned char *) (const char *) (p->next))[1] -
__s2[1]); if (__s1_len > 1 && __result == 0) { __result
= (((const unsigned char *) (const char *) (p->next))[2] -
__s2[2]); if (__s1_len > 2 && __result == 0) __result
= (((const unsigned char *) (const char *) (p->next))[3] -
__s2[3]); } } __result; }))) : (__builtin_constant_p ("[:<:]]"
) && ((size_t)(const void *)(("[:<:]]") + 1) - (size_t
)(const void *)("[:<:]]") == 1) && (__s2_len = __builtin_strlen
("[:<:]]"), __s2_len < 4) ? (__builtin_constant_p (p->
next) && ((size_t)(const void *)((p->next) + 1) - (
size_t)(const void *)(p->next) == 1) ? __builtin_strcmp (p
->next, "[:<:]]") : -(__extension__ ({ const unsigned char
*__s2 = (const unsigned char *) (const char *) (p->next);
int __result = (((const unsigned char *) (const char *) ("[:<:]]"
))[0] - __s2[0]); if (__s2_len > 0 && __result == 0
) { __result = (((const unsigned char *) (const char *) ("[:<:]]"
))[1] - __s2[1]); if (__s2_len > 1 && __result == 0
) { __result = (((const unsigned char *) (const char *) ("[:<:]]"
))[2] - __s2[2]); if (__s2_len > 2 && __result == 0
) __result = (((const unsigned char *) (const char *) ("[:<:]]"
))[3] - __s2[3]); } } __result; }))) : __builtin_strcmp (p->
next, "[:<:]]")))); }) : strncmp (p->next, "[:<:]]",
6)))
== 0) {
676 EMIT(OBOW, 0)doemit(p, (sop)((19LU<<((unsigned)27))), (size_t)(0));
677 NEXTn(6)(p->next += (6));
678 return;
679 }
680 if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6)(__extension__ (__builtin_constant_p (6) && ((__builtin_constant_p
(p->next) && strlen (p->next) < ((size_t) (
6))) || (__builtin_constant_p ("[:>:]]") && strlen
("[:>:]]") < ((size_t) (6)))) ? __extension__ ({ size_t
__s1_len, __s2_len; (__builtin_constant_p (p->next) &&
__builtin_constant_p ("[:>:]]") && (__s1_len = __builtin_strlen
(p->next), __s2_len = __builtin_strlen ("[:>:]]"), (!(
(size_t)(const void *)((p->next) + 1) - (size_t)(const void
*)(p->next) == 1) || __s1_len >= 4) && (!((size_t
)(const void *)(("[:>:]]") + 1) - (size_t)(const void *)("[:>:]]"
) == 1) || __s2_len >= 4)) ? __builtin_strcmp (p->next,
"[:>:]]") : (__builtin_constant_p (p->next) &&
((size_t)(const void *)((p->next) + 1) - (size_t)(const void
*)(p->next) == 1) && (__s1_len = __builtin_strlen
(p->next), __s1_len < 4) ? (__builtin_constant_p ("[:>:]]"
) && ((size_t)(const void *)(("[:>:]]") + 1) - (size_t
)(const void *)("[:>:]]") == 1) ? __builtin_strcmp (p->
next, "[:>:]]") : (__extension__ ({ const unsigned char *__s2
= (const unsigned char *) (const char *) ("[:>:]]"); int __result
= (((const unsigned char *) (const char *) (p->next))[0] -
__s2[0]); if (__s1_len > 0 && __result == 0) { __result
= (((const unsigned char *) (const char *) (p->next))[1] -
__s2[1]); if (__s1_len > 1 && __result == 0) { __result
= (((const unsigned char *) (const char *) (p->next))[2] -
__s2[2]); if (__s1_len > 2 && __result == 0) __result
= (((const unsigned char *) (const char *) (p->next))[3] -
__s2[3]); } } __result; }))) : (__builtin_constant_p ("[:>:]]"
) && ((size_t)(const void *)(("[:>:]]") + 1) - (size_t
)(const void *)("[:>:]]") == 1) && (__s2_len = __builtin_strlen
("[:>:]]"), __s2_len < 4) ? (__builtin_constant_p (p->
next) && ((size_t)(const void *)((p->next) + 1) - (
size_t)(const void *)(p->next) == 1) ? __builtin_strcmp (p
->next, "[:>:]]") : -(__extension__ ({ const unsigned char
*__s2 = (const unsigned char *) (const char *) (p->next);
int __result = (((const unsigned char *) (const char *) ("[:>:]]"
))[0] - __s2[0]); if (__s2_len > 0 && __result == 0
) { __result = (((const unsigned char *) (const char *) ("[:>:]]"
))[1] - __s2[1]); if (__s2_len > 1 && __result == 0
) { __result = (((const unsigned char *) (const char *) ("[:>:]]"
))[2] - __s2[2]); if (__s2_len > 2 && __result == 0
) __result = (((const unsigned char *) (const char *) ("[:>:]]"
))[3] - __s2[3]); } } __result; }))) : __builtin_strcmp (p->
next, "[:>:]]")))); }) : strncmp (p->next, "[:>:]]",
6)))
== 0) {
681 EMIT(OEOW, 0)doemit(p, (sop)((20LU<<((unsigned)27))), (size_t)(0));
682 NEXTn(6)(p->next += (6));
683 return;
684 }
685
686 if ((cs = allocset(p)) == NULL((void*)0)) {
687 /* allocset did set error status in p */
688 return;
689 }
690
691 if (EAT('^')((((p->next < p->end) && (*p->next) == ('^'
))) ? ((p->next++), 1) : 0)
)
692 invert++; /* make note to invert set at end */
693 if (EAT(']')((((p->next < p->end) && (*p->next) == (']'
))) ? ((p->next++), 1) : 0)
)
694 CHadd(cs, ']')((cs)->ptr[(uch)(']')] |= (cs)->mask, (cs)->hash += (
']'))
;
695 else if (EAT('-')((((p->next < p->end) && (*p->next) == ('-'
))) ? ((p->next++), 1) : 0)
)
696 CHadd(cs, '-')((cs)->ptr[(uch)('-')] |= (cs)->mask, (cs)->hash += (
'-'))
;
697 while (MORE()(p->next < p->end) && PEEK()(*p->next) != ']' && !SEETWO('-', ']')((p->next < p->end) && (p->next+1 < p->
end) && (*p->next) == ('-') && (*(p->next
+1)) == (']'))
)
698 p_b_term(p, cs);
699 if (EAT('-')((((p->next < p->end) && (*p->next) == ('-'
))) ? ((p->next++), 1) : 0)
)
700 CHadd(cs, '-')((cs)->ptr[(uch)('-')] |= (cs)->mask, (cs)->hash += (
'-'))
;
701 MUSTEAT(']', REG_EBRACK)((void)(((p->next < p->end) && (*p->next++
) == (']')) || seterr(p, (7))))
;
702
703 if (p->error != 0) { /* don't mess things up further */
704 freeset(p, cs);
705 return;
706 }
707
708 if (p->g->cflags&REG_ICASE0002) {
709 int i;
710 int ci;
711
712 for (i = p->g->csetsize - 1; i >= 0; i--)
713 if (CHIN(cs, i)((cs)->ptr[(uch)(i)] & (cs)->mask) && isalpha(i)((*__ctype_b_loc ())[(int) ((i))] & (unsigned short int) _ISalpha
)
) {
714 ci = othercase(i);
715 if (ci != i)
716 CHadd(cs, ci)((cs)->ptr[(uch)(ci)] |= (cs)->mask, (cs)->hash += (
ci))
;
717 }
718 if (cs->multis != NULL((void*)0))
719 mccase(p, cs);
720 }
721 if (invert) {
722 int i;
723
724 for (i = p->g->csetsize - 1; i >= 0; i--)
725 if (CHIN(cs, i)((cs)->ptr[(uch)(i)] & (cs)->mask))
726 CHsub(cs, i)((cs)->ptr[(uch)(i)] &= ~(cs)->mask, (cs)->hash -=
(i))
;
727 else
728 CHadd(cs, i)((cs)->ptr[(uch)(i)] |= (cs)->mask, (cs)->hash += (i
))
;
729 if (p->g->cflags&REG_NEWLINE0010)
730 CHsub(cs, '\n')((cs)->ptr[(uch)('\n')] &= ~(cs)->mask, (cs)->hash
-= ('\n'))
;
731 if (cs->multis != NULL((void*)0))
732 mcinvert(p, cs);
733 }
734
735 assert(cs->multis == NULL)((void) (0)); /* xxx */
736
737 if (nch(p, cs) == 1) { /* optimize singleton sets */
738 ordinary(p, firstch(p, cs));
739 freeset(p, cs);
740 } else
741 EMIT(OANYOF, freezeset(p, cs))doemit(p, (sop)((6LU<<((unsigned)27))), (size_t)(freezeset
(p, cs)))
;
742}
743
744/*
745 - p_b_term - parse one term of a bracketed character list
746 */
747static void
748p_b_term(struct parse *p, cset *cs)
749{
750 char c;
751 char start, finish;
752 int i;
753
754 /* classify what we've got */
755 switch ((MORE()(p->next < p->end)) ? PEEK()(*p->next) : '\0') {
756 case '[':
757 c = (MORE2()(p->next+1 < p->end)) ? PEEK2()(*(p->next+1)) : '\0';
758 break;
759 case '-':
760 SETERROR(REG_ERANGE)seterr(p, (11));
761 return; /* NOTE RETURN */
762 break;
763 default:
764 c = '\0';
765 break;
766 }
767
768 switch (c) {
769 case ':': /* character class */
770 NEXT2()(p->next += 2);
771 REQUIRE(MORE(), REG_EBRACK)(void)(((p->next < p->end)) || seterr(p, (7)));
772 c = PEEK()(*p->next);
773 REQUIRE(c != '-' && c != ']', REG_ECTYPE)(void)((c != '-' && c != ']') || seterr(p, (4)));
774 p_b_cclass(p, cs);
775 REQUIRE(MORE(), REG_EBRACK)(void)(((p->next < p->end)) || seterr(p, (7)));
776 REQUIRE(EATTWO(':', ']'), REG_ECTYPE)(void)((((((p->next < p->end) && (p->next
+1 < p->end) && (*p->next) == (':') &&
(*(p->next+1)) == (']'))) ? ((p->next += 2), 1) : 0)) ||
seterr(p, (4)))
;
777 break;
778 case '=': /* equivalence class */
779 NEXT2()(p->next += 2);
780 REQUIRE(MORE(), REG_EBRACK)(void)(((p->next < p->end)) || seterr(p, (7)));
781 c = PEEK()(*p->next);
782 REQUIRE(c != '-' && c != ']', REG_ECOLLATE)(void)((c != '-' && c != ']') || seterr(p, (3)));
783 p_b_eclass(p, cs);
784 REQUIRE(MORE(), REG_EBRACK)(void)(((p->next < p->end)) || seterr(p, (7)));
785 REQUIRE(EATTWO('=', ']'), REG_ECOLLATE)(void)((((((p->next < p->end) && (p->next
+1 < p->end) && (*p->next) == ('=') &&
(*(p->next+1)) == (']'))) ? ((p->next += 2), 1) : 0)) ||
seterr(p, (3)))
;
786 break;
787 default: /* symbol, ordinary character, or range */
788/* xxx revision needed for multichar stuff */
789 start = p_b_symbol(p);
790 if (SEE('-')((p->next < p->end) && (*p->next) == ('-'
))
&& MORE2()(p->next+1 < p->end) && PEEK2()(*(p->next+1)) != ']') {
791 /* range */
792 NEXT()(p->next++);
793 if (EAT('-')((((p->next < p->end) && (*p->next) == ('-'
))) ? ((p->next++), 1) : 0)
)
794 finish = '-';
795 else
796 finish = p_b_symbol(p);
797 } else
798 finish = start;
799/* xxx what about signed chars here... */
800 REQUIRE(start <= finish, REG_ERANGE)(void)((start <= finish) || seterr(p, (11)));
801 for (i = start; i <= finish; i++)
802 CHadd(cs, i)((cs)->ptr[(uch)(i)] |= (cs)->mask, (cs)->hash += (i
))
;
803 break;
804 }
805}
806
807/*
808 - p_b_cclass - parse a character-class name and deal with it
809 */
810static void
811p_b_cclass(struct parse *p, cset *cs)
812{
813 char *sp = p->next;
814 struct cclass *cp;
815 size_t len;
816 const char *u;
817 char c;
818
819 while (MORE()(p->next < p->end) && isalpha((uch)PEEK())((*__ctype_b_loc ())[(int) (((uch)(*p->next)))] & (unsigned
short int) _ISalpha)
)
820 NEXT()(p->next++);
821 len = p->next - sp;
822 for (cp = cclasses; cp->name != NULL((void*)0); cp++)
823 if (strncmp(cp->name, sp, len)(__extension__ (__builtin_constant_p (len) && ((__builtin_constant_p
(cp->name) && strlen (cp->name) < ((size_t)
(len))) || (__builtin_constant_p (sp) && strlen (sp)
< ((size_t) (len)))) ? __extension__ ({ size_t __s1_len, __s2_len
; (__builtin_constant_p (cp->name) && __builtin_constant_p
(sp) && (__s1_len = __builtin_strlen (cp->name), __s2_len
= __builtin_strlen (sp), (!((size_t)(const void *)((cp->name
) + 1) - (size_t)(const void *)(cp->name) == 1) || __s1_len
>= 4) && (!((size_t)(const void *)((sp) + 1) - (size_t
)(const void *)(sp) == 1) || __s2_len >= 4)) ? __builtin_strcmp
(cp->name, sp) : (__builtin_constant_p (cp->name) &&
((size_t)(const void *)((cp->name) + 1) - (size_t)(const void
*)(cp->name) == 1) && (__s1_len = __builtin_strlen
(cp->name), __s1_len < 4) ? (__builtin_constant_p (sp)
&& ((size_t)(const void *)((sp) + 1) - (size_t)(const
void *)(sp) == 1) ? __builtin_strcmp (cp->name, sp) : (__extension__
({ const unsigned char *__s2 = (const unsigned char *) (const
char *) (sp); int __result = (((const unsigned char *) (const
char *) (cp->name))[0] - __s2[0]); if (__s1_len > 0 &&
__result == 0) { __result = (((const unsigned char *) (const
char *) (cp->name))[1] - __s2[1]); if (__s1_len > 1 &&
__result == 0) { __result = (((const unsigned char *) (const
char *) (cp->name))[2] - __s2[2]); if (__s1_len > 2 &&
__result == 0) __result = (((const unsigned char *) (const char
*) (cp->name))[3] - __s2[3]); } } __result; }))) : (__builtin_constant_p
(sp) && ((size_t)(const void *)((sp) + 1) - (size_t)
(const void *)(sp) == 1) && (__s2_len = __builtin_strlen
(sp), __s2_len < 4) ? (__builtin_constant_p (cp->name)
&& ((size_t)(const void *)((cp->name) + 1) - (size_t
)(const void *)(cp->name) == 1) ? __builtin_strcmp (cp->
name, sp) : -(__extension__ ({ const unsigned char *__s2 = (const
unsigned char *) (const char *) (cp->name); int __result =
(((const unsigned char *) (const char *) (sp))[0] - __s2[0])
; if (__s2_len > 0 && __result == 0) { __result = (
((const unsigned char *) (const char *) (sp))[1] - __s2[1]); if
(__s2_len > 1 && __result == 0) { __result = (((const
unsigned char *) (const char *) (sp))[2] - __s2[2]); if (__s2_len
> 2 && __result == 0) __result = (((const unsigned
char *) (const char *) (sp))[3] - __s2[3]); } } __result; })
)) : __builtin_strcmp (cp->name, sp)))); }) : strncmp (cp->
name, sp, len)))
== 0 && cp->name[len] == '\0')
824 break;
825 if (cp->name == NULL((void*)0)) {
826 /* oops, didn't find it */
827 SETERROR(REG_ECTYPE)seterr(p, (4));
828 return;
829 }
830
831 u = cp->chars;
832 while ((c = *u++) != '\0')
833 CHadd(cs, c)((cs)->ptr[(uch)(c)] |= (cs)->mask, (cs)->hash += (c
))
;
834 for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
835 MCadd(p, cs, u)mcadd(p, cs, u);
836}
837
838/*
839 - p_b_eclass - parse an equivalence-class name and deal with it
840 *
841 * This implementation is incomplete. xxx
842 */
843static void
844p_b_eclass(struct parse *p, cset *cs)
845{
846 char c;
847
848 c = p_b_coll_elem(p, '=');
849 CHadd(cs, c)((cs)->ptr[(uch)(c)] |= (cs)->mask, (cs)->hash += (c
))
;
850}
851
852/*
853 - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
854 */
855static char /* value of symbol */
856p_b_symbol(struct parse *p)
857{
858 char value;
859
860 REQUIRE(MORE(), REG_EBRACK)(void)(((p->next < p->end)) || seterr(p, (7)));
861 if (!EATTWO('[', '.')((((p->next < p->end) && (p->next+1 < p
->end) && (*p->next) == ('[') && (*(p->
next+1)) == ('.'))) ? ((p->next += 2), 1) : 0)
)
862 return(GETNEXT()(*p->next++));
863
864 /* collating symbol */
865 value = p_b_coll_elem(p, '.');
866 REQUIRE(EATTWO('.', ']'), REG_ECOLLATE)(void)((((((p->next < p->end) && (p->next
+1 < p->end) && (*p->next) == ('.') &&
(*(p->next+1)) == (']'))) ? ((p->next += 2), 1) : 0)) ||
seterr(p, (3)))
;
867 return(value);
868}
869
870/*
871 - p_b_coll_elem - parse a collating-element name and look it up
872 */
873static char /* value of collating element */
874p_b_coll_elem(struct parse *p,
875 int endc) /* name ended by endc,']' */
876{
877 char *sp = p->next;
878 struct cname *cp;
879 int len;
880
881 while (MORE()(p->next < p->end) && !SEETWO(endc, ']')((p->next < p->end) && (p->next+1 < p->
end) && (*p->next) == (endc) && (*(p->next
+1)) == (']'))
)
882 NEXT()(p->next++);
883 if (!MORE()(p->next < p->end)) {
884 SETERROR(REG_EBRACK)seterr(p, (7));
885 return(0);
886 }
887 len = p->next - sp;
888 for (cp = cnames; cp->name != NULL((void*)0); cp++)
889 if (strncmp(cp->name, sp, len)(__extension__ (__builtin_constant_p (len) && ((__builtin_constant_p
(cp->name) && strlen (cp->name) < ((size_t)
(len))) || (__builtin_constant_p (sp) && strlen (sp)
< ((size_t) (len)))) ? __extension__ ({ size_t __s1_len, __s2_len
; (__builtin_constant_p (cp->name) && __builtin_constant_p
(sp) && (__s1_len = __builtin_strlen (cp->name), __s2_len
= __builtin_strlen (sp), (!((size_t)(const void *)((cp->name
) + 1) - (size_t)(const void *)(cp->name) == 1) || __s1_len
>= 4) && (!((size_t)(const void *)((sp) + 1) - (size_t
)(const void *)(sp) == 1) || __s2_len >= 4)) ? __builtin_strcmp
(cp->name, sp) : (__builtin_constant_p (cp->name) &&
((size_t)(const void *)((cp->name) + 1) - (size_t)(const void
*)(cp->name) == 1) && (__s1_len = __builtin_strlen
(cp->name), __s1_len < 4) ? (__builtin_constant_p (sp)
&& ((size_t)(const void *)((sp) + 1) - (size_t)(const
void *)(sp) == 1) ? __builtin_strcmp (cp->name, sp) : (__extension__
({ const unsigned char *__s2 = (const unsigned char *) (const
char *) (sp); int __result = (((const unsigned char *) (const
char *) (cp->name))[0] - __s2[0]); if (__s1_len > 0 &&
__result == 0) { __result = (((const unsigned char *) (const
char *) (cp->name))[1] - __s2[1]); if (__s1_len > 1 &&
__result == 0) { __result = (((const unsigned char *) (const
char *) (cp->name))[2] - __s2[2]); if (__s1_len > 2 &&
__result == 0) __result = (((const unsigned char *) (const char
*) (cp->name))[3] - __s2[3]); } } __result; }))) : (__builtin_constant_p
(sp) && ((size_t)(const void *)((sp) + 1) - (size_t)
(const void *)(sp) == 1) && (__s2_len = __builtin_strlen
(sp), __s2_len < 4) ? (__builtin_constant_p (cp->name)
&& ((size_t)(const void *)((cp->name) + 1) - (size_t
)(const void *)(cp->name) == 1) ? __builtin_strcmp (cp->
name, sp) : -(__extension__ ({ const unsigned char *__s2 = (const
unsigned char *) (const char *) (cp->name); int __result =
(((const unsigned char *) (const char *) (sp))[0] - __s2[0])
; if (__s2_len > 0 && __result == 0) { __result = (
((const unsigned char *) (const char *) (sp))[1] - __s2[1]); if
(__s2_len > 1 && __result == 0) { __result = (((const
unsigned char *) (const char *) (sp))[2] - __s2[2]); if (__s2_len
> 2 && __result == 0) __result = (((const unsigned
char *) (const char *) (sp))[3] - __s2[3]); } } __result; })
)) : __builtin_strcmp (cp->name, sp)))); }) : strncmp (cp->
name, sp, len)))
== 0 && cp->name[len] == '\0')
890 return(cp->code); /* known name */
891 if (len == 1)
892 return(*sp); /* single character */
893 SETERROR(REG_ECOLLATE)seterr(p, (3)); /* neither */
894 return(0);
895}
896
897/*
898 - othercase - return the case counterpart of an alphabetic
899 */
900static char /* if no counterpart, return ch */
901othercase(int ch)
902{
903 ch = (uch)ch;
904 assert(isalpha(ch))((void) (0));
905 if (isupper(ch)((*__ctype_b_loc ())[(int) ((ch))] & (unsigned short int)
_ISupper)
)
906 return ((uch)tolower(ch)(__extension__ ({ int __res; if (sizeof (ch) > 1) { if (__builtin_constant_p
(ch)) { int __c = (ch); __res = __c < -128 || __c > 255
? __c : (*__ctype_tolower_loc ())[__c]; } else __res = tolower
(ch); } else __res = (*__ctype_tolower_loc ())[(int) (ch)]; __res
; }))
);
907 else if (islower(ch)((*__ctype_b_loc ())[(int) ((ch))] & (unsigned short int)
_ISlower)
)
908 return ((uch)toupper(ch)(__extension__ ({ int __res; if (sizeof (ch) > 1) { if (__builtin_constant_p
(ch)) { int __c = (ch); __res = __c < -128 || __c > 255
? __c : (*__ctype_toupper_loc ())[__c]; } else __res = toupper
(ch); } else __res = (*__ctype_toupper_loc ())[(int) (ch)]; __res
; }))
);
909 else /* peculiar, but could happen */
910 return(ch);
911}
912
913/*
914 - bothcases - emit a dualcase version of a two-case character
915 *
916 * Boy, is this implementation ever a kludge...
917 */
918static void
919bothcases(struct parse *p, int ch)
920{
921 char *oldnext = p->next;
922 char *oldend = p->end;
923 char bracket[3];
924
925 ch = (uch)ch;
926 assert(othercase(ch) != ch)((void) (0)); /* p_bracket() would recurse */
927 p->next = bracket;
928 p->end = bracket+2;
929 bracket[0] = ch;
930 bracket[1] = ']';
931 bracket[2] = '\0';
932 p_bracket(p);
933 assert(p->next == bracket+2)((void) (0));
934 p->next = oldnext;
935 p->end = oldend;
936}
937
938/*
939 - ordinary - emit an ordinary character
940 */
941static void
942ordinary(struct parse *p, int ch)
943{
944 cat_t *cap = p->g->categories;
945
946 if ((p->g->cflags&REG_ICASE0002) && isalpha((uch)ch)((*__ctype_b_loc ())[(int) (((uch)ch))] & (unsigned short
int) _ISalpha)
&& othercase(ch) != ch)
947 bothcases(p, ch);
948 else {
949 EMIT(OCHAR, (uch)ch)doemit(p, (sop)((2LU<<((unsigned)27))), (size_t)((uch)ch
))
;
950 if (cap[ch] == 0)
951 cap[ch] = p->g->ncategories++;
952 }
953}
954
955/*
956 - nonnewline - emit REG_NEWLINE version of OANY
957 *
958 * Boy, is this implementation ever a kludge...
959 */
960static void
961nonnewline(struct parse *p)
962{
963 char *oldnext = p->next;
964 char *oldend = p->end;
965 char bracket[4];
966
967 p->next = bracket;
968 p->end = bracket+3;
969 bracket[0] = '^';
970 bracket[1] = '\n';
971 bracket[2] = ']';
972 bracket[3] = '\0';
973 p_bracket(p);
974 assert(p->next == bracket+3)((void) (0));
975 p->next = oldnext;
976 p->end = oldend;
977}
978
979/*
980 - repeat - generate code for a bounded repetition, recursively if needed
981 */
982static void
983repeat(struct parse *p,
984 sopno start, /* operand from here to end of strip */
985 int from, /* repeated from this number */
986 int to) /* to this number of times (maybe INFINITY) */
987{
988 sopno finish = HERE()(p->slen);
989# define N2 2
990# define INF3 3
991# define REP(f, t)((f)*8 + (t)) ((f)*8 + (t))
992# define MAP(n)(((n) <= 1) ? (n) : ((n) == (255 + 1)) ? 3 : 2) (((n) <= 1) ? (n) : ((n) == INFINITY(255 + 1)) ? INF3 : N2)
993 sopno copy;
994
995 if (p->error != 0) /* head off possible runaway recursion */
996 return;
997
998 assert(from <= to)((void) (0));
999
1000 switch (REP(MAP(from), MAP(to))(((((from) <= 1) ? (from) : ((from) == (255 + 1)) ? 3 : 2)
)*8 + ((((to) <= 1) ? (to) : ((to) == (255 + 1)) ? 3 : 2))
)
) {
1001 case REP(0, 0)((0)*8 + (0)): /* must be user doing this */
1002 DROP(finish-start)(p->slen -= (finish-start)); /* drop the operand */
1003 break;
1004 case REP(0, 1)((0)*8 + (1)): /* as x{1,1}? */
1005 case REP(0, N)((0)*8 + (2)): /* as x{1,n}? */
1006 case REP(0, INF)((0)*8 + (3)): /* as x{1,}? */
1007 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
1008 INSERT(OCH_, start)doinsert(p, (sop)((15LU<<((unsigned)27))), (p->slen)
-(start)+1, start)
; /* offset is wrong... */
1009 repeat(p, start+1, 1, to);
1010 ASTERN(OOR1, start)doemit(p, (sop)((16LU<<((unsigned)27))), (size_t)((p->
slen)-start))
;
1011 AHEAD(start)dofwd(p, start, (p->slen)-(start)); /* ... fix it */
1012 EMIT(OOR2, 0)doemit(p, (sop)((17LU<<((unsigned)27))), (size_t)(0));
1013 AHEAD(THERE())dofwd(p, (p->slen - 1), (p->slen)-((p->slen - 1)));
1014 ASTERN(O_CH, THERETHERE())doemit(p, (sop)((18LU<<((unsigned)27))), (size_t)((p->
slen)-(p->slen - 2)))
;
1015 break;
1016 case REP(1, 1)((1)*8 + (1)): /* trivial case */
1017 /* done */
1018 break;
1019 case REP(1, N)((1)*8 + (2)): /* as x?x{1,n-1} */
1020 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
1021 INSERT(OCH_, start)doinsert(p, (sop)((15LU<<((unsigned)27))), (p->slen)
-(start)+1, start)
;
1022 ASTERN(OOR1, start)doemit(p, (sop)((16LU<<((unsigned)27))), (size_t)((p->
slen)-start))
;
1023 AHEAD(start)dofwd(p, start, (p->slen)-(start));
1024 EMIT(OOR2, 0)doemit(p, (sop)((17LU<<((unsigned)27))), (size_t)(0)); /* offset very wrong... */
1025 AHEAD(THERE())dofwd(p, (p->slen - 1), (p->slen)-((p->slen - 1))); /* ...so fix it */
1026 ASTERN(O_CH, THERETHERE())doemit(p, (sop)((18LU<<((unsigned)27))), (size_t)((p->
slen)-(p->slen - 2)))
;
1027 copy = dupl(p, start+1, finish+1);
1028 assert(copy == finish+4)((void) (0));
1029 repeat(p, copy, 1, to-1);
1030 break;
1031 case REP(1, INF)((1)*8 + (3)): /* as x+ */
1032 INSERT(OPLUS_, start)doinsert(p, (sop)((9LU<<((unsigned)27))), (p->slen)-
(start)+1, start)
;
1033 ASTERN(O_PLUS, start)doemit(p, (sop)((10LU<<((unsigned)27))), (size_t)((p->
slen)-start))
;
1034 break;
1035 case REP(N, N)((2)*8 + (2)): /* as xx{m-1,n-1} */
1036 copy = dupl(p, start, finish);
1037 repeat(p, copy, from-1, to-1);
1038 break;
1039 case REP(N, INF)((2)*8 + (3)): /* as xx{n-1,INF} */
1040 copy = dupl(p, start, finish);
1041 repeat(p, copy, from-1, to);
1042 break;
1043 default: /* "can't happen" */
1044 SETERROR(REG_ASSERT)seterr(p, (15)); /* just in case */
1045 break;
1046 }
1047}
1048
1049/*
1050 - seterr - set an error condition
1051 */
1052static int /* useless but makes type checking happy */
1053seterr(struct parse *p, int e)
1054{
1055 if (p->error == 0) /* keep earliest error condition */
1056 p->error = e;
1057 p->next = nuls; /* try to bring things to a halt */
1058 p->end = nuls;
1059 return(0); /* make the return value well-defined */
1060}
1061
1062/*
1063 - allocset - allocate a set of characters for []
1064 */
1065static cset *
1066allocset(struct parse *p)
1067{
1068 int no = p->g->ncsets++;
1069 size_t nc;
1070 size_t nbytes;
1071 cset *cs;
1072 size_t css = (size_t)p->g->csetsize;
1073 int i;
1074
1075 if (no >= p->ncsalloc) { /* need another column of space */
1076 void *ptr;
1077
1078 p->ncsalloc += CHAR_BIT8;
1079 nc = p->ncsalloc;
1080 if (nc > SIZE_MAX(18446744073709551615UL) / sizeof(cset))
1081 goto nomem;
1082 assert(nc % CHAR_BIT == 0)((void) (0));
1083 nbytes = nc / CHAR_BIT8 * css;
1084
1085 ptr = (cset *)realloc((char *)p->g->sets, nc * sizeof(cset));
1086 if (ptr == NULL((void*)0))
1087 goto nomem;
1088 p->g->sets = ptr;
1089
1090 ptr = (uch *)realloc((char *)p->g->setbits, nbytes);
1091 if (ptr == NULL((void*)0))
1092 goto nomem;
1093 p->g->setbits = ptr;
1094
1095 for (i = 0; i < no; i++)
1096 p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT8);
1097
1098 (void) memset((char *)p->g->setbits + (nbytes - css), 0, css);
1099 }
1100 /* XXX should not happen */
1101 if (p->g->sets == NULL((void*)0) || p->g->setbits == NULL((void*)0))
1102 goto nomem;
1103
1104 cs = &p->g->sets[no];
1105 cs->ptr = p->g->setbits + css*((no)/CHAR_BIT8);
1106 cs->mask = 1 << ((no) % CHAR_BIT8);
1107 cs->hash = 0;
1108 cs->smultis = 0;
1109 cs->multis = NULL((void*)0);
1110
1111 return(cs);
1112nomem:
1113 free(p->g->sets);
1114 p->g->sets = NULL((void*)0);
1115 free(p->g->setbits);
1116 p->g->setbits = NULL((void*)0);
1117
1118 SETERROR(REG_ESPACE)seterr(p, (12));
1119 /* caller's responsibility not to do set ops */
1120 return(NULL((void*)0));
1121}
1122
1123/*
1124 - freeset - free a now-unused set
1125 */
1126static void
1127freeset(struct parse *p, cset *cs)
1128{
1129 size_t i;
1130 cset *top = &p->g->sets[p->g->ncsets];
1131 size_t css = (size_t)p->g->csetsize;
1132
1133 for (i = 0; i < css; i++)
1134 CHsub(cs, i)((cs)->ptr[(uch)(i)] &= ~(cs)->mask, (cs)->hash -=
(i))
;
1135 if (cs == top-1) /* recover only the easy case */
1136 p->g->ncsets--;
1137}
1138
1139/*
1140 - freezeset - final processing on a set of characters
1141 *
1142 * The main task here is merging identical sets. This is usually a waste
1143 * of time (although the hash code minimizes the overhead), but can win
1144 * big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash
1145 * is done using addition rather than xor -- all ASCII [aA] sets xor to
1146 * the same value!
1147 */
1148static int /* set number */
1149freezeset(struct parse *p, cset *cs)
1150{
1151 uch h = cs->hash;
1152 size_t i;
1153 cset *top = &p->g->sets[p->g->ncsets];
1154 cset *cs2;
1155 size_t css = (size_t)p->g->csetsize;
1156
1157 /* look for an earlier one which is the same */
1158 for (cs2 = &p->g->sets[0]; cs2 < top; cs2++)
1159 if (cs2->hash == h && cs2 != cs) {
1160 /* maybe */
1161 for (i = 0; i < css; i++)
1162 if (!!CHIN(cs2, i)((cs2)->ptr[(uch)(i)] & (cs2)->mask) != !!CHIN(cs, i)((cs)->ptr[(uch)(i)] & (cs)->mask))
1163 break; /* no */
1164 if (i == css)
1165 break; /* yes */
1166 }
1167
1168 if (cs2 < top) { /* found one */
1169 freeset(p, cs);
1170 cs = cs2;
1171 }
1172
1173 return((int)(cs - p->g->sets));
1174}
1175
1176/*
1177 - firstch - return first character in a set (which must have at least one)
1178 */
1179static int /* character; there is no "none" value */
1180firstch(struct parse *p, cset *cs)
1181{
1182 size_t i;
1183 size_t css = (size_t)p->g->csetsize;
1184
1185 for (i = 0; i < css; i++)
1186 if (CHIN(cs, i)((cs)->ptr[(uch)(i)] & (cs)->mask))
1187 return((char)i);
1188 assert(never)((void) (0));
1189 return(0); /* arbitrary */
1190}
1191
1192/*
1193 - nch - number of characters in a set
1194 */
1195static int
1196nch(struct parse *p, cset *cs)
1197{
1198 size_t i;
1199 size_t css = (size_t)p->g->csetsize;
1200 int n = 0;
1201
1202 for (i = 0; i < css; i++)
1203 if (CHIN(cs, i)((cs)->ptr[(uch)(i)] & (cs)->mask))
1204 n++;
1205 return(n);
1206}
1207
1208/*
1209 - mcadd - add a collating element to a cset
1210 */
1211static void
1212mcadd( struct parse *p, cset *cs, const char *cp)
1213{
1214 size_t oldend = cs->smultis;
1215 void *np;
1216
1217 cs->smultis += strlen(cp) + 1;
1218 np = realloc(cs->multis, cs->smultis);
1219 if (np == NULL((void*)0)) {
1220 if (cs->multis)
1221 free(cs->multis);
1222 cs->multis = NULL((void*)0);
1223 SETERROR(REG_ESPACE)seterr(p, (12));
1224 return;
1225 }
1226 cs->multis = np;
1227
1228 llvm_strlcpy(cs->multis + oldend - 1, cp, cs->smultis - oldend + 1);
1229}
1230
1231/*
1232 - mcinvert - invert the list of collating elements in a cset
1233 *
1234 * This would have to know the set of possibilities. Implementation
1235 * is deferred.
1236 */
1237/* ARGSUSED */
1238static void
1239mcinvert(struct parse *p, cset *cs)
1240{
1241 assert(cs->multis == NULL)((void) (0)); /* xxx */
1242}
1243
1244/*
1245 - mccase - add case counterparts of the list of collating elements in a cset
1246 *
1247 * This would have to know the set of possibilities. Implementation
1248 * is deferred.
1249 */
1250/* ARGSUSED */
1251static void
1252mccase(struct parse *p, cset *cs)
1253{
1254 assert(cs->multis == NULL)((void) (0)); /* xxx */
1255}
1256
1257/*
1258 - isinsets - is this character in any sets?
1259 */
1260static int /* predicate */
1261isinsets(struct re_guts *g, int c)
1262{
1263 uch *col;
1264 int i;
1265 int ncols = (g->ncsets+(CHAR_BIT8-1)) / CHAR_BIT8;
1266 unsigned uc = (uch)c;
1267
1268 for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
1269 if (col[uc] != 0)
1270 return(1);
1271 return(0);
1272}
1273
1274/*
1275 - samesets - are these two characters in exactly the same sets?
1276 */
1277static int /* predicate */
1278samesets(struct re_guts *g, int c1, int c2)
1279{
1280 uch *col;
1281 int i;
1282 int ncols = (g->ncsets+(CHAR_BIT8-1)) / CHAR_BIT8;
1283 unsigned uc1 = (uch)c1;
1284 unsigned uc2 = (uch)c2;
1285
1286 for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
1287 if (col[uc1] != col[uc2])
1288 return(0);
1289 return(1);
1290}
1291
1292/*
1293 - categorize - sort out character categories
1294 */
1295static void
1296categorize(struct parse *p, struct re_guts *g)
1297{
1298 cat_t *cats = g->categories;
1299 int c;
1300 int c2;
1301 cat_t cat;
1302
1303 /* avoid making error situations worse */
1304 if (p->error != 0)
1305 return;
1306
1307 for (c = CHAR_MIN(-127 -1); c <= CHAR_MAX127; c++)
1308 if (cats[c] == 0 && isinsets(g, c)) {
1309 cat = g->ncategories++;
1310 cats[c] = cat;
1311 for (c2 = c+1; c2 <= CHAR_MAX127; c2++)
1312 if (cats[c2] == 0 && samesets(g, c, c2))
1313 cats[c2] = cat;
1314 }
1315}
1316
1317/*
1318 - dupl - emit a duplicate of a bunch of sops
1319 */
1320static sopno /* start of duplicate */
1321dupl(struct parse *p,
1322 sopno start, /* from here */
1323 sopno finish) /* to this less one */
1324{
1325 sopno ret = HERE()(p->slen);
1326 sopno len = finish - start;
1327
1328 assert(finish >= start)((void) (0));
1329 if (len == 0)
1330 return(ret);
1331 enlarge(p, p->ssize + len); /* this many unexpected additions */
1332 assert(p->ssize >= p->slen + len)((void) (0));
1333 (void) memmove((char *)(p->strip + p->slen),
1334 (char *)(p->strip + start), (size_t)len*sizeof(sop));
1335 p->slen += len;
1336 return(ret);
1337}
1338
1339/*
1340 - doemit - emit a strip operator
1341 *
1342 * It might seem better to implement this as a macro with a function as
1343 * hard-case backup, but it's just too big and messy unless there are
1344 * some changes to the data structures. Maybe later.
1345 */
1346static void
1347doemit(struct parse *p, sop op, size_t opnd)
1348{
1349 /* avoid making error situations worse */
1350 if (p->error != 0)
1351 return;
1352
1353 /* deal with oversize operands ("can't happen", more or less) */
1354 assert(opnd < 1<<OPSHIFT)((void) (0));
1355
1356 /* deal with undersized strip */
1357 if (p->slen >= p->ssize)
1358 enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */
1359 assert(p->slen < p->ssize)((void) (0));
1360
1361 /* finally, it's all reduced to the easy case */
1362 p->strip[p->slen++] = SOP(op, opnd)((op)|(opnd));
1363}
1364
1365/*
1366 - doinsert - insert a sop into the strip
1367 */
1368static void
1369doinsert(struct parse *p, sop op, size_t opnd, sopno pos)
1370{
1371 sopno sn;
1372 sop s;
1373 int i;
1374
1375 /* avoid making error situations worse */
1376 if (p->error != 0)
1377 return;
1378
1379 sn = HERE()(p->slen);
1380 EMIT(op, opnd)doemit(p, (sop)(op), (size_t)(opnd)); /* do checks, ensure space */
1381 assert(HERE() == sn+1)((void) (0));
1382 s = p->strip[sn];
1383
1384 /* adjust paren pointers */
1385 assert(pos > 0)((void) (0));
1386 for (i = 1; i < NPAREN10; i++) {
1387 if (p->pbegin[i] >= pos) {
1388 p->pbegin[i]++;
1389 }
1390 if (p->pend[i] >= pos) {
1391 p->pend[i]++;
1392 }
1393 }
1394
1395 memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos],
1396 (HERE()(p->slen)-pos-1)*sizeof(sop));
1397 p->strip[pos] = s;
1398}
1399
1400/*
1401 - dofwd - complete a forward reference
1402 */
1403static void
1404dofwd(struct parse *p, sopno pos, sop value)
1405{
1406 /* avoid making error situations worse */
1407 if (p->error != 0)
1408 return;
1409
1410 assert(value < 1<<OPSHIFT)((void) (0));
1411 p->strip[pos] = OP(p->strip[pos])((p->strip[pos])&0xf8000000LU) | value;
1412}
1413
1414/*
1415 - enlarge - enlarge the strip
1416 */
1417static void
1418enlarge(struct parse *p, sopno size)
1419{
1420 sop *sp;
1421
1422 if (p->ssize >= size)
1423 return;
1424
1425 if ((uintptr_t)size > SIZE_MAX(18446744073709551615UL) / sizeof(sop)) {
1426 SETERROR(REG_ESPACE)seterr(p, (12));
1427 return;
1428 }
1429
1430 sp = (sop *)realloc(p->strip, size*sizeof(sop));
1431 if (sp == NULL((void*)0)) {
1432 SETERROR(REG_ESPACE)seterr(p, (12));
1433 return;
1434 }
1435 p->strip = sp;
1436 p->ssize = size;
1437}
1438
1439/*
1440 - stripsnug - compact the strip
1441 */
1442static void
1443stripsnug(struct parse *p, struct re_guts *g)
1444{
1445 g->nstates = p->slen;
1446 if ((uintptr_t)p->slen > SIZE_MAX(18446744073709551615UL) / sizeof(sop)) {
1447 g->strip = p->strip;
1448 SETERROR(REG_ESPACE)seterr(p, (12));
1449 return;
1450 }
1451
1452 g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop));
1453 if (g->strip == NULL((void*)0)) {
1454 SETERROR(REG_ESPACE)seterr(p, (12));
1455 g->strip = p->strip;
1456 }
1457}
1458
1459/*
1460 - findmust - fill in must and mlen with longest mandatory literal string
1461 *
1462 * This algorithm could do fancy things like analyzing the operands of |
1463 * for common subsequences. Someday. This code is simple and finds most
1464 * of the interesting cases.
1465 *
1466 * Note that must and mlen got initialized during setup.
1467 */
1468static void
1469findmust(struct parse *p, struct re_guts *g)
1470{
1471 sop *scan;
1472 sop *start = 0; /* start initialized in the default case, after that */
1473 sop *newstart = 0; /* newstart was initialized in the OCHAR case */
1474 sopno newlen;
1475 sop s;
1476 char *cp;
1477 sopno i;
1478
1479 /* avoid making error situations worse */
1480 if (p->error != 0)
1
Assuming the condition is false
2
Taking false branch
1481 return;
1482
1483 /* find the longest OCHAR sequence in strip */
1484 newlen = 0;
1485 scan = g->strip + 1;
1486 do {
7
Loop condition is false. Exiting loop
1487 s = *scan++;
1488 switch (OP(s)((s)&0xf8000000LU)) {
3
Control jumps to the 'default' case at line 1512
1489 case OCHAR(2LU<<((unsigned)27)): /* sequence member */
1490 if (newlen == 0) /* new sequence */
1491 newstart = scan - 1;
1492 newlen++;
1493 break;
1494 case OPLUS_(9LU<<((unsigned)27)): /* things that don't break one */
1495 case OLPAREN(13LU<<((unsigned)27)):
1496 case ORPAREN(14LU<<((unsigned)27)):
1497 break;
1498 case OQUEST_(11LU<<((unsigned)27)): /* things that must be skipped */
1499 case OCH_(15LU<<((unsigned)27)):
1500 scan--;
1501 do {
1502 scan += OPND(s)((s)&0x07ffffffLU);
1503 s = *scan;
1504 /* assert() interferes w debug printouts */
1505 if (OP(s)((s)&0xf8000000LU) != O_QUEST(12LU<<((unsigned)27)) && OP(s)((s)&0xf8000000LU) != O_CH(18LU<<((unsigned)27)) &&
1506 OP(s)((s)&0xf8000000LU) != OOR2(17LU<<((unsigned)27))) {
1507 g->iflags |= REGEX_BAD04;
1508 return;
1509 }
1510 } while (OP(s)((s)&0xf8000000LU) != O_QUEST(12LU<<((unsigned)27)) && OP(s)((s)&0xf8000000LU) != O_CH(18LU<<((unsigned)27)));
1511 /* fallthrough */
1512 default: /* things that break a sequence */
1513 if (newlen > g->mlen) { /* ends one */
4
Assuming the condition is false
5
Taking false branch
1514 start = newstart;
1515 g->mlen = newlen;
1516 }
1517 newlen = 0;
1518 break;
6
Execution continues on line 1520
1519 }
1520 } while (OP(s)((s)&0xf8000000LU) != OEND(1LU<<((unsigned)27)));
1521
1522 if (g->mlen == 0) /* there isn't one */
8
Assuming the condition is false
9
Taking false branch
1523 return;
1524
1525 /* turn it into a character string */
1526 g->must = malloc((size_t)g->mlen + 1);
1527 if (g->must == NULL((void*)0)) { /* argh; just forget it */
10
Assuming the condition is false
11
Taking false branch
1528 g->mlen = 0;
1529 return;
1530 }
1531 cp = g->must;
1532 scan = start;
1533 for (i = g->mlen; i > 0; i--) {
12
Loop condition is true. Entering loop body
1534 while (OP(s = *scan++)((s = *scan++)&0xf8000000LU) != OCHAR(2LU<<((unsigned)27)))
13
Within the expansion of the macro 'OP':
a
Null pointer value stored to 'scan'
b
Dereference of null pointer
1535 continue;
1536 assert(cp < g->must + g->mlen)((void) (0));
1537 *cp++ = (char)OPND(s)((s)&0x07ffffffLU);
1538 }
1539 assert(cp == g->must + g->mlen)((void) (0));
1540 *cp++ = '\0'; /* just on general principles */
1541}
1542
1543/*
1544 - pluscount - count + nesting
1545 */
1546static sopno /* nesting depth */
1547pluscount(struct parse *p, struct re_guts *g)
1548{
1549 sop *scan;
1550 sop s;
1551 sopno plusnest = 0;
1552 sopno maxnest = 0;
1553
1554 if (p->error != 0)
1555 return(0); /* there may not be an OEND */
1556
1557 scan = g->strip + 1;
1558 do {
1559 s = *scan++;
1560 switch (OP(s)((s)&0xf8000000LU)) {
1561 case OPLUS_(9LU<<((unsigned)27)):
1562 plusnest++;
1563 break;
1564 case O_PLUS(10LU<<((unsigned)27)):
1565 if (plusnest > maxnest)
1566 maxnest = plusnest;
1567 plusnest--;
1568 break;
1569 }
1570 } while (OP(s)((s)&0xf8000000LU) != OEND(1LU<<((unsigned)27)));
1571 if (plusnest != 0)
1572 g->iflags |= REGEX_BAD04;
1573 return(maxnest);
1574}