LLVM  6.0.0svn
regengine.inc
Go to the documentation of this file.
1 /*-
2  * This code is derived from OpenBSD's libc/regex, original license follows:
3  *
4  * Copyright (c) 1992, 1993, 1994 Henry Spencer.
5  * Copyright (c) 1992, 1993, 1994
6  * The Regents of the University of California. All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Henry Spencer.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  * notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  * notice, this list of conditions and the following disclaimer in the
18  * documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  * may be used to endorse or promote products derived from this software
21  * without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  *
35  * @(#)engine.c 8.5 (Berkeley) 3/20/94
36  */
37 
38 /*
39  * The matching engine and friends. This file is #included by regexec.c
40  * after suitable #defines of a variety of macros used herein, so that
41  * different state representations can be used without duplicating masses
42  * of code.
43  */
44 
45 #ifdef SNAMES
46 #define matcher smatcher
47 #define fast sfast
48 #define slow sslow
49 #define dissect sdissect
50 #define backref sbackref
51 #define step sstep
52 #define print sprint
53 #define at sat
54 #define match smat
55 #define nope snope
56 #endif
57 #ifdef LNAMES
58 #define matcher lmatcher
59 #define fast lfast
60 #define slow lslow
61 #define dissect ldissect
62 #define backref lbackref
63 #define step lstep
64 #define print lprint
65 #define at lat
66 #define match lmat
67 #define nope lnope
68 #endif
69 
70 /* another structure passed up and down to avoid zillions of parameters */
71 struct match {
72  struct re_guts *g;
73  int eflags;
74  llvm_regmatch_t *pmatch; /* [nsub+1] (0 element unused) */
75  const char *offp; /* offsets work from here */
76  const char *beginp; /* start of string -- virtual NUL precedes */
77  const char *endp; /* end of string -- virtual NUL here */
78  const char *coldp; /* can be no match starting before here */
79  const char **lastpos; /* [nplus+1] */
80  STATEVARS;
81  states st; /* current states */
82  states fresh; /* states for a fresh start */
83  states tmp; /* temporary */
84  states empty; /* empty set of states */
85 };
86 
87 static int matcher(struct re_guts *, const char *, size_t,
88  llvm_regmatch_t[], int);
89 static const char *dissect(struct match *, const char *, const char *, sopno,
90  sopno);
91 static const char *backref(struct match *, const char *, const char *, sopno,
92  sopno, sopno, int);
93 static const char *fast(struct match *, const char *, const char *, sopno, sopno);
94 static const char *slow(struct match *, const char *, const char *, sopno, sopno);
95 static states step(struct re_guts *, sopno, sopno, states, int, states);
96 #define MAX_RECURSION 100
97 #define BOL (OUT+1)
98 #define EOL (BOL+1)
99 #define BOLEOL (BOL+2)
100 #define NOTHING (BOL+3)
101 #define BOW (BOL+4)
102 #define EOW (BOL+5)
103 #define CODEMAX (BOL+5) /* highest code used */
104 #define NONCHAR(c) ((c) > CHAR_MAX)
105 #define NNONCHAR (CODEMAX-CHAR_MAX)
106 #ifdef REDEBUG
107 static void print(struct match *, char *, states, int, FILE *);
108 #endif
109 #ifdef REDEBUG
110 static void at(struct match *, char *, char *, char *, sopno, sopno);
111 #endif
112 #ifdef REDEBUG
113 static char *pchar(int);
114 #endif
115 
116 #ifdef REDEBUG
117 #define SP(t, s, c) print(m, t, s, c, stdout)
118 #define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2)
119 #define NOTE(str) { if (m->eflags&REG_TRACE) (void)printf("=%s\n", (str)); }
120 static int nope = 0;
121 #else
122 #define SP(t, s, c) /* nothing */
123 #define AT(t, p1, p2, s1, s2) /* nothing */
124 #define NOTE(s) /* nothing */
125 #endif
126 
127 /*
128  - matcher - the actual matching engine
129  */
130 static int /* 0 success, REG_NOMATCH failure */
131 matcher(struct re_guts *g, const char *string, size_t nmatch,
132  llvm_regmatch_t pmatch[],
133  int eflags)
134 {
135  const char *endp;
136  size_t i;
137  struct match mv;
138  struct match *m = &mv;
139  const char *dp;
140  const sopno gf = g->firststate+1; /* +1 for OEND */
141  const sopno gl = g->laststate;
142  const char *start;
143  const char *stop;
144 
145  /* simplify the situation where possible */
146  if (g->cflags&REG_NOSUB)
147  nmatch = 0;
148  if (eflags&REG_STARTEND) {
149  start = string + pmatch[0].rm_so;
150  stop = string + pmatch[0].rm_eo;
151  } else {
152  start = string;
153  stop = start + strlen(start);
154  }
155  if (stop < start)
156  return(REG_INVARG);
157 
158  /* prescreening; this does wonders for this rather slow code */
159  if (g->must != NULL) {
160  for (dp = start; dp < stop; dp++)
161  if (*dp == g->must[0] && stop - dp >= g->mlen &&
162  memcmp(dp, g->must, (size_t)g->mlen) == 0)
163  break;
164  if (dp == stop) /* we didn't find g->must */
165  return(REG_NOMATCH);
166  }
167 
168  /* match struct setup */
169  m->g = g;
170  m->eflags = eflags;
171  m->pmatch = NULL;
172  m->lastpos = NULL;
173  m->offp = string;
174  m->beginp = start;
175  m->endp = stop;
176  STATESETUP(m, 4);
177  SETUP(m->st);
178  SETUP(m->fresh);
179  SETUP(m->tmp);
180  SETUP(m->empty);
181  CLEAR(m->empty);
182 
183  /* this loop does only one repetition except for backrefs */
184  for (;;) {
185  endp = fast(m, start, stop, gf, gl);
186  if (endp == NULL) { /* a miss */
187  free(m->pmatch);
188  free((void*)m->lastpos);
189  STATETEARDOWN(m);
190  return(REG_NOMATCH);
191  }
192  if (nmatch == 0 && !g->backrefs)
193  break; /* no further info needed */
194 
195  /* where? */
196  assert(m->coldp != NULL);
197  for (;;) {
198  NOTE("finding start");
199  endp = slow(m, m->coldp, stop, gf, gl);
200  if (endp != NULL)
201  break;
202  assert(m->coldp < m->endp);
203  m->coldp++;
204  }
205  if (nmatch == 1 && !g->backrefs)
206  break; /* no further info needed */
207 
208  /* oh my, they want the subexpressions... */
209  if (m->pmatch == NULL)
210  m->pmatch = (llvm_regmatch_t *)malloc((m->g->nsub + 1) *
211  sizeof(llvm_regmatch_t));
212  if (m->pmatch == NULL) {
213  STATETEARDOWN(m);
214  return(REG_ESPACE);
215  }
216  for (i = 1; i <= m->g->nsub; i++)
217  m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
218  if (!g->backrefs && !(m->eflags&REG_BACKR)) {
219  NOTE("dissecting");
220  dp = dissect(m, m->coldp, endp, gf, gl);
221  } else {
222  if (g->nplus > 0 && m->lastpos == NULL)
223  m->lastpos = (const char **)malloc((g->nplus+1) *
224  sizeof(char *));
225  if (g->nplus > 0 && m->lastpos == NULL) {
226  free(m->pmatch);
227  STATETEARDOWN(m);
228  return(REG_ESPACE);
229  }
230  NOTE("backref dissect");
231  dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
232  }
233  if (dp != NULL)
234  break;
235 
236  /* uh-oh... we couldn't find a subexpression-level match */
237  assert(g->backrefs); /* must be back references doing it */
238  assert(g->nplus == 0 || m->lastpos != NULL);
239  for (;;) {
240  if (dp != NULL || endp <= m->coldp)
241  break; /* defeat */
242  NOTE("backoff");
243  endp = slow(m, m->coldp, endp-1, gf, gl);
244  if (endp == NULL)
245  break; /* defeat */
246  /* try it on a shorter possibility */
247 #ifndef NDEBUG
248  for (i = 1; i <= m->g->nsub; i++) {
249  assert(m->pmatch[i].rm_so == -1);
250  assert(m->pmatch[i].rm_eo == -1);
251  }
252 #endif
253  NOTE("backoff dissect");
254  dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
255  }
256  assert(dp == NULL || dp == endp);
257  if (dp != NULL) /* found a shorter one */
258  break;
259 
260  /* despite initial appearances, there is no match here */
261  NOTE("false alarm");
262  if (m->coldp == stop)
263  break;
264  start = m->coldp + 1; /* recycle starting later */
265  }
266 
267  /* fill in the details if requested */
268  if (nmatch > 0) {
269  pmatch[0].rm_so = m->coldp - m->offp;
270  pmatch[0].rm_eo = endp - m->offp;
271  }
272  if (nmatch > 1) {
273  assert(m->pmatch != NULL);
274  for (i = 1; i < nmatch; i++)
275  if (i <= m->g->nsub)
276  pmatch[i] = m->pmatch[i];
277  else {
278  pmatch[i].rm_so = -1;
279  pmatch[i].rm_eo = -1;
280  }
281  }
282 
283  if (m->pmatch != NULL)
284  free((char *)m->pmatch);
285  if (m->lastpos != NULL)
286  free((char *)m->lastpos);
287  STATETEARDOWN(m);
288  return(0);
289 }
290 
291 /*
292  - dissect - figure out what matched what, no back references
293  */
294 static const char * /* == stop (success) always */
295 dissect(struct match *m, const char *start, const char *stop, sopno startst,
296  sopno stopst)
297 {
298  int i;
299  sopno ss; /* start sop of current subRE */
300  sopno es; /* end sop of current subRE */
301  const char *sp; /* start of string matched by it */
302  const char *stp; /* string matched by it cannot pass here */
303  const char *rest; /* start of rest of string */
304  const char *tail; /* string unmatched by rest of RE */
305  sopno ssub; /* start sop of subsubRE */
306  sopno esub; /* end sop of subsubRE */
307  const char *ssp; /* start of string matched by subsubRE */
308  const char *sep; /* end of string matched by subsubRE */
309  const char *oldssp; /* previous ssp */
310 
311  AT("diss", start, stop, startst, stopst);
312  sp = start;
313  for (ss = startst; ss < stopst; ss = es) {
314  /* identify end of subRE */
315  es = ss;
316  switch (OP(m->g->strip[es])) {
317  case OPLUS_:
318  case OQUEST_:
319  es += OPND(m->g->strip[es]);
320  break;
321  case OCH_:
322  while (OP(m->g->strip[es]) != O_CH)
323  es += OPND(m->g->strip[es]);
324  break;
325  }
326  es++;
327 
328  /* figure out what it matched */
329  switch (OP(m->g->strip[ss])) {
330  case OEND:
331  assert(nope);
332  break;
333  case OCHAR:
334  sp++;
335  break;
336  case OBOL:
337  case OEOL:
338  case OBOW:
339  case OEOW:
340  break;
341  case OANY:
342  case OANYOF:
343  sp++;
344  break;
345  case OBACK_:
346  case O_BACK:
347  assert(nope);
348  break;
349  /* cases where length of match is hard to find */
350  case OQUEST_:
351  stp = stop;
352  for (;;) {
353  /* how long could this one be? */
354  rest = slow(m, sp, stp, ss, es);
355  assert(rest != NULL); /* it did match */
356  /* could the rest match the rest? */
357  tail = slow(m, rest, stop, es, stopst);
358  if (tail == stop)
359  break; /* yes! */
360  /* no -- try a shorter match for this one */
361  stp = rest - 1;
362  assert(stp >= sp); /* it did work */
363  }
364  ssub = ss + 1;
365  esub = es - 1;
366  /* did innards match? */
367  if (slow(m, sp, rest, ssub, esub) != NULL) {
368  const char *dp = dissect(m, sp, rest, ssub, esub);
369  (void)dp; /* avoid warning if assertions off */
370  assert(dp == rest);
371  } else /* no */
372  assert(sp == rest);
373  sp = rest;
374  break;
375  case OPLUS_:
376  stp = stop;
377  for (;;) {
378  /* how long could this one be? */
379  rest = slow(m, sp, stp, ss, es);
380  assert(rest != NULL); /* it did match */
381  /* could the rest match the rest? */
382  tail = slow(m, rest, stop, es, stopst);
383  if (tail == stop)
384  break; /* yes! */
385  /* no -- try a shorter match for this one */
386  stp = rest - 1;
387  assert(stp >= sp); /* it did work */
388  }
389  ssub = ss + 1;
390  esub = es - 1;
391  ssp = sp;
392  oldssp = ssp;
393  for (;;) { /* find last match of innards */
394  sep = slow(m, ssp, rest, ssub, esub);
395  if (sep == NULL || sep == ssp)
396  break; /* failed or matched null */
397  oldssp = ssp; /* on to next try */
398  ssp = sep;
399  }
400  if (sep == NULL) {
401  /* last successful match */
402  sep = ssp;
403  ssp = oldssp;
404  }
405  assert(sep == rest); /* must exhaust substring */
406  assert(slow(m, ssp, sep, ssub, esub) == rest);
407  {
408  const char *dp = dissect(m, ssp, sep, ssub, esub);
409  (void)dp; /* avoid warning if assertions off */
410  assert(dp == sep);
411  }
412  sp = rest;
413  break;
414  case OCH_:
415  stp = stop;
416  for (;;) {
417  /* how long could this one be? */
418  rest = slow(m, sp, stp, ss, es);
419  assert(rest != NULL); /* it did match */
420  /* could the rest match the rest? */
421  tail = slow(m, rest, stop, es, stopst);
422  if (tail == stop)
423  break; /* yes! */
424  /* no -- try a shorter match for this one */
425  stp = rest - 1;
426  assert(stp >= sp); /* it did work */
427  }
428  ssub = ss + 1;
429  esub = ss + OPND(m->g->strip[ss]) - 1;
430  assert(OP(m->g->strip[esub]) == OOR1);
431  for (;;) { /* find first matching branch */
432  if (slow(m, sp, rest, ssub, esub) == rest)
433  break; /* it matched all of it */
434  /* that one missed, try next one */
435  assert(OP(m->g->strip[esub]) == OOR1);
436  esub++;
437  assert(OP(m->g->strip[esub]) == OOR2);
438  ssub = esub + 1;
439  esub += OPND(m->g->strip[esub]);
440  if (OP(m->g->strip[esub]) == OOR2)
441  esub--;
442  else
443  assert(OP(m->g->strip[esub]) == O_CH);
444  }
445  {
446  const char *dp = dissect(m, sp, rest, ssub, esub);
447  (void)dp; /* avoid warning if assertions off */
448  assert(dp == rest);
449  }
450  sp = rest;
451  break;
452  case O_PLUS:
453  case O_QUEST:
454  case OOR1:
455  case OOR2:
456  case O_CH:
457  assert(nope);
458  break;
459  case OLPAREN:
460  i = OPND(m->g->strip[ss]);
461  assert(0 < i && i <= m->g->nsub);
462  m->pmatch[i].rm_so = sp - m->offp;
463  break;
464  case ORPAREN:
465  i = OPND(m->g->strip[ss]);
466  assert(0 < i && i <= m->g->nsub);
467  m->pmatch[i].rm_eo = sp - m->offp;
468  break;
469  default: /* uh oh */
470  assert(nope);
471  break;
472  }
473  }
474 
475  assert(sp == stop);
476  return(sp);
477 }
478 
479 /*
480  - backref - figure out what matched what, figuring in back references
481  */
482 static const char * /* == stop (success) or NULL (failure) */
483 backref(struct match *m, const char *start, const char *stop, sopno startst,
484  sopno stopst, sopno lev, int rec) /* PLUS nesting level */
485 {
486  int i;
487  sopno ss; /* start sop of current subRE */
488  const char *sp; /* start of string matched by it */
489  sopno ssub; /* start sop of subsubRE */
490  sopno esub; /* end sop of subsubRE */
491  const char *ssp; /* start of string matched by subsubRE */
492  const char *dp;
493  size_t len;
494  int hard;
495  sop s;
496  llvm_regoff_t offsave;
497  cset *cs;
498 
499  AT("back", start, stop, startst, stopst);
500  sp = start;
501 
502  /* get as far as we can with easy stuff */
503  hard = 0;
504  for (ss = startst; !hard && ss < stopst; ss++)
505  switch (OP(s = m->g->strip[ss])) {
506  case OCHAR:
507  if (sp == stop || *sp++ != (char)OPND(s))
508  return(NULL);
509  break;
510  case OANY:
511  if (sp == stop)
512  return(NULL);
513  sp++;
514  break;
515  case OANYOF:
516  cs = &m->g->sets[OPND(s)];
517  if (sp == stop || !CHIN(cs, *sp++))
518  return(NULL);
519  break;
520  case OBOL:
521  if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
522  (sp < m->endp && *(sp-1) == '\n' &&
523  (m->g->cflags&REG_NEWLINE)) )
524  { /* yes */ }
525  else
526  return(NULL);
527  break;
528  case OEOL:
529  if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
530  (sp < m->endp && *sp == '\n' &&
531  (m->g->cflags&REG_NEWLINE)) )
532  { /* yes */ }
533  else
534  return(NULL);
535  break;
536  case OBOW:
537  if (( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
538  (sp < m->endp && *(sp-1) == '\n' &&
539  (m->g->cflags&REG_NEWLINE)) ||
540  (sp > m->beginp &&
541  !ISWORD(*(sp-1))) ) &&
542  (sp < m->endp && ISWORD(*sp)) )
543  { /* yes */ }
544  else
545  return(NULL);
546  break;
547  case OEOW:
548  if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
549  (sp < m->endp && *sp == '\n' &&
550  (m->g->cflags&REG_NEWLINE)) ||
551  (sp < m->endp && !ISWORD(*sp)) ) &&
552  (sp > m->beginp && ISWORD(*(sp-1))) )
553  { /* yes */ }
554  else
555  return(NULL);
556  break;
557  case O_QUEST:
558  break;
559  case OOR1: /* matches null but needs to skip */
560  ss++;
561  s = m->g->strip[ss];
562  do {
563  assert(OP(s) == OOR2);
564  ss += OPND(s);
565  } while (OP(s = m->g->strip[ss]) != O_CH);
566  /* note that the ss++ gets us past the O_CH */
567  break;
568  default: /* have to make a choice */
569  hard = 1;
570  break;
571  }
572  if (!hard) { /* that was it! */
573  if (sp != stop)
574  return(NULL);
575  return(sp);
576  }
577  ss--; /* adjust for the for's final increment */
578 
579  /* the hard stuff */
580  AT("hard", sp, stop, ss, stopst);
581  s = m->g->strip[ss];
582  switch (OP(s)) {
583  case OBACK_: /* the vilest depths */
584  i = OPND(s);
585  assert(0 < i && i <= m->g->nsub);
586  if (m->pmatch[i].rm_eo == -1)
587  return(NULL);
588  assert(m->pmatch[i].rm_so != -1);
589  len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
590  if (len == 0 && rec++ > MAX_RECURSION)
591  return(NULL);
592  assert(stop - m->beginp >= len);
593  if (sp > stop - len)
594  return(NULL); /* not enough left to match */
595  ssp = m->offp + m->pmatch[i].rm_so;
596  if (memcmp(sp, ssp, len) != 0)
597  return(NULL);
598  while (m->g->strip[ss] != SOP(O_BACK, i))
599  ss++;
600  return(backref(m, sp+len, stop, ss+1, stopst, lev, rec));
601  break;
602  case OQUEST_: /* to null or not */
603  dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
604  if (dp != NULL)
605  return(dp); /* not */
606  return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev, rec));
607  break;
608  case OPLUS_:
609  assert(m->lastpos != NULL);
610  assert(lev+1 <= m->g->nplus);
611  m->lastpos[lev+1] = sp;
612  return(backref(m, sp, stop, ss+1, stopst, lev+1, rec));
613  break;
614  case O_PLUS:
615  if (sp == m->lastpos[lev]) /* last pass matched null */
616  return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
617  /* try another pass */
618  m->lastpos[lev] = sp;
619  dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev, rec);
620  if (dp == NULL)
621  return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
622  else
623  return(dp);
624  break;
625  case OCH_: /* find the right one, if any */
626  ssub = ss + 1;
627  esub = ss + OPND(s) - 1;
628  assert(OP(m->g->strip[esub]) == OOR1);
629  for (;;) { /* find first matching branch */
630  dp = backref(m, sp, stop, ssub, esub, lev, rec);
631  if (dp != NULL)
632  return(dp);
633  /* that one missed, try next one */
634  if (OP(m->g->strip[esub]) == O_CH)
635  return(NULL); /* there is none */
636  esub++;
637  assert(OP(m->g->strip[esub]) == OOR2);
638  ssub = esub + 1;
639  esub += OPND(m->g->strip[esub]);
640  if (OP(m->g->strip[esub]) == OOR2)
641  esub--;
642  else
643  assert(OP(m->g->strip[esub]) == O_CH);
644  }
645  break;
646  case OLPAREN: /* must undo assignment if rest fails */
647  i = OPND(s);
648  assert(0 < i && i <= m->g->nsub);
649  offsave = m->pmatch[i].rm_so;
650  m->pmatch[i].rm_so = sp - m->offp;
651  dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
652  if (dp != NULL)
653  return(dp);
654  m->pmatch[i].rm_so = offsave;
655  return(NULL);
656  break;
657  case ORPAREN: /* must undo assignment if rest fails */
658  i = OPND(s);
659  assert(0 < i && i <= m->g->nsub);
660  offsave = m->pmatch[i].rm_eo;
661  m->pmatch[i].rm_eo = sp - m->offp;
662  dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
663  if (dp != NULL)
664  return(dp);
665  m->pmatch[i].rm_eo = offsave;
666  return(NULL);
667  break;
668  default: /* uh oh */
669  assert(nope);
670  break;
671  }
672 
673  /* "can't happen" */
674  assert(nope);
675  /* NOTREACHED */
676  return NULL;
677 }
678 
679 /*
680  - fast - step through the string at top speed
681  */
682 static const char * /* where tentative match ended, or NULL */
683 fast(struct match *m, const char *start, const char *stop, sopno startst,
684  sopno stopst)
685 {
686  states st = m->st;
687  states fresh = m->fresh;
688  states tmp = m->tmp;
689  const char *p = start;
690  int c = (start == m->beginp) ? OUT : *(start-1);
691  int lastc; /* previous c */
692  int flagch;
693  int i;
694  const char *coldp; /* last p after which no match was underway */
695 
696  CLEAR(st);
697  SET1(st, startst);
698  st = step(m->g, startst, stopst, st, NOTHING, st);
699  ASSIGN(fresh, st);
700  SP("start", st, *p);
701  coldp = NULL;
702  for (;;) {
703  /* next character */
704  lastc = c;
705  c = (p == m->endp) ? OUT : *p;
706  if (EQ(st, fresh))
707  coldp = p;
708 
709  /* is there an EOL and/or BOL between lastc and c? */
710  flagch = '\0';
711  i = 0;
712  if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
713  (lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
714  flagch = BOL;
715  i = m->g->nbol;
716  }
717  if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
718  (c == OUT && !(m->eflags&REG_NOTEOL)) ) {
719  flagch = (flagch == BOL) ? BOLEOL : EOL;
720  i += m->g->neol;
721  }
722  if (i != 0) {
723  for (; i > 0; i--)
724  st = step(m->g, startst, stopst, st, flagch, st);
725  SP("boleol", st, c);
726  }
727 
728  /* how about a word boundary? */
729  if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
730  (c != OUT && ISWORD(c)) ) {
731  flagch = BOW;
732  }
733  if ( (lastc != OUT && ISWORD(lastc)) &&
734  (flagch == EOL || (c != OUT && !ISWORD(c))) ) {
735  flagch = EOW;
736  }
737  if (flagch == BOW || flagch == EOW) {
738  st = step(m->g, startst, stopst, st, flagch, st);
739  SP("boweow", st, c);
740  }
741 
742  /* are we done? */
743  if (ISSET(st, stopst) || p == stop)
744  break; /* NOTE BREAK OUT */
745 
746  /* no, we must deal with this character */
747  ASSIGN(tmp, st);
748  ASSIGN(st, fresh);
749  assert(c != OUT);
750  st = step(m->g, startst, stopst, tmp, c, st);
751  SP("aft", st, c);
752  assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
753  p++;
754  }
755 
756  assert(coldp != NULL);
757  m->coldp = coldp;
758  if (ISSET(st, stopst))
759  return(p+1);
760  else
761  return(NULL);
762 }
763 
764 /*
765  - slow - step through the string more deliberately
766  */
767 static const char * /* where it ended */
768 slow(struct match *m, const char *start, const char *stop, sopno startst,
769  sopno stopst)
770 {
771  states st = m->st;
772  states empty = m->empty;
773  states tmp = m->tmp;
774  const char *p = start;
775  int c = (start == m->beginp) ? OUT : *(start-1);
776  int lastc; /* previous c */
777  int flagch;
778  int i;
779  const char *matchp; /* last p at which a match ended */
780 
781  AT("slow", start, stop, startst, stopst);
782  CLEAR(st);
783  SET1(st, startst);
784  SP("sstart", st, *p);
785  st = step(m->g, startst, stopst, st, NOTHING, st);
786  matchp = NULL;
787  for (;;) {
788  /* next character */
789  lastc = c;
790  c = (p == m->endp) ? OUT : *p;
791 
792  /* is there an EOL and/or BOL between lastc and c? */
793  flagch = '\0';
794  i = 0;
795  if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
796  (lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
797  flagch = BOL;
798  i = m->g->nbol;
799  }
800  if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
801  (c == OUT && !(m->eflags&REG_NOTEOL)) ) {
802  flagch = (flagch == BOL) ? BOLEOL : EOL;
803  i += m->g->neol;
804  }
805  if (i != 0) {
806  for (; i > 0; i--)
807  st = step(m->g, startst, stopst, st, flagch, st);
808  SP("sboleol", st, c);
809  }
810 
811  /* how about a word boundary? */
812  if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
813  (c != OUT && ISWORD(c)) ) {
814  flagch = BOW;
815  }
816  if ( (lastc != OUT && ISWORD(lastc)) &&
817  (flagch == EOL || (c != OUT && !ISWORD(c))) ) {
818  flagch = EOW;
819  }
820  if (flagch == BOW || flagch == EOW) {
821  st = step(m->g, startst, stopst, st, flagch, st);
822  SP("sboweow", st, c);
823  }
824 
825  /* are we done? */
826  if (ISSET(st, stopst))
827  matchp = p;
828  if (EQ(st, empty) || p == stop)
829  break; /* NOTE BREAK OUT */
830 
831  /* no, we must deal with this character */
832  ASSIGN(tmp, st);
833  ASSIGN(st, empty);
834  assert(c != OUT);
835  st = step(m->g, startst, stopst, tmp, c, st);
836  SP("saft", st, c);
837  assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
838  p++;
839  }
840 
841  return(matchp);
842 }
843 
844 
845 /*
846  - step - map set of states reachable before char to set reachable after
847  */
848 static states
849 step(struct re_guts *g,
850  sopno start, /* start state within strip */
851  sopno stop, /* state after stop state within strip */
852  states bef, /* states reachable before */
853  int ch, /* character or NONCHAR code */
854  states aft) /* states already known reachable after */
855 {
856  cset *cs;
857  sop s;
858  sopno pc;
859  onestate here; /* note, macros know this name */
860  sopno look;
861  int i;
862 
863  for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
864  s = g->strip[pc];
865  switch (OP(s)) {
866  case OEND:
867  assert(pc == stop-1);
868  break;
869  case OCHAR:
870  /* only characters can match */
871  assert(!NONCHAR(ch) || ch != (char)OPND(s));
872  if (ch == (char)OPND(s))
873  FWD(aft, bef, 1);
874  break;
875  case OBOL:
876  if (ch == BOL || ch == BOLEOL)
877  FWD(aft, bef, 1);
878  break;
879  case OEOL:
880  if (ch == EOL || ch == BOLEOL)
881  FWD(aft, bef, 1);
882  break;
883  case OBOW:
884  if (ch == BOW)
885  FWD(aft, bef, 1);
886  break;
887  case OEOW:
888  if (ch == EOW)
889  FWD(aft, bef, 1);
890  break;
891  case OANY:
892  if (!NONCHAR(ch))
893  FWD(aft, bef, 1);
894  break;
895  case OANYOF:
896  cs = &g->sets[OPND(s)];
897  if (!NONCHAR(ch) && CHIN(cs, ch))
898  FWD(aft, bef, 1);
899  break;
900  case OBACK_: /* ignored here */
901  case O_BACK:
902  FWD(aft, aft, 1);
903  break;
904  case OPLUS_: /* forward, this is just an empty */
905  FWD(aft, aft, 1);
906  break;
907  case O_PLUS: /* both forward and back */
908  FWD(aft, aft, 1);
909  i = ISSETBACK(aft, OPND(s));
910  BACK(aft, aft, OPND(s));
911  if (!i && ISSETBACK(aft, OPND(s))) {
912  /* oho, must reconsider loop body */
913  pc -= OPND(s) + 1;
914  INIT(here, pc);
915  }
916  break;
917  case OQUEST_: /* two branches, both forward */
918  FWD(aft, aft, 1);
919  FWD(aft, aft, OPND(s));
920  break;
921  case O_QUEST: /* just an empty */
922  FWD(aft, aft, 1);
923  break;
924  case OLPAREN: /* not significant here */
925  case ORPAREN:
926  FWD(aft, aft, 1);
927  break;
928  case OCH_: /* mark the first two branches */
929  FWD(aft, aft, 1);
930  assert(OP(g->strip[pc+OPND(s)]) == OOR2);
931  FWD(aft, aft, OPND(s));
932  break;
933  case OOR1: /* done a branch, find the O_CH */
934  if (ISSTATEIN(aft, here)) {
935  for (look = 1;
936  OP(s = g->strip[pc+look]) != O_CH;
937  look += OPND(s))
938  assert(OP(s) == OOR2);
939  FWD(aft, aft, look);
940  }
941  break;
942  case OOR2: /* propagate OCH_'s marking */
943  FWD(aft, aft, 1);
944  if (OP(g->strip[pc+OPND(s)]) != O_CH) {
945  assert(OP(g->strip[pc+OPND(s)]) == OOR2);
946  FWD(aft, aft, OPND(s));
947  }
948  break;
949  case O_CH: /* just empty */
950  FWD(aft, aft, 1);
951  break;
952  default: /* ooooops... */
953  assert(nope);
954  break;
955  }
956  }
957 
958  return(aft);
959 }
960 
961 #ifdef REDEBUG
962 /*
963  - print - print a set of states
964  */
965 static void
966 print(struct match *m, char *caption, states st, int ch, FILE *d)
967 {
968  struct re_guts *g = m->g;
969  int i;
970  int first = 1;
971 
972  if (!(m->eflags&REG_TRACE))
973  return;
974 
975  (void)fprintf(d, "%s", caption);
976  if (ch != '\0')
977  (void)fprintf(d, " %s", pchar(ch));
978  for (i = 0; i < g->nstates; i++)
979  if (ISSET(st, i)) {
980  (void)fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
981  first = 0;
982  }
983  (void)fprintf(d, "\n");
984 }
985 
986 /*
987  - at - print current situation
988  */
989 static void
990 at(struct match *m, char *title, char *start, char *stop, sopno startst,
991  sopno stopst)
992 {
993  if (!(m->eflags&REG_TRACE))
994  return;
995 
996  (void)printf("%s %s-", title, pchar(*start));
997  (void)printf("%s ", pchar(*stop));
998  (void)printf("%ld-%ld\n", (long)startst, (long)stopst);
999 }
1000 
1001 #ifndef PCHARDONE
1002 #define PCHARDONE /* never again */
1003 /*
1004  - pchar - make a character printable
1005  *
1006  * Is this identical to regchar() over in debug.c? Well, yes. But a
1007  * duplicate here avoids having a debugging-capable regexec.o tied to
1008  * a matching debug.o, and this is convenient. It all disappears in
1009  * the non-debug compilation anyway, so it doesn't matter much.
1010  */
1011 static char * /* -> representation */
1012 pchar(int ch)
1013 {
1014  static char pbuf[10];
1015 
1016  if (isprint(ch) || ch == ' ')
1017  (void)snprintf(pbuf, sizeof pbuf, "%c", ch);
1018  else
1019  (void)snprintf(pbuf, sizeof pbuf, "\\%o", ch);
1020  return(pbuf);
1021 }
1022 #endif
1023 #endif
1024 
1025 #undef matcher
1026 #undef fast
1027 #undef slow
1028 #undef dissect
1029 #undef backref
1030 #undef step
1031 #undef print
1032 #undef at
1033 #undef match
1034 #undef nope
#define OUT
Definition: regex2.h:162
#define REG_INVARG
Definition: regex_impl.h:81
#define OBOL
Definition: regex2.h:80
sopno nplus
Definition: regex2.h:156
#define REG_TRACE
Definition: regex_impl.h:89
#define OEOL
Definition: regex2.h:81
long sopno
Definition: regex2.h:69
int backrefs
Definition: regex2.h:155
#define REG_NEWLINE
Definition: regex_impl.h:60
#define OOR2
Definition: regex2.h:94
#define OPLUS_
Definition: regex2.h:86
int mlen
Definition: regex2.h:153
#define ISWORD(c)
Definition: regex2.h:163
#define REG_NOTBOL
Definition: regex_impl.h:86
sop * strip
Definition: regex2.h:135
#define O_CH
Definition: regex2.h:95
#define STATEVARS
Definition: regexec.c:113
#define SET1(v, n)
Definition: regexec.c:109
int cflags
Definition: regex2.h:140
#define SETUP(v)
Definition: regexec.c:118
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
#define CHIN(cs, c)
Definition: regex2.h:121
char * must
Definition: regex2.h:152
sopno firststate
Definition: regex2.h:142
#define OEND
Definition: regex2.h:78
#define states
Definition: regexec.c:106
#define OQUEST_
Definition: regex2.h:88
#define SOP(op, opnd)
Definition: regex2.h:75
#define ISSET(v, n)
Definition: regexec.c:110
#define O_BACK
Definition: regex2.h:85
#define EQ(a, b)
Definition: regexec.c:112
sopno laststate
Definition: regex2.h:143
sopno nstates
Definition: regex2.h:141
#define CLEAR(v)
Definition: regexec.c:107
#define INC(o)
Definition: regexec.c:121
Definition: regex2.h:111
llvm_regoff_t rm_eo
Definition: regex_impl.h:45
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
#define ORPAREN
Definition: regex2.h:91
#define OBOW
Definition: regex2.h:96
#define REG_STARTEND
Definition: regex_impl.h:88
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
#define OANY
Definition: regex2.h:82
#define INIT(o, n)
Definition: regexec.c:120
#define OLPAREN
Definition: regex2.h:90
unsigned first
#define STATESETUP(m, n)
Definition: regexec.c:114
#define BACK(dst, src, n)
Definition: regexec.c:126
#define FWD(dst, src, n)
Definition: regexec.c:125
cset * sets
Definition: regex2.h:138
#define ASSIGN(d, s)
Definition: regexec.c:111
#define REG_BACKR
Definition: regex_impl.h:91
#define OANYOF
Definition: regex2.h:83
#define ISSTATEIN(v, o)
Definition: regexec.c:122
#define REG_NOMATCH
Definition: regex_impl.h:66
#define OEOW
Definition: regex2.h:97
#define OBACK_
Definition: regex2.h:84
Merge contiguous icmps into a memcmp
Definition: MergeICmps.cpp:649
#define onestate
Definition: regexec.c:119
#define O_QUEST
Definition: regex2.h:89
#define STATETEARDOWN(m)
Definition: regexec.c:117
#define REG_NOTEOL
Definition: regex_impl.h:87
size_t nsub
Definition: regex2.h:154
#define OCH_
Definition: regex2.h:92
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm_regoff_t rm_so
Definition: regex_impl.h:44
#define OPND(n)
Definition: regex2.h:74
#define OCHAR
Definition: regex2.h:79
off_t llvm_regoff_t
Definition: regex_impl.h:42
#define O_PLUS
Definition: regex2.h:87
unsigned long sop
Definition: regex2.h:68
#define REG_ESPACE
Definition: regex_impl.h:77
#define OP(n)
Definition: regex2.h:73
#define REG_NOSUB
Definition: regex_impl.h:59
#define OOR1
Definition: regex2.h:93
#define ISSETBACK(v, n)
Definition: regexec.c:127