LCOV - code coverage report
Current view: top level - lib/Support - Regex.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 79 81 97.5 %
Date: 2018-10-20 13:21:21 Functions: 10 10 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===-- Regex.cpp - Regular Expression matcher implementation -------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // This file implements a POSIX regular expression matcher.
      11             : //
      12             : //===----------------------------------------------------------------------===//
      13             : 
      14             : #include "llvm/Support/Regex.h"
      15             : #include "llvm/ADT/SmallVector.h"
      16             : #include "llvm/ADT/StringRef.h"
      17             : #include "llvm/ADT/Twine.h"
      18             : #include <string>
      19             : 
      20             : // Important this comes last because it defines "_REGEX_H_". At least on
      21             : // Darwin, if included before any header that (transitively) includes
      22             : // xlocale.h, this will cause trouble, because of missing regex-related types.
      23             : #include "regex_impl.h"
      24             : 
      25             : using namespace llvm;
      26             : 
      27           2 : Regex::Regex() : preg(nullptr), error(REG_BADPAT) {}
      28             : 
      29     1931245 : Regex::Regex(StringRef regex, unsigned Flags) {
      30             :   unsigned flags = 0;
      31     1931245 :   preg = new llvm_regex();
      32     1931245 :   preg->re_endp = regex.end();
      33     1931245 :   if (Flags & IgnoreCase)
      34             :     flags |= REG_ICASE;
      35     1931245 :   if (Flags & Newline)
      36      730610 :     flags |= REG_NEWLINE;
      37     1931245 :   if (!(Flags & BasicRegex))
      38     1931245 :     flags |= REG_EXTENDED;
      39     1931245 :   error = llvm_regcomp(preg, regex.data(), flags|REG_PEND);
      40     1931245 : }
      41             : 
      42       15498 : Regex::Regex(Regex &&regex) {
      43       15498 :   preg = regex.preg;
      44       15498 :   error = regex.error;
      45       15498 :   regex.preg = nullptr;
      46       15498 :   regex.error = REG_BADPAT;
      47       15498 : }
      48             : 
      49     3892572 : Regex::~Regex() {
      50     1946286 :   if (preg) {
      51     1930786 :     llvm_regfree(preg);
      52     1930786 :     delete preg;
      53             :   }
      54     1946286 : }
      55             : 
      56      877359 : bool Regex::isValid(std::string &Error) const {
      57      877359 :   if (!error)
      58             :     return true;
      59             : 
      60          11 :   size_t len = llvm_regerror(error, preg, nullptr, 0);
      61             : 
      62          11 :   Error.resize(len - 1);
      63          11 :   llvm_regerror(error, preg, &Error[0], len);
      64          11 :   return false;
      65             : }
      66             : 
      67             : /// getNumMatches - In a valid regex, return the number of parenthesized
      68             : /// matches it contains.
      69      813489 : unsigned Regex::getNumMatches() const {
      70      813489 :   return preg->re_nsub;
      71             : }
      72             : 
      73    36759205 : bool Regex::match(StringRef String, SmallVectorImpl<StringRef> *Matches){
      74    36759205 :   if (error)
      75             :     return false;
      76             : 
      77    35441384 :   unsigned nmatch = Matches ? preg->re_nsub+1 : 0;
      78             : 
      79             :   // pmatch needs to have at least one element.
      80             :   SmallVector<llvm_regmatch_t, 8> pm;
      81    35441384 :   pm.resize(nmatch > 0 ? nmatch : 1);
      82    35441384 :   pm[0].rm_so = 0;
      83    35441384 :   pm[0].rm_eo = String.size();
      84             : 
      85    35441384 :   int rc = llvm_regexec(preg, String.data(), nmatch, pm.data(), REG_STARTEND);
      86             : 
      87    35441384 :   if (rc == REG_NOMATCH)
      88             :     return false;
      89     5095464 :   if (rc != 0) {
      90             :     // regexec can fail due to invalid pattern or running out of memory.
      91           0 :     error = rc;
      92           0 :     return false;
      93             :   }
      94             : 
      95             :   // There was a match.
      96             : 
      97     5095464 :   if (Matches) { // match position requested
      98             :     Matches->clear();
      99             : 
     100     9571940 :     for (unsigned i = 0; i != nmatch; ++i) {
     101    10454768 :       if (pm[i].rm_so == -1) {
     102             :         // this group didn't match
     103       16770 :         Matches->push_back(StringRef());
     104       16770 :         continue;
     105             :       }
     106             :       assert(pm[i].rm_eo >= pm[i].rm_so);
     107     5210614 :       Matches->push_back(StringRef(String.data()+pm[i].rm_so,
     108     5210614 :                                    pm[i].rm_eo-pm[i].rm_so));
     109             :     }
     110             :   }
     111             : 
     112             :   return true;
     113             : }
     114             : 
     115          77 : std::string Regex::sub(StringRef Repl, StringRef String,
     116             :                        std::string *Error) {
     117             :   SmallVector<StringRef, 8> Matches;
     118             : 
     119             :   // Reset error, if given.
     120          77 :   if (Error && !Error->empty()) *Error = "";
     121             : 
     122             :   // Return the input if there was no match.
     123          77 :   if (!match(String, &Matches))
     124             :     return String;
     125             : 
     126             :   // Otherwise splice in the replacement string, starting with the prefix before
     127             :   // the match.
     128          23 :   std::string Res(String.begin(), Matches[0].begin());
     129             : 
     130             :   // Then the replacement string, honoring possible substitutions.
     131          42 :   while (!Repl.empty()) {
     132             :     // Skip to the next escape.
     133          26 :     std::pair<StringRef, StringRef> Split = Repl.split('\\');
     134             : 
     135             :     // Add the skipped substring.
     136             :     Res += Split.first;
     137             : 
     138             :     // Check for termination and trailing backslash.
     139          26 :     if (Split.second.empty()) {
     140           1 :       if (Repl.size() != Split.first.size() &&
     141           8 :           Error && Error->empty())
     142             :         *Error = "replacement string contained trailing backslash";
     143           7 :       break;
     144             :     }
     145             : 
     146             :     // Otherwise update the replacement string and interpret escapes.
     147          19 :     Repl = Split.second;
     148             : 
     149             :     // FIXME: We should have a StringExtras function for mapping C99 escapes.
     150          38 :     switch (Repl[0]) {
     151             :       // Treat all unrecognized characters as self-quoting.
     152           2 :     default:
     153             :       Res += Repl[0];
     154           2 :       Repl = Repl.substr(1);
     155           2 :       break;
     156             : 
     157             :       // Single character escapes.
     158             :     case 't':
     159             :       Res += '\t';
     160           1 :       Repl = Repl.substr(1);
     161           1 :       break;
     162             :     case 'n':
     163             :       Res += '\n';
     164           1 :       Repl = Repl.substr(1);
     165           1 :       break;
     166             : 
     167             :       // Decimal escapes are backreferences.
     168             :     case '0': case '1': case '2': case '3': case '4':
     169             :     case '5': case '6': case '7': case '8': case '9': {
     170             :       // Extract the backreference number.
     171          30 :       StringRef Ref = Repl.slice(0, Repl.find_first_not_of("0123456789"));
     172          15 :       Repl = Repl.substr(Ref.size());
     173             : 
     174             :       unsigned RefValue;
     175          15 :       if (!Ref.getAsInteger(10, RefValue) &&
     176          15 :           RefValue < Matches.size())
     177             :         Res += Matches[RefValue];
     178           1 :       else if (Error && Error->empty())
     179           2 :         *Error = ("invalid backreference string '" + Twine(Ref) + "'").str();
     180             :       break;
     181             :     }
     182             :     }
     183             :   }
     184             : 
     185             :   // And finally the suffix.
     186          46 :   Res += StringRef(Matches[0].end(), String.end() - Matches[0].end());
     187             : 
     188             :   return Res;
     189             : }
     190             : 
     191             : // These are the special characters matched in functions like "p_ere_exp".
     192             : static const char RegexMetachars[] = "()^$|*+?.[]\\{}";
     193             : 
     194       11904 : bool Regex::isLiteralERE(StringRef Str) {
     195             :   // Check for regex metacharacters.  This list was derived from our regex
     196             :   // implementation in regcomp.c and double checked against the POSIX extended
     197             :   // regular expression specification.
     198       11904 :   return Str.find_first_of(RegexMetachars) == StringRef::npos;
     199             : }
     200             : 
     201     1991661 : std::string Regex::escape(StringRef String) {
     202             :   std::string RegexStr;
     203    27505334 :   for (unsigned i = 0, e = String.size(); i != e; ++i) {
     204    76541019 :     if (strchr(RegexMetachars, String[i]))
     205             :       RegexStr += '\\';
     206    25513673 :     RegexStr += String[i];
     207             :   }
     208             : 
     209     1991661 :   return RegexStr;
     210             : }

Generated by: LCOV version 1.13