LCOV - code coverage report
Current view: top level - lib/Support - Regex.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 94 96 97.9 %
Date: 2017-09-14 15:23:50 Functions: 10 10 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===-- Regex.cpp - Regular Expression matcher implementation -------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // This file implements a POSIX regular expression matcher.
      11             : //
      12             : //===----------------------------------------------------------------------===//
      13             : 
      14             : #include "llvm/Support/Regex.h"
      15             : #include "regex_impl.h"
      16             : #include "llvm/ADT/SmallVector.h"
      17             : #include "llvm/ADT/StringRef.h"
      18             : #include "llvm/ADT/Twine.h"
      19             : #include <string>
      20             : using namespace llvm;
      21             : 
      22           2 : Regex::Regex() : preg(nullptr), error(REG_BADPAT) {}
      23             : 
      24     1263337 : Regex::Regex(StringRef regex, unsigned Flags) {
      25     1263337 :   unsigned flags = 0;
      26     1263337 :   preg = new llvm_regex();
      27     2526674 :   preg->re_endp = regex.end();
      28     1263337 :   if (Flags & IgnoreCase) 
      29        5752 :     flags |= REG_ICASE;
      30     1263337 :   if (Flags & Newline)
      31      474496 :     flags |= REG_NEWLINE;
      32     1263337 :   if (!(Flags & BasicRegex))
      33     1263337 :     flags |= REG_EXTENDED;
      34     1263337 :   error = llvm_regcomp(preg, regex.data(), flags|REG_PEND);
      35     1263337 : }
      36             : 
      37       25394 : Regex::Regex(Regex &&regex) {
      38       25394 :   preg = regex.preg;
      39       25394 :   error = regex.error;
      40       25394 :   regex.preg = nullptr;
      41       25394 :   regex.error = REG_BADPAT;
      42       25394 : }
      43             : 
      44     2577424 : Regex::~Regex() {
      45     1288712 :   if (preg) {
      46     1263316 :     llvm_regfree(preg);
      47     1263316 :     delete preg;
      48             :   }
      49     1288712 : }
      50             : 
      51      543480 : bool Regex::isValid(std::string &Error) const {
      52      543480 :   if (!error)
      53             :     return true;
      54             :   
      55           8 :   size_t len = llvm_regerror(error, preg, nullptr, 0);
      56             :   
      57          16 :   Error.resize(len - 1);
      58           8 :   llvm_regerror(error, preg, &Error[0], len);
      59           8 :   return false;
      60             : }
      61             : 
      62             : /// getNumMatches - In a valid regex, return the number of parenthesized
      63             : /// matches it contains.
      64      497271 : unsigned Regex::getNumMatches() const {
      65      497271 :   return preg->re_nsub;
      66             : }
      67             : 
      68   181767624 : bool Regex::match(StringRef String, SmallVectorImpl<StringRef> *Matches){
      69   181767624 :   if (error)
      70             :     return false;
      71             : 
      72   180655716 :   unsigned nmatch = Matches ? preg->re_nsub+1 : 0;
      73             : 
      74             :   // pmatch needs to have at least one element.
      75   180655716 :   SmallVector<llvm_regmatch_t, 8> pm;
      76   180655716 :   pm.resize(nmatch > 0 ? nmatch : 1);
      77   180655716 :   pm[0].rm_so = 0;
      78   361311432 :   pm[0].rm_eo = String.size();
      79             : 
      80   361311432 :   int rc = llvm_regexec(preg, String.data(), nmatch, pm.data(), REG_STARTEND);
      81             : 
      82   180655716 :   if (rc == REG_NOMATCH)
      83             :     return false;
      84     3353881 :   if (rc != 0) {
      85             :     // regexec can fail due to invalid pattern or running out of memory.
      86           0 :     error = rc;
      87           0 :     return false;
      88             :   }
      89             : 
      90             :   // There was a match.
      91             : 
      92     3353881 :   if (Matches) { // match position requested
      93     2843031 :     Matches->clear();
      94             :     
      95     6225957 :     for (unsigned i = 0; i != nmatch; ++i) {
      96     6780844 :       if (pm[i].rm_so == -1) {
      97             :         // this group didn't match
      98       14992 :         Matches->push_back(StringRef());
      99       14992 :         continue;
     100             :       }
     101             :       assert(pm[i].rm_eo >= pm[i].rm_so);
     102    13471736 :       Matches->push_back(StringRef(String.data()+pm[i].rm_so,
     103    10103802 :                                    pm[i].rm_eo-pm[i].rm_so));
     104             :     }
     105             :   }
     106             : 
     107             :   return true;
     108             : }
     109             : 
     110          73 : std::string Regex::sub(StringRef Repl, StringRef String,
     111             :                        std::string *Error) {
     112         146 :   SmallVector<StringRef, 8> Matches;
     113             : 
     114             :   // Reset error, if given.
     115         145 :   if (Error && !Error->empty()) *Error = "";
     116             : 
     117             :   // Return the input if there was no match.
     118          73 :   if (!match(String, &Matches))
     119             :     return String;
     120             : 
     121             :   // Otherwise splice in the replacement string, starting with the prefix before
     122             :   // the match.
     123          84 :   std::string Res(String.begin(), Matches[0].begin());
     124             : 
     125             :   // Then the replacement string, honoring possible substitutions.
     126          59 :   while (!Repl.empty()) {
     127             :     // Skip to the next escape.
     128          24 :     std::pair<StringRef, StringRef> Split = Repl.split('\\');
     129             : 
     130             :     // Add the skipped substring.
     131          24 :     Res += Split.first;
     132             : 
     133             :     // Check for terminimation and trailing backslash.
     134          24 :     if (Split.second.empty()) {
     135           6 :       if (Repl.size() != Split.first.size() &&
     136           6 :           Error && Error->empty())
     137             :         *Error = "replacement string contained trailing backslash";
     138           5 :       break;
     139             :     }
     140             : 
     141             :     // Otherwise update the replacement string and interpret escapes.
     142          19 :     Repl = Split.second;
     143             : 
     144             :     // FIXME: We should have a StringExtras function for mapping C99 escapes.
     145          38 :     switch (Repl[0]) {
     146             :       // Treat all unrecognized characters as self-quoting.
     147           2 :     default:
     148           4 :       Res += Repl[0];
     149           2 :       Repl = Repl.substr(1);
     150           2 :       break;
     151             : 
     152             :       // Single character escapes.
     153           1 :     case 't':
     154           1 :       Res += '\t';
     155           1 :       Repl = Repl.substr(1);
     156           1 :       break;
     157           1 :     case 'n':
     158           1 :       Res += '\n';
     159           1 :       Repl = Repl.substr(1);
     160           1 :       break;
     161             : 
     162             :       // Decimal escapes are backreferences.
     163          15 :     case '0': case '1': case '2': case '3': case '4':
     164             :     case '5': case '6': case '7': case '8': case '9': {
     165             :       // Extract the backreference number.
     166          30 :       StringRef Ref = Repl.slice(0, Repl.find_first_not_of("0123456789"));
     167          30 :       Repl = Repl.substr(Ref.size());
     168             : 
     169             :       unsigned RefValue;
     170          30 :       if (!Ref.getAsInteger(10, RefValue) &&
     171          30 :           RefValue < Matches.size())
     172          28 :         Res += Matches[RefValue];
     173           2 :       else if (Error && Error->empty())
     174           6 :         *Error = ("invalid backreference string '" + Twine(Ref) + "'").str();
     175             :       break;
     176             :     }
     177             :     }
     178             :   }
     179             : 
     180             :   // And finally the suffix.
     181         105 :   Res += StringRef(Matches[0].end(), String.end() - Matches[0].end());
     182             : 
     183          21 :   return Res;
     184             : }
     185             : 
     186             : // These are the special characters matched in functions like "p_ere_exp".
     187             : static const char RegexMetachars[] = "()^$|*+?.[]\\{}";
     188             : 
     189       11552 : bool Regex::isLiteralERE(StringRef Str) {
     190             :   // Check for regex metacharacters.  This list was derived from our regex
     191             :   // implementation in regcomp.c and double checked against the POSIX extended
     192             :   // regular expression specification.
     193       11552 :   return Str.find_first_of(RegexMetachars) == StringRef::npos;
     194             : }
     195             : 
     196     1251202 : std::string Regex::escape(StringRef String) {
     197     1251202 :   std::string RegexStr;
     198    19238898 :   for (unsigned i = 0, e = String.size(); i != e; ++i) {
     199    50209482 :     if (strchr(RegexMetachars, String[i]))
     200             :       RegexStr += '\\';
     201    50209482 :     RegexStr += String[i];
     202             :   }
     203             : 
     204     1251202 :   return RegexStr;
     205             : }

Generated by: LCOV version 1.13