LLVM  4.0.0
GlobPattern.cpp
Go to the documentation of this file.
1 //===-- GlobPattern.cpp - Glob pattern matcher implementation -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements a glob pattern matcher.
11 //
12 //===----------------------------------------------------------------------===//
13 
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/Optional.h"
17 #include "llvm/ADT/StringRef.h"
18 #include "llvm/Support/Errc.h"
19 
20 using namespace llvm;
21 
22 static bool hasWildcard(StringRef S) {
23  return S.find_first_of("?*[") != StringRef::npos;
24 }
25 
26 // Expands character ranges and returns a bitmap.
27 // For example, "a-cf-hz" is expanded to "abcfghz".
29  BitVector BV(256, false);
30 
31  // Expand X-Y.
32  for (;;) {
33  if (S.size() < 3)
34  break;
35 
36  // If it doesn't start with something like X-Y,
37  // consume the first character and proceed.
38  if (S[1] != '-') {
39  BV[S[0]] = true;
40  S = S.substr(1);
41  continue;
42  }
43 
44  // It must be in the form of X-Y.
45  // Validate it and then interpret the range.
46  if (S[0] > S[2])
47  return make_error<StringError>("invalid glob pattern: " + Original,
49 
50  for (int C = S[0]; C <= S[2]; ++C)
51  BV[C] = true;
52  S = S.substr(3);
53  }
54 
55  for (char C : S)
56  BV[C] = true;
57  return BV;
58 }
59 
60 // This is a scanner for the glob pattern.
61 // A glob pattern token is one of "*", "?", "[<chars>]", "[^<chars>]"
62 // (which is a negative form of "[<chars>]"), or a non-meta character.
63 // This function returns the first token in S.
65  switch (S[0]) {
66  case '*':
67  S = S.substr(1);
68  // '*' is represented by an empty bitvector.
69  // All other bitvectors are 256-bit long.
70  return BitVector();
71  case '?':
72  S = S.substr(1);
73  return BitVector(256, true);
74  case '[': {
75  size_t End = S.find(']', 1);
76  if (End == StringRef::npos)
77  return make_error<StringError>("invalid glob pattern: " + Original,
79 
80  StringRef Chars = S.substr(1, End - 1);
81  S = S.substr(End + 1);
82  if (Chars.startswith("^")) {
83  Expected<BitVector> BV = expand(Chars.substr(1), Original);
84  if (!BV)
85  return BV.takeError();
86  return BV->flip();
87  }
88  return expand(Chars, Original);
89  }
90  default:
91  BitVector BV(256, false);
92  BV[S[0]] = true;
93  S = S.substr(1);
94  return BV;
95  }
96 }
97 
99  GlobPattern Pat;
100 
101  // S doesn't contain any metacharacter,
102  // so the regular string comparison should work.
103  if (!hasWildcard(S)) {
104  Pat.Exact = S;
105  return Pat;
106  }
107 
108  // S is something like "foo*". We can use startswith().
109  if (S.endswith("*") && !hasWildcard(S.drop_back())) {
110  Pat.Prefix = S.drop_back();
111  return Pat;
112  }
113 
114  // S is something like "*foo". We can use endswith().
115  if (S.startswith("*") && !hasWildcard(S.drop_front())) {
116  Pat.Suffix = S.drop_front();
117  return Pat;
118  }
119 
120  // Otherwise, we need to do real glob pattern matching.
121  // Parse the pattern now.
122  StringRef Original = S;
123  while (!S.empty()) {
124  Expected<BitVector> BV = scan(S, Original);
125  if (!BV)
126  return BV.takeError();
127  Pat.Tokens.push_back(*BV);
128  }
129  return Pat;
130 }
131 
133  if (Exact)
134  return S == *Exact;
135  if (Prefix)
136  return S.startswith(*Prefix);
137  if (Suffix)
138  return S.endswith(*Suffix);
139  return matchOne(Tokens, S);
140 }
141 
142 // Runs glob pattern Pats against string S.
143 bool GlobPattern::matchOne(ArrayRef<BitVector> Pats, StringRef S) const {
144  for (;;) {
145  if (Pats.empty())
146  return S.empty();
147 
148  // If Pats[0] is '*', try to match Pats[1..] against all possible
149  // tail strings of S to see at least one pattern succeeds.
150  if (Pats[0].size() == 0) {
151  Pats = Pats.slice(1);
152  if (Pats.empty())
153  // Fast path. If a pattern is '*', it matches anything.
154  return true;
155  for (size_t I = 0, E = S.size(); I < E; ++I)
156  if (matchOne(Pats, S.substr(I)))
157  return true;
158  return false;
159  }
160 
161  // If Pats[0] is not '*', it must consume one character.
162  if (S.empty() || !Pats[0][S[0]])
163  return false;
164  Pats = Pats.slice(1);
165  S = S.substr(1);
166  }
167 }
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition: StringRef.h:634
bool match(StringRef S) const
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool endswith(StringRef Suffix) const
Check if this string ends with the given Suffix.
Definition: StringRef.h:276
Error takeError()
Take ownership of the stored error.
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array...
Definition: ArrayRef.h:171
Tagged union holding either a T or a Error.
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:264
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE size_t size() const
size - Get the string size.
Definition: StringRef.h:135
static GCRegistry::Add< CoreCLRGC > E("coreclr","CoreCLR-compatible GC")
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE size_t find(char C, size_t From=0) const
Search for the first character C in the string.
Definition: StringRef.h:295
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:587
static bool hasWildcard(StringRef S)
Definition: GlobPattern.cpp:22
static const unsigned End
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:136
static Expected< BitVector > expand(StringRef S, StringRef Original)
Definition: GlobPattern.cpp:28
static Expected< GlobPattern > create(StringRef Pat)
Definition: GlobPattern.cpp:98
static GCRegistry::Add< ShadowStackGC > C("shadow-stack","Very portable GC for uncooperative code generators")
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:130
static const size_t npos
Definition: StringRef.h:51
#define I(x, y, z)
Definition: MD5.cpp:54
LLVM_NODISCARD size_t find_first_of(char C, size_t From=0) const
Find the first character in the string that is C, or npos if not found.
Definition: StringRef.h:392
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:47
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
Definition: StringRef.h:643
static Expected< BitVector > scan(StringRef &S, StringRef Original)
Definition: GlobPattern.cpp:64