LLVM  14.0.0git
GlobPattern.cpp
Go to the documentation of this file.
1 //===-- GlobPattern.cpp - Glob pattern matcher implementation -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a glob pattern matcher.
10 //
11 //===----------------------------------------------------------------------===//
12 
14 #include "llvm/ADT/ArrayRef.h"
15 #include "llvm/ADT/Optional.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/Support/Errc.h"
18 
19 using namespace llvm;
20 
21 static bool hasWildcard(StringRef S) {
22  return S.find_first_of("?*[\\") != StringRef::npos;
23 }
24 
25 // Expands character ranges and returns a bitmap.
26 // For example, "a-cf-hz" is expanded to "abcfghz".
28  BitVector BV(256, false);
29 
30  // Expand X-Y.
31  for (;;) {
32  if (S.size() < 3)
33  break;
34 
35  uint8_t Start = S[0];
36  uint8_t End = S[2];
37 
38  // If it doesn't start with something like X-Y,
39  // consume the first character and proceed.
40  if (S[1] != '-') {
41  BV[Start] = true;
42  S = S.substr(1);
43  continue;
44  }
45 
46  // It must be in the form of X-Y.
47  // Validate it and then interpret the range.
48  if (Start > End)
49  return make_error<StringError>("invalid glob pattern: " + Original,
51 
52  for (int C = Start; C <= End; ++C)
53  BV[(uint8_t)C] = true;
54  S = S.substr(3);
55  }
56 
57  for (char C : S)
58  BV[(uint8_t)C] = true;
59  return BV;
60 }
61 
62 // This is a scanner for the glob pattern.
63 // A glob pattern token is one of "*", "?", "\", "[<chars>]", "[^<chars>]"
64 // (which is a negative form of "[<chars>]"), "[!<chars>]" (which is
65 // equivalent to "[^<chars>]"), or a non-meta character.
66 // This function returns the first token in S.
68  switch (S[0]) {
69  case '*':
70  S = S.substr(1);
71  // '*' is represented by an empty bitvector.
72  // All other bitvectors are 256-bit long.
73  return BitVector();
74  case '?':
75  S = S.substr(1);
76  return BitVector(256, true);
77  case '[': {
78  // ']' is allowed as the first character of a character class. '[]' is
79  // invalid. So, just skip the first character.
80  size_t End = S.find(']', 2);
81  if (End == StringRef::npos)
82  return make_error<StringError>("invalid glob pattern: " + Original,
84 
85  StringRef Chars = S.substr(1, End - 1);
86  S = S.substr(End + 1);
87  if (Chars.startswith("^") || Chars.startswith("!")) {
88  Expected<BitVector> BV = expand(Chars.substr(1), Original);
89  if (!BV)
90  return BV.takeError();
91  return BV->flip();
92  }
93  return expand(Chars, Original);
94  }
95  case '\\':
96  // Eat this character and fall through below to treat it like a non-meta
97  // character.
98  S = S.substr(1);
100  default:
101  BitVector BV(256, false);
102  BV[(uint8_t)S[0]] = true;
103  S = S.substr(1);
104  return BV;
105  }
106 }
107 
109  GlobPattern Pat;
110 
111  // S doesn't contain any metacharacter,
112  // so the regular string comparison should work.
113  if (!hasWildcard(S)) {
114  Pat.Exact = S;
115  return Pat;
116  }
117 
118  // S is something like "foo*", and the "* is not escaped. We can use
119  // startswith().
120  if (S.endswith("*") && !S.endswith("\\*") && !hasWildcard(S.drop_back())) {
121  Pat.Prefix = S.drop_back();
122  return Pat;
123  }
124 
125  // S is something like "*foo". We can use endswith().
126  if (S.startswith("*") && !hasWildcard(S.drop_front())) {
127  Pat.Suffix = S.drop_front();
128  return Pat;
129  }
130 
131  // Otherwise, we need to do real glob pattern matching.
132  // Parse the pattern now.
133  StringRef Original = S;
134  while (!S.empty()) {
135  Expected<BitVector> BV = scan(S, Original);
136  if (!BV)
137  return BV.takeError();
138  Pat.Tokens.push_back(*BV);
139  }
140  return Pat;
141 }
142 
144  if (Exact)
145  return S == *Exact;
146  if (Prefix)
147  return S.startswith(*Prefix);
148  if (Suffix)
149  return S.endswith(*Suffix);
150  return matchOne(Tokens, S);
151 }
152 
153 // Runs glob pattern Pats against string S.
154 bool GlobPattern::matchOne(ArrayRef<BitVector> Pats, StringRef S) const {
155  for (;;) {
156  if (Pats.empty())
157  return S.empty();
158 
159  // If Pats[0] is '*', try to match Pats[1..] against all possible
160  // tail strings of S to see at least one pattern succeeds.
161  if (Pats[0].size() == 0) {
162  Pats = Pats.slice(1);
163  if (Pats.empty())
164  // Fast path. If a pattern is '*', it matches anything.
165  return true;
166  for (size_t I = 0, E = S.size(); I < E; ++I)
167  if (matchOne(Pats, S.substr(I)))
168  return true;
169  return false;
170  }
171 
172  // If Pats[0] is not '*', it must consume one character.
173  if (S.empty() || !Pats[0][(uint8_t)S[0]])
174  return false;
175  Pats = Pats.slice(1);
176  S = S.substr(1);
177  }
178 }
llvm::errc::invalid_argument
@ invalid_argument
llvm::StringRef::startswith
LLVM_NODISCARD bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:285
scan
static Expected< BitVector > scan(StringRef &S, StringRef Original)
Definition: GlobPattern.cpp:67
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AllocatorList.h:23
Optional.h
StringRef.h
llvm::StringRef::npos
static constexpr size_t npos
Definition: StringRef.h:59
Errc.h
llvm::GlobPattern
Definition: GlobPattern.h:29
llvm::Expected
Tagged union holding either a T or a Error.
Definition: APFloat.h:42
llvm::ArrayRef::empty
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:158
llvm::StringRef::substr
LLVM_NODISCARD StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:610
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::BitVector
Definition: BitVector.h:74
llvm::ArrayRef::slice
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:193
hasWildcard
static bool hasWildcard(StringRef S)
Definition: GlobPattern.cpp:21
expand
static Expected< BitVector > expand(StringRef S, StringRef Original)
Definition: GlobPattern.cpp:27
I
#define I(x, y, z)
Definition: MD5.cpp:59
ArrayRef.h
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1581
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::GlobPattern::create
static Expected< GlobPattern > create(StringRef Pat)
Definition: GlobPattern.cpp:108
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
LLVM_FALLTHROUGH
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:286
GlobPattern.h
llvm::Expected::takeError
Error takeError()
Take ownership of the stored error.
Definition: Error.h:599
llvm::GlobPattern::match
bool match(StringRef S) const
Definition: GlobPattern.cpp:143