LLVM 22.0.0git
GlobPattern.cpp
Go to the documentation of this file.
1//===-- GlobPattern.cpp - Glob pattern matcher implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a glob pattern matcher.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/StringRef.h"
15#include "llvm/Support/Errc.h"
16
17using namespace llvm;
18
19// Expands character ranges and returns a bitmap.
20// For example, "a-cf-hz" is expanded to "abcfghz".
22 BitVector BV(256, false);
23
24 // Expand X-Y.
25 for (;;) {
26 if (S.size() < 3)
27 break;
28
29 uint8_t Start = S[0];
30 uint8_t End = S[2];
31
32 // If it doesn't start with something like X-Y,
33 // consume the first character and proceed.
34 if (S[1] != '-') {
35 BV[Start] = true;
36 S = S.substr(1);
37 continue;
38 }
39
40 // It must be in the form of X-Y.
41 // Validate it and then interpret the range.
42 if (Start > End)
43 return make_error<StringError>("invalid glob pattern: " + Original,
45
46 for (int C = Start; C <= End; ++C)
47 BV[(uint8_t)C] = true;
48 S = S.substr(3);
49 }
50
51 for (char C : S)
52 BV[(uint8_t)C] = true;
53 return BV;
54}
55
56// Identify brace expansions in S and return the list of patterns they expand
57// into.
59parseBraceExpansions(StringRef S, std::optional<size_t> MaxSubPatterns) {
60 SmallVector<std::string> SubPatterns = {S.str()};
61 if (!MaxSubPatterns || !S.contains('{'))
62 return std::move(SubPatterns);
63
64 struct BraceExpansion {
65 size_t Start;
66 size_t Length;
68 };
69 SmallVector<BraceExpansion, 0> BraceExpansions;
70
71 BraceExpansion *CurrentBE = nullptr;
72 size_t TermBegin;
73 for (size_t I = 0, E = S.size(); I != E; ++I) {
74 if (S[I] == '[') {
75 I = S.find(']', I + 2);
76 if (I == std::string::npos)
77 return make_error<StringError>("invalid glob pattern, unmatched '['",
79 } else if (S[I] == '{') {
80 if (CurrentBE)
82 "nested brace expansions are not supported",
84 CurrentBE = &BraceExpansions.emplace_back();
85 CurrentBE->Start = I;
86 TermBegin = I + 1;
87 } else if (S[I] == ',') {
88 if (!CurrentBE)
89 continue;
90 CurrentBE->Terms.push_back(S.substr(TermBegin, I - TermBegin));
91 TermBegin = I + 1;
92 } else if (S[I] == '}') {
93 if (!CurrentBE)
94 continue;
95 if (CurrentBE->Terms.empty())
97 "empty or singleton brace expansions are not supported",
99 CurrentBE->Terms.push_back(S.substr(TermBegin, I - TermBegin));
100 CurrentBE->Length = I - CurrentBE->Start + 1;
101 CurrentBE = nullptr;
102 } else if (S[I] == '\\') {
103 if (++I == E)
104 return make_error<StringError>("invalid glob pattern, stray '\\'",
106 }
107 }
108 if (CurrentBE)
109 return make_error<StringError>("incomplete brace expansion",
111
112 size_t NumSubPatterns = 1;
113 for (auto &BE : BraceExpansions) {
114 if (NumSubPatterns > std::numeric_limits<size_t>::max() / BE.Terms.size()) {
115 NumSubPatterns = std::numeric_limits<size_t>::max();
116 break;
117 }
118 NumSubPatterns *= BE.Terms.size();
119 }
120 if (NumSubPatterns > *MaxSubPatterns)
121 return make_error<StringError>("too many brace expansions",
123 // Replace brace expansions in reverse order so that we don't invalidate
124 // earlier start indices
125 for (auto &BE : reverse(BraceExpansions)) {
126 SmallVector<std::string> OrigSubPatterns;
127 std::swap(SubPatterns, OrigSubPatterns);
128 for (StringRef Term : BE.Terms)
129 for (StringRef Orig : OrigSubPatterns)
130 SubPatterns.emplace_back(Orig).replace(BE.Start, BE.Length, Term);
131 }
132 return std::move(SubPatterns);
133}
134
136 StringRef Best;
137 while (!S.empty()) {
138 size_t PrefixSize = S.find_first_of("?*[{\\");
139 if (PrefixSize == std::string::npos)
140 PrefixSize = S.size();
141
142 if (Best.size() < PrefixSize)
143 Best = S.take_front(PrefixSize);
144
145 S = S.drop_front(PrefixSize);
146
147 // S cannot be empty here: the first and last characters of the input
148 // string must be glob special characters; otherwise they would be part of
149 // the prefix or the suffix.
150 assert(!S.empty());
151
152 switch (S.front()) {
153 case '\\':
154 S = S.drop_front(2);
155 break;
156 case '[': {
157 // Drop '[' and the first character which can be ']'.
158 S = S.drop_front(2);
159 size_t EndBracket = S.find_first_of("]");
160 // Should not be possible, SubGlobPattern::create should fail on invalid
161 // pattern before we get here.
162 assert(EndBracket != std::string::npos);
163 S = S.drop_front(EndBracket + 1);
164 break;
165 }
166 case '{':
167 // TODO: implement.
168 // Fallback to whatever is best for now.
169 return Best;
170 default:
171 S = S.drop_front(1);
172 }
173 }
174
175 return Best;
176}
177
179GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
180 GlobPattern Pat;
181 Pat.Pattern = S;
182
183 // Store the prefix that does not contain any metacharacter.
184 Pat.PrefixSize = S.find_first_of("?*[{\\");
185 if (Pat.PrefixSize == std::string::npos) {
186 Pat.PrefixSize = S.size();
187 return Pat;
188 }
189 S = S.substr(Pat.PrefixSize);
190
191 // Just in case we stop on unmatched opening brackets.
192 size_t SuffixStart = S.find_last_of("?*[]{}\\");
193 assert(SuffixStart != std::string::npos);
194 if (S[SuffixStart] == '\\')
195 ++SuffixStart;
196 if (SuffixStart < S.size())
197 ++SuffixStart;
198 Pat.SuffixSize = S.size() - SuffixStart;
199 S = S.substr(0, SuffixStart);
200
202 if (auto Err = parseBraceExpansions(S, MaxSubPatterns).moveInto(SubPats))
203 return std::move(Err);
204 for (StringRef SubPat : SubPats) {
205 auto SubGlobOrErr = SubGlobPattern::create(SubPat);
206 if (!SubGlobOrErr)
207 return SubGlobOrErr.takeError();
208 Pat.SubGlobs.push_back(*SubGlobOrErr);
209 }
210
211 return Pat;
212}
213
215GlobPattern::SubGlobPattern::create(StringRef S) {
216 SubGlobPattern Pat;
217
218 // Parse brackets.
219 Pat.Pat.assign(S.begin(), S.end());
220 for (size_t I = 0, E = S.size(); I != E; ++I) {
221 if (S[I] == '[') {
222 // ']' is allowed as the first character of a character class. '[]' is
223 // invalid. So, just skip the first character.
224 ++I;
225 size_t J = S.find(']', I + 1);
226 if (J == StringRef::npos)
227 return make_error<StringError>("invalid glob pattern, unmatched '['",
229 StringRef Chars = S.substr(I, J - I);
230 bool Invert = S[I] == '^' || S[I] == '!';
232 Invert ? expand(Chars.substr(1), S) : expand(Chars, S);
233 if (!BV)
234 return BV.takeError();
235 if (Invert)
236 BV->flip();
237 Pat.Brackets.push_back(Bracket{J + 1, std::move(*BV)});
238 I = J;
239 } else if (S[I] == '\\') {
240 if (++I == E)
241 return make_error<StringError>("invalid glob pattern, stray '\\'",
243 }
244 }
245 return Pat;
246}
247
249 return maxPlainSubstring(
250 Pattern.drop_front(PrefixSize).drop_back(SuffixSize));
251}
252
254 if (!S.consume_front(prefix()))
255 return false;
256 if (!S.consume_back(suffix()))
257 return false;
258 if (SubGlobs.empty() && S.empty())
259 return true;
260 for (auto &Glob : SubGlobs)
261 if (Glob.match(S))
262 return true;
263 return false;
264}
265
266// Factor the pattern into segments split by '*'. The segment is matched
267// sequentially by finding the first occurrence past the end of the previous
268// match.
269bool GlobPattern::SubGlobPattern::match(StringRef Str) const {
270 const char *P = Pat.data(), *SegmentBegin = nullptr, *S = Str.data(),
271 *SavedS = S;
272 const char *const PEnd = P + Pat.size(), *const End = S + Str.size();
273 size_t B = 0, SavedB = 0;
274 while (S != End) {
275 if (P == PEnd)
276 ;
277 else if (*P == '*') {
278 // The non-* substring on the left of '*' matches the tail of S. Save the
279 // positions to be used by backtracking if we see a mismatch later.
280 SegmentBegin = ++P;
281 SavedS = S;
282 SavedB = B;
283 continue;
284 } else if (*P == '[') {
285 if (Brackets[B].Bytes[uint8_t(*S)]) {
286 P = Pat.data() + Brackets[B++].NextOffset;
287 ++S;
288 continue;
289 }
290 } else if (*P == '\\') {
291 if (*++P == *S) {
292 ++P;
293 ++S;
294 continue;
295 }
296 } else if (*P == *S || *P == '?') {
297 ++P;
298 ++S;
299 continue;
300 }
301 if (!SegmentBegin)
302 return false;
303 // We have seen a '*'. Backtrack to the saved positions. Shift the S
304 // position to probe the next starting position in the segment.
305 P = SegmentBegin;
306 S = ++SavedS;
307 B = SavedB;
308 }
309 // All bytes in Str have been matched. Return true if the rest part of Pat is
310 // empty or contains only '*'.
311 return getPat().find_first_not_of('*', P - Pat.data()) == std::string::npos;
312}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
constexpr char SuffixStart
static StringRef maxPlainSubstring(StringRef S)
static Expected< SmallVector< std::string, 1 > > parseBraceExpansions(StringRef S, std::optional< size_t > MaxSubPatterns)
static Expected< BitVector > expand(StringRef S, StringRef Original)
#define I(x, y, z)
Definition MD5.cpp:58
#define P(N)
if(PassOpts->AAPipeline)
Tagged union holding either a T or a Error.
Definition Error.h:485
Error takeError()
Take ownership of the stored error.
Definition Error.h:612
This class implements a glob pattern matcher similar to the one found in bash, but with some key diff...
Definition GlobPattern.h:52
StringRef longest_substr() const
StringRef suffix() const
Definition GlobPattern.h:81
StringRef prefix() const
Definition GlobPattern.h:79
LLVM_ABI bool match(StringRef S) const
static LLVM_ABI Expected< GlobPattern > create(StringRef Pat, std::optional< size_t > MaxSubPatterns={})
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
bool consume_back(StringRef Suffix)
Returns true if this StringRef has the given suffix and removes that suffix.
Definition StringRef.h:657
std::string str() const
str - Get the contents as an std::string.
Definition StringRef.h:225
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:573
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:143
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition StringRef.h:611
iterator begin() const
Definition StringRef.h:112
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
char front() const
front - Get the first character in the string.
Definition StringRef.h:149
size_t find_last_of(char C, size_t From=npos) const
Find the last character in the string that is C, or npos if not found.
Definition StringRef.h:401
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:140
bool contains(StringRef Other) const
Return true if the given string is a substring of *this, and false otherwise.
Definition StringRef.h:426
bool consume_front(StringRef Prefix)
Returns true if this StringRef has the given prefix and removes that prefix.
Definition StringRef.h:637
size_t find_first_of(char C, size_t From=0) const
Find the first character in the string that is C, or npos if not found.
Definition StringRef.h:376
iterator end() const
Definition StringRef.h:114
StringRef take_front(size_t N=1) const
Return a StringRef equal to 'this' but with only the first N elements remaining.
Definition StringRef.h:582
size_t find(char C, size_t From=0) const
Search for the first character C in the string.
Definition StringRef.h:293
static constexpr size_t npos
Definition StringRef.h:57
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
This is an optimization pass for GlobalISel generic memory operations.
@ Length
Definition DWP.cpp:477
@ invalid_argument
Definition Errc.h:56
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
Error make_error(ArgTs &&... Args)
Make a Error instance representing failure using the given error info type.
Definition Error.h:340
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869