LLVM 17.0.0git
GlobPattern.cpp
Go to the documentation of this file.
1//===-- GlobPattern.cpp - Glob pattern matcher implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a glob pattern matcher.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/ArrayRef.h"
15#include "llvm/ADT/StringRef.h"
16#include "llvm/Support/Errc.h"
17
18using namespace llvm;
19
20static bool hasWildcard(StringRef S) {
21 return S.find_first_of("?*[\\") != StringRef::npos;
22}
23
24// Expands character ranges and returns a bitmap.
25// For example, "a-cf-hz" is expanded to "abcfghz".
27 BitVector BV(256, false);
28
29 // Expand X-Y.
30 for (;;) {
31 if (S.size() < 3)
32 break;
33
34 uint8_t Start = S[0];
35 uint8_t End = S[2];
36
37 // If it doesn't start with something like X-Y,
38 // consume the first character and proceed.
39 if (S[1] != '-') {
40 BV[Start] = true;
41 S = S.substr(1);
42 continue;
43 }
44
45 // It must be in the form of X-Y.
46 // Validate it and then interpret the range.
47 if (Start > End)
48 return make_error<StringError>("invalid glob pattern: " + Original,
49 errc::invalid_argument);
50
51 for (int C = Start; C <= End; ++C)
52 BV[(uint8_t)C] = true;
53 S = S.substr(3);
54 }
55
56 for (char C : S)
57 BV[(uint8_t)C] = true;
58 return BV;
59}
60
61// This is a scanner for the glob pattern.
62// A glob pattern token is one of "*", "?", "\", "[<chars>]", "[^<chars>]"
63// (which is a negative form of "[<chars>]"), "[!<chars>]" (which is
64// equivalent to "[^<chars>]"), or a non-meta character.
65// This function returns the first token in S.
67 switch (S[0]) {
68 case '*':
69 S = S.substr(1);
70 // '*' is represented by an empty bitvector.
71 // All other bitvectors are 256-bit long.
72 return BitVector();
73 case '?':
74 S = S.substr(1);
75 return BitVector(256, true);
76 case '[': {
77 // ']' is allowed as the first character of a character class. '[]' is
78 // invalid. So, just skip the first character.
79 size_t End = S.find(']', 2);
80 if (End == StringRef::npos)
81 return make_error<StringError>("invalid glob pattern: " + Original,
82 errc::invalid_argument);
83
84 StringRef Chars = S.substr(1, End - 1);
85 S = S.substr(End + 1);
86 if (Chars.startswith("^") || Chars.startswith("!")) {
87 Expected<BitVector> BV = expand(Chars.substr(1), Original);
88 if (!BV)
89 return BV.takeError();
90 return BV->flip();
91 }
92 return expand(Chars, Original);
93 }
94 case '\\':
95 // Eat this character and fall through below to treat it like a non-meta
96 // character.
97 S = S.substr(1);
98 [[fallthrough]];
99 default:
100 BitVector BV(256, false);
101 BV[(uint8_t)S[0]] = true;
102 S = S.substr(1);
103 return BV;
104 }
105}
106
108 GlobPattern Pat;
109
110 // S doesn't contain any metacharacter,
111 // so the regular string comparison should work.
112 if (!hasWildcard(S)) {
113 Pat.Exact = S;
114 return Pat;
115 }
116
117 // S is something like "foo*", and the "* is not escaped. We can use
118 // startswith().
119 if (S.endswith("*") && !S.endswith("\\*") && !hasWildcard(S.drop_back())) {
120 Pat.Prefix = S.drop_back();
121 return Pat;
122 }
123
124 // S is something like "*foo". We can use endswith().
125 if (S.startswith("*") && !hasWildcard(S.drop_front())) {
126 Pat.Suffix = S.drop_front();
127 return Pat;
128 }
129
130 // Otherwise, we need to do real glob pattern matching.
131 // Parse the pattern now.
132 StringRef Original = S;
133 while (!S.empty()) {
134 Expected<BitVector> BV = scan(S, Original);
135 if (!BV)
136 return BV.takeError();
137 Pat.Tokens.push_back(*BV);
138 }
139 return Pat;
140}
141
143 if (Exact)
144 return S == *Exact;
145 if (Prefix)
146 return S.startswith(*Prefix);
147 if (Suffix)
148 return S.endswith(*Suffix);
149 return matchOne(Tokens, S);
150}
151
152// Runs glob pattern Pats against string S.
153bool GlobPattern::matchOne(ArrayRef<BitVector> Pats, StringRef S) const {
154 for (;;) {
155 if (Pats.empty())
156 return S.empty();
157
158 // If Pats[0] is '*', try to match Pats[1..] against all possible
159 // tail strings of S to see at least one pattern succeeds.
160 if (Pats[0].size() == 0) {
161 Pats = Pats.slice(1);
162 if (Pats.empty())
163 // Fast path. If a pattern is '*', it matches anything.
164 return true;
165 for (size_t I = 0, E = S.size(); I < E; ++I)
166 if (matchOne(Pats, S.substr(I)))
167 return true;
168 return false;
169 }
170
171 // If Pats[0] is not '*', it must consume one character.
172 if (S.empty() || !Pats[0][(uint8_t)S[0]])
173 return false;
174 Pats = Pats.slice(1);
175 S = S.substr(1);
176 }
177}
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static Expected< BitVector > expand(StringRef S, StringRef Original)
Definition: GlobPattern.cpp:26
static bool hasWildcard(StringRef S)
Definition: GlobPattern.cpp:20
static Expected< BitVector > scan(StringRef &S, StringRef Original)
Definition: GlobPattern.cpp:66
#define I(x, y, z)
Definition: MD5.cpp:58
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:158
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:193
Tagged union holding either a T or a Error.
Definition: Error.h:470
Error takeError()
Take ownership of the stored error.
Definition: Error.h:597
bool match(StringRef S) const
static Expected< GlobPattern > create(StringRef Pat)
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:558
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:134
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition: StringRef.h:596
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
bool startswith(StringRef Prefix) const
Definition: StringRef.h:261
size_t find_first_of(char C, size_t From=0) const
Find the first character in the string that is C, or npos if not found.
Definition: StringRef.h:375
size_t find(char C, size_t From=0) const
Search for the first character C in the string.
Definition: StringRef.h:295
bool endswith(StringRef Suffix) const
Definition: StringRef.h:277
static constexpr size_t npos
Definition: StringRef.h:52
StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
Definition: StringRef.h:603
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1777