LLVM 23.0.0git
GlobPattern.h
Go to the documentation of this file.
1//===-- GlobPattern.h - glob pattern matcher implementation -*- C++ -*-----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a glob pattern matcher.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_SUPPORT_GLOBPATTERN_H
14#define LLVM_SUPPORT_GLOBPATTERN_H
15
16#include "llvm/ADT/BitVector.h"
18#include "llvm/ADT/StringRef.h"
20#include "llvm/Support/Error.h"
21#include <optional>
22
23namespace llvm {
24
25/// This class implements a glob pattern matcher similar to the one found in
26/// bash, but with some key differences. Namely, that `*` matches all
27/// characters and does not exclude path separators.
28///
29/// * `?` matches a single character.
30/// * `*` matches zero or more characters.
31/// * `[<chars>]` matches one character in the bracket. Character ranges,
32/// e.g., `[a-z]`, and negative sets via `[^ab]` or `[!ab]` are also
33/// supported.
34/// * `{<glob>,...}` matches one of the globs in the list. Nested brace
35/// expansions are not supported. If \p MaxSubPatterns is empty then
36/// brace expansions are not supported and characters `{,}` are treated as
37/// literals.
38/// * `\` escapes the next character so it is treated as a literal.
39///
40/// Some known edge cases are:
41/// * The literal `]` is allowed as the first character in a character class,
42/// i.e., `[]]` is valid and matches the literal `]`.
43/// * The empty character class, i.e., `[]`, is invalid.
44/// * Empty or singleton brace expansions, e.g., `{}`, `{a}`, are invalid.
45/// * The literals `}` and `,` that are not inside a brace expansion are taken
46/// as literals, e.g., `,}` is valid but `{` is not.
47///
48/// Examples:
49/// * `*[/\\]foo.{c,cpp}` will match (unix or windows) paths to files named
50/// `foo.c` or `foo.cpp`.
51/// * `_Z{N,NK,}S[tabsoid]*` will match mangled C++ standard library functions.
53public:
/// \param Pat the pattern to match against
/// \param MaxSubPatterns if provided, limits the number of subpatterns
///                       created by expanding braces; otherwise brace
///                       expansion is disabled
59 create(StringRef Pat, std::optional<size_t> MaxSubPatterns = {},
60 bool SlashAgnostic = false);
61 /// \returns \p true if \p S matches this glob pattern
62 LLVM_ABI bool match(StringRef S) const;
63
64 // Returns true for glob pattern "*". Can be used to avoid expensive
65 // preparation/acquisition of the input for match().
66 bool isTrivialMatchAll() const {
67 if (PrefixSize)
68 return false;
69 if (SuffixSize)
70 return false;
71 if (SubGlobs.size() != 1)
72 return false;
73 return SubGlobs[0].getPat() == "*";
74 }
75
76 // The following functions are just shortcuts for faster matching. They are
77 // conservative to simplify implementations.
78
79 // Returns plain prefix of the pattern.
80 StringRef prefix() const { return Pattern.take_front(PrefixSize); }
81 // Returns plain suffix of the pattern.
82 StringRef suffix() const { return Pattern.take_back(SuffixSize); }
83 // Returns the longest plain substring of the pattern between prefix and
84 // suffix.
86
87private:
89 size_t PrefixSize = 0;
90 size_t SuffixSize = 0;
91 bool SlashAgnostic = false;
92
93 struct SubGlobPattern {
94 /// \param Pat the pattern to match against
96 bool SlashAgnostic);
97 /// \returns \p true if \p S matches this glob pattern
98 LLVM_ABI bool match(StringRef S, bool SlashAgnostic) const;
99 StringRef getPat() const { return StringRef(Pat.data(), Pat.size()); }
100
101 // Brackets with their end position and matched bytes.
102 struct Bracket {
105 };
108 };
110};
111}
112
113#endif // LLVM_SUPPORT_GLOBPATTERN_H
This file implements the BitVector class.
#define LLVM_ABI
Definition Compiler.h:213
#define LLVM_ABI_FOR_TEST
Definition Compiler.h:218
This file defines the SmallVector class.
Tagged union holding either a T or a Error.
Definition Error.h:485
This class implements a glob pattern matcher similar to the one found in bash, but with some key diff...
Definition GlobPattern.h:52
LLVM_ABI_FOR_TEST StringRef longest_substr() const
static LLVM_ABI Expected< GlobPattern > create(StringRef Pat, std::optional< size_t > MaxSubPatterns={}, bool SlashAgnostic=false)
StringRef suffix() const
Definition GlobPattern.h:82
StringRef prefix() const
Definition GlobPattern.h:80
LLVM_ABI bool match(StringRef S) const
bool isTrivialMatchAll() const
Definition GlobPattern.h:66
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
constexpr size_t size() const
Get the string size.
Definition StringRef.h:144
constexpr const char * data() const
Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:138
StringRef take_back(size_t N=1) const
Return a StringRef equal to 'this' but with only the last N elements remaining.
Definition StringRef.h:609
StringRef take_front(size_t N=1) const
Return a StringRef equal to 'this' but with only the first N elements remaining.
Definition StringRef.h:600
bool match(Val *V, const Pattern &P)
This is an optimization pass for GlobalISel generic memory operations.