LLVM 23.0.0git
SpecialCaseList.cpp
Go to the documentation of this file.
1//===-- SpecialCaseList.cpp - special case list for sanitizers ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This is a utility class for instrumentation passes (like AddressSanitizer
10// or ThreadSanitizer) to avoid instrumenting some functions or global
11// variables, or to instrument some functions or global variables in a specific
12// way, based on a user-supplied list.
13//
14//===----------------------------------------------------------------------===//
15
17#include "llvm/ADT/RadixTree.h"
18#include "llvm/ADT/STLExtras.h"
20#include "llvm/ADT/StringMap.h"
21#include "llvm/ADT/StringRef.h"
27#include "llvm/Support/Regex.h"
30#include <assert.h>
31#include <memory>
32#include <mutex>
33#include <stdio.h>
34#include <string>
35#include <system_error>
36#include <utility>
37#include <variant>
38#include <vector>
39
40namespace llvm {
41
42namespace {
43
44// Legacy v1 matcher.
// Keeps every rule as a compiled regex together with the line it came from.
// Matcher selects this backend when QueryOptions::UseGlobs is false.
45class RegexMatcher {
46public:
// Compiles Pattern (after expanding `*` to `.*` and anchoring it) and records
// it with LineNumber. Fails on an empty or invalid pattern.
47 Error insert(StringRef Pattern, unsigned LineNumber);
// Returns the line number of the most recently inserted rule that matches
// Query, or 0 when no rule matches.
48 unsigned match(StringRef Query) const;
// Returns the pattern text of the first rule recorded with LineNo. Must only
// be called with a line number that was previously inserted.
49 StringRef findRule(unsigned LineNo) const;
50
51private:
// One rule: the pattern text as supplied, its line number and the compiled
// regex.
52 struct Reg {
53 Reg(StringRef Name, unsigned LineNo, Regex &&Rg)
54 : Name(Name), LineNo(LineNo), Rg(std::move(Rg)) {}
// Non-owning view of the pattern text as passed to insert().
55 StringRef Name;
56 unsigned LineNo;
57 Regex Rg;
58 };
59
// Rules in insertion order; match() walks this back to front.
60 std::vector<Reg> RegExes;
61};
62
// Glob-based matcher. Matcher selects this backend when
// QueryOptions::UseGlobs is true. Rules are kept in insertion order in Globs;
// lookup indexes over them are built on the first match() call.
63class GlobMatcher {
64public:
// Compiles Pattern as a glob and records it with LineNumber. Fails on an
// empty pattern or when the glob cannot be created.
65 Error insert(StringRef Pattern, unsigned LineNumber);
// Returns the line number of the matching rule with the highest index in
// Globs, or 0 when no rule matches.
66 unsigned match(StringRef Query) const;
// Returns the pattern text of the first rule recorded with LineNo. Must only
// be called with a line number that was previously inserted.
67 StringRef findRule(unsigned LineNo) const;
68
69private:
// One rule: the pattern text as supplied, its line number and the compiled
// glob.
70 struct Glob {
71 Glob(StringRef Name, unsigned LineNo, GlobPattern &&Pattern)
72 : Name(Name), LineNo(LineNo), Pattern(std::move(Pattern)) {}
// Non-owning view of the pattern text as passed to insert().
73 StringRef Name;
74 unsigned LineNo;
75 GlobPattern Pattern;
76 };
77
// Builds the two lookup trees below from Globs; does nothing after the first
// call.
78 void LazyInit() const;
79
80 std::vector<GlobMatcher::Glob> Globs;
81
// Index keyed first by a glob's literal prefix and then by its reversed
// literal suffix; the leaves receive indexes into Globs (see LazyInit).
// NOTE(review): the innermost mapped type is not visible in this listing.
82 mutable RadixTree<iterator_range<StringRef::const_iterator>,
83 RadixTree<iterator_range<StringRef::const_reverse_iterator>,
85 PrefixSuffixToGlob;
86
// Index keyed by the longest literal substring of globs that have neither a
// literal prefix nor a literal suffix; the leaves receive indexes into Globs.
87 mutable RadixTree<iterator_range<StringRef::const_iterator>,
89 SubstrToGlob;
90
// Set by LazyInit() once the trees have been populated.
91 mutable bool Initialized = false;
92};
93
// Per-matcher settings chosen by the parser from the file's version header.
94struct QueryOptions {
// When true the Matcher uses GlobMatcher, otherwise RegexMatcher.
95 bool UseGlobs = true;
// When true Matcher::match strips a leading "./" from the query before
// matching.
96 bool RemoveDotSlash = false;
// Only consulted when RemoveDotSlash is set: the query is matched both with
// and without the leading "./", and a one-time warning is printed if the
// unstripped form yields the higher line number.
97 bool WarnDotSlashMatch = false;
98};
99
100/// Represents a set of patterns and their line numbers.
/// Wraps either a RegexMatcher or a GlobMatcher (chosen from QueryOptions at
/// construction) and applies the dot-slash handling described on match().
101class Matcher {
102public:
103 explicit Matcher(QueryOptions QOpts);
104
// Forwards to the active backend's insert().
105 Error insert(StringRef Pattern, unsigned LineNumber);
// Returns the line number of the matching pattern, or 0 if nothing matches.
106 unsigned match(StringRef Query) const;
107
// True when match() returns a non-zero line number.
108 bool matchAny(StringRef Query) const { return match(Query); }
109
110private:
// Queries the active backend with Query exactly as given.
111 unsigned matchInternal(StringRef Query) const;
// Looks up the pattern text recorded for LineNo in the active backend.
112 StringRef findRule(unsigned LineNo) const;
113
// The active backend.
114 std::variant<RegexMatcher, GlobMatcher> M;
115 QueryOptions Options;
// Guards the deprecation warning in match() so it is printed at most once
// per Matcher.
116 mutable std::once_flag Warned;
117};
118
119Error RegexMatcher::insert(StringRef Pattern, unsigned LineNumber) {
120 if (Pattern.empty())
121 return createStringError(errc::invalid_argument,
122 "Supplied regex was blank");
123
124 // Replace * with .*
125 auto Regexp = Pattern.str();
126 for (size_t pos = 0; (pos = Regexp.find('*', pos)) != std::string::npos;
127 pos += strlen(".*")) {
128 Regexp.replace(pos, strlen("*"), ".*");
129 }
130
131 Regexp = (Twine("^(") + StringRef(Regexp) + ")$").str();
132
133 // Check that the regexp is valid.
134 Regex CheckRE(Regexp);
135 std::string REError;
136 if (!CheckRE.isValid(REError))
137 return createStringError(errc::invalid_argument, REError);
138
139 RegExes.emplace_back(Pattern, LineNumber, std::move(CheckRE));
140 return Error::success();
141}
142
143unsigned RegexMatcher::match(StringRef Query) const {
144 for (const auto &R : reverse(RegExes))
145 if (R.Rg.match(Query))
146 return R.LineNo;
147 return 0;
148}
149
150StringRef RegexMatcher::findRule(unsigned LineNo) const {
151 for (const auto &R : RegExes)
152 if (R.LineNo == LineNo)
153 return R.Name;
154 llvm_unreachable("`findRule` should be called only with correct `LineNo`");
155 return {};
156}
157
158Error GlobMatcher::insert(StringRef Pattern, unsigned LineNumber) {
159 if (Pattern.empty())
160 return createStringError(errc::invalid_argument, "Supplied glob was blank");
161
162 auto Res = GlobPattern::create(Pattern, /*MaxSubPatterns=*/1024);
163 if (auto Err = Res.takeError())
164 return Err;
165 Globs.emplace_back(Pattern, LineNumber, std::move(Res.get()));
166 return Error::success();
167}
168
169void GlobMatcher::LazyInit() const {
170 if (LLVM_LIKELY(Initialized))
171 return;
172 Initialized = true;
173 for (const auto &[Idx, G] : enumerate(Globs)) {
174 StringRef Prefix = G.Pattern.prefix();
175 StringRef Suffix = G.Pattern.suffix();
176
177 if (Suffix.empty() && Prefix.empty()) {
178 // If both prefix and suffix are empty put into special tree to search by
179 // substring in a middle.
180 StringRef Substr = G.Pattern.longest_substr();
181 if (!Substr.empty()) {
182 // But only if substring is not empty. Searching this tree is more
183 // expensive.
184 auto &V = SubstrToGlob.emplace(Substr).first->second;
185 V.emplace_back(Idx);
186 continue;
187 }
188 }
189
190 auto &SToGlob = PrefixSuffixToGlob.emplace(Prefix).first->second;
191 auto &V = SToGlob.emplace(reverse(Suffix)).first->second;
192 V.emplace_back(Idx);
193 }
194}
195
196unsigned GlobMatcher::match(StringRef Query) const {
197 LazyInit();
198
199 int Best = -1;
200 if (!PrefixSuffixToGlob.empty()) {
201 for (const auto &[_, SToGlob] : PrefixSuffixToGlob.find_prefixes(Query)) {
202 for (const auto &[_, V] : SToGlob.find_prefixes(reverse(Query))) {
203 for (int Idx : reverse(V)) {
204 if (Best > Idx)
205 break;
206 const GlobMatcher::Glob &G = Globs[Idx];
207 if (G.Pattern.match(Query)) {
208 Best = Idx;
209 // As soon as we find a match in the vector, we can break for this
210 // vector, since the globs are already sorted by priority within the
211 // prefix group. However, we continue searching other prefix groups
212 // in the map, as they may contain a better match overall.
213 break;
214 }
215 }
216 }
217 }
218 }
219
220 if (!SubstrToGlob.empty()) {
221 // As we don't know when substring exactly starts, we will try all
222 // possibilities. In most cases search will fail on first characters.
223 for (StringRef Q = Query; !Q.empty(); Q = Q.drop_front()) {
224 for (const auto &[_, V] : SubstrToGlob.find_prefixes(Q)) {
225 for (int Idx : reverse(V)) {
226 if (Best > Idx)
227 break;
228 const GlobMatcher::Glob &G = Globs[Idx];
229 if (G.Pattern.match(Query)) {
230 Best = Idx;
231 // As soon as we find a match in the vector, we can break for this
232 // vector, since the globs are already sorted by priority within the
233 // prefix group. However, we continue searching other prefix groups
234 // in the map, as they may contain a better match overall.
235 break;
236 }
237 }
238 }
239 }
240 }
241 return Best < 0 ? 0 : Globs[Best].LineNo;
242}
243
244StringRef GlobMatcher::findRule(unsigned LineNo) const {
245 for (const auto &G : Globs)
246 if (G.LineNo == LineNo)
247 return G.Name;
248 llvm_unreachable("`findRule` should be called only with correct `LineNo`");
249 return {};
250}
251
252Matcher::Matcher(QueryOptions QOpts) : Options(QOpts) {
253 if (Options.UseGlobs)
254 M.emplace<GlobMatcher>();
255 else
256 M.emplace<RegexMatcher>();
257}
258
259Error Matcher::insert(StringRef Pattern, unsigned LineNumber) {
260 return std::visit([&](auto &V) { return V.insert(Pattern, LineNumber); }, M);
261}
262
263/// Matches Query against the patterns. The behavior is controlled by
264/// `#!special-case-list` version.
265//
266// - Version 1 and 2: Path is matched as-is, regardless of presence of "./".
267// - Version 3, 5 and higher: Paths with leading dot-slash are canonicalized
268// to paths without dot-slash before matching. This means that a rule
269// like `src=./foo` will never match, and `src=foo` will match both
270// `foo` and `./foo`. (Version 3 never became default but has this behavior).
271// - Version 4: Transitionary version. Paths are matched both ways
272// (canonicalized and non-canonicalized) to maintain backward compatibility.
273// If a match only works with the old behavior (non-canonicalized), a warning
274// is emitted.
275unsigned Matcher::match(StringRef Query) const {
276 if (!Options.RemoveDotSlash)
277 return matchInternal(Query);
278
279 if (!Options.WarnDotSlashMatch)
280 return matchInternal(llvm::sys::path::remove_leading_dotslash(Query));
281
282 StringRef FixedQuery = llvm::sys::path::remove_leading_dotslash(Query);
283 unsigned FixedMatched = matchInternal(FixedQuery);
284 if (FixedQuery == Query)
285 return FixedMatched;
286
287 unsigned OriginalMatch = matchInternal(Query);
288 if (OriginalMatch > FixedMatched) {
289 std::call_once(Warned, [&]() {
290 WithColor::warning() << "Deprecated behaviour: pattern '"
291 << findRule(OriginalMatch) << "' matches '" << Query
292 << "', update it to match '" << FixedQuery
293 << "' instead (further warnings suppressed).\n";
294 });
295 }
296 return std::max(OriginalMatch, FixedMatched);
297}
298
299unsigned Matcher::matchInternal(StringRef Query) const {
300 return std::visit([&](auto &V) -> unsigned { return V.match(Query); }, M);
301}
302
303StringRef Matcher::findRule(unsigned LineNo) const {
304 return std::visit([&](auto &V) -> StringRef { return V.findRule(LineNo); },
305 M);
306}
307} // namespace
308
310public:
311 const Matcher *findMatcher(StringRef Prefix, StringRef Category) const;
312
314
315 explicit SectionImpl(QueryOptions QOpts) : SectionMatcher(QOpts) {}
316
319};
320
321// TODO: Refactor this to return Expected<...>
322std::unique_ptr<SpecialCaseList>
323SpecialCaseList::create(const std::vector<std::string> &Paths,
324 llvm::vfs::FileSystem &FS, std::string &Error) {
325 std::unique_ptr<SpecialCaseList> SCL(new SpecialCaseList());
326 if (SCL->createInternal(Paths, FS, Error))
327 return SCL;
328 return nullptr;
329}
330
331std::unique_ptr<SpecialCaseList> SpecialCaseList::create(const MemoryBuffer *MB,
332 std::string &Error) {
333 std::unique_ptr<SpecialCaseList> SCL(new SpecialCaseList());
334 if (SCL->createInternal(MB, Error))
335 return SCL;
336 return nullptr;
337}
338
339std::unique_ptr<SpecialCaseList>
340SpecialCaseList::createOrDie(const std::vector<std::string> &Paths,
342 std::string Error;
343 if (auto SCL = create(Paths, FS, Error))
344 return SCL;
346}
347
// Reads and parses every file in Paths, in order, through VFS. The position
// of a file in Paths is passed to parse() as its file index. Stops at the
// first file that cannot be opened or parsed, stores a message naming that
// file in Error and returns false; returns true when all files were parsed.
348bool SpecialCaseList::createInternal(const std::vector<std::string> &Paths,
349 vfs::FileSystem &VFS, std::string &Error) {
350 for (size_t i = 0; i < Paths.size(); ++i) {
351 const auto &Path = Paths[i];
353 VFS.getBufferForFile(Path);
354 if (std::error_code EC = FileOrErr.getError()) {
355 Error = (Twine("can't open file '") + Path + "': " + EC.message()).str();
356 return false;
357 }
// Parse into a scratch string so the reported message can be prefixed with
// the file name.
358 std::string ParseError;
359 if (!parse(i, FileOrErr.get().get(), ParseError)) {
360 Error = (Twine("error parsing file '") + Path + "': " + ParseError).str();
361 return false;
362 }
363 }
364 return true;
365}
366
368 std::string &Error) {
369 if (!parse(0, MB, Error))
370 return false;
371 return true;
372}
373
375SpecialCaseList::addSection(StringRef SectionStr, unsigned FileNo,
376 unsigned LineNo, bool UseGlobs) {
377 SectionStr = SectionStr.copy(StrAlloc);
378 Sections.emplace_back(SectionStr, FileNo, UseGlobs);
379 auto &Section = Sections.back();
380
381 if (auto Err = Section.Impl->SectionMatcher.insert(SectionStr, LineNo)) {
383 "malformed section at line " + Twine(LineNo) +
384 ": '" + SectionStr +
385 "': " + toString(std::move(Err)));
386 }
387
388 return &Section;
389}
390
391bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB,
392 std::string &Error) {
393 unsigned long long Version = 2;
394
395 StringRef Header = MB->getBuffer();
396 if (Header.consume_front("#!special-case-list-v"))
397 consumeUnsignedInteger(Header, 10, Version);
398
399 auto MinVersion = [&](unsigned V) { return Version >= V; };
400
401 // In https://reviews.llvm.org/D154014 we added glob support and planned
402 // to remove regex support in patterns. We temporarily support the
403 // original behavior using regexes if "#!special-case-list-v1" is the
404 // first line of the file. For more details, see
405 // https://discourse.llvm.org/t/use-glob-instead-of-regex-for-specialcaselists/71666
406 bool UseGlobs = MinVersion(2);
407 bool RemoveDotSlash = MinVersion(3);
408 bool WarnDotSlash = MinVersion(4) && !MinVersion(5);
409
410 auto ErrOrSection = addSection("*", FileIdx, 1, true);
411 if (auto Err = ErrOrSection.takeError()) {
412 Error = toString(std::move(Err));
413 return false;
414 }
415 Section::SectionImpl *CurrentImpl = ErrOrSection.get()->Impl.get();
416
417 // This is the current list of prefixes for all existing users matching file
418 // path. We may need parametrization in constructor in future.
419 constexpr StringRef PathPrefixes[] = {"src", "!src", "mainfile", "source"};
420
421 for (line_iterator LineIt(*MB, /*SkipBlanks=*/true, /*CommentMarker=*/'#');
422 !LineIt.is_at_eof(); LineIt++) {
423 unsigned LineNo = LineIt.line_number();
424 StringRef Line = LineIt->trim();
425 if (Line.empty())
426 continue;
427
428 // Save section names
429 if (Line.starts_with("[")) {
430 if (!Line.ends_with("]")) {
431 Error =
432 ("malformed section header on line " + Twine(LineNo) + ": " + Line)
433 .str();
434 return false;
435 }
436
437 auto ErrOrSection =
438 addSection(Line.drop_front().drop_back(), FileIdx, LineNo, UseGlobs);
439 if (auto Err = ErrOrSection.takeError()) {
440 Error = toString(std::move(Err));
441 return false;
442 }
443 CurrentImpl = ErrOrSection.get()->Impl.get();
444 continue;
445 }
446
447 // Get our prefix and unparsed glob.
448 auto [Prefix, Postfix] = Line.split(":");
449 if (Postfix.empty()) {
450 // Missing ':' in the line.
451 Error = ("malformed line " + Twine(LineNo) + ": '" + Line + "'").str();
452 return false;
453 }
454
455 QueryOptions QOpts;
456 QOpts.UseGlobs = UseGlobs;
457 if (llvm::is_contained(PathPrefixes, Prefix)) {
458 QOpts.RemoveDotSlash = RemoveDotSlash;
459 QOpts.WarnDotSlashMatch = WarnDotSlash;
460 }
461
462 auto [Pattern, Category] = Postfix.split("=");
463 auto [It, _] = CurrentImpl->Entries[Prefix].try_emplace(Category, QOpts);
464 Pattern = Pattern.copy(StrAlloc);
465 if (auto Err = It->second.insert(Pattern, LineNo)) {
466 Error =
467 (Twine("malformed ") + (UseGlobs ? "glob" : "regex") + " in line " +
468 Twine(LineNo) + ": '" + Pattern + "': " + toString(std::move(Err)))
469 .str();
470 return false;
471 }
472 }
473
474 return true;
475}
476
// Destructor defaulted here in the implementation file.
477SpecialCaseList::~SpecialCaseList() = default;
478
480 StringRef Query, StringRef Category) const {
481 auto [FileIdx, LineNo] = inSectionBlame(Section, Prefix, Query, Category);
482 return LineNo;
483}
484
485std::pair<unsigned, unsigned>
487 StringRef Query, StringRef Category) const {
488 for (const auto &S : reverse(Sections)) {
489 if (S.Impl->SectionMatcher.matchAny(Section)) {
490 unsigned Blame = S.getLastMatch(Prefix, Query, Category);
491 if (Blame)
492 return {S.FileIdx, Blame};
493 }
494 }
495 return NotFound;
496}
497
499 bool UseGlobs)
500 : Name(Str), FileIdx(FileIdx),
501 Impl(std::make_unique<SectionImpl>(
502 QueryOptions{UseGlobs, /*RemoveDotSlash=*/false})) {}
503
505
507
509 return Impl->SectionMatcher.matchAny(Name);
510}
511
512const Matcher *
514 StringRef Category) const {
516 if (I == Entries.end())
517 return nullptr;
518 StringMap<Matcher>::const_iterator II = I->second.find(Category);
519 if (II == I->second.end())
520 return nullptr;
521
522 return &II->second;
523}
524
526 StringRef Query,
527 StringRef Category) const {
528 if (const Matcher *M = Impl->findMatcher(Prefix, Category))
529 return M->match(Query);
530 return 0;
531}
532
534 return Impl->Entries.contains(Prefix);
535}
536
537} // namespace llvm
This file defines the StringMap class.
#define LLVM_LIKELY(EXPR)
Definition Compiler.h:335
#define _
static LVOptions Options
Definition LVOptions.cpp:25
static llvm::Error parse(GsymDataExtractor &Data, uint64_t BaseAddr, LineEntryCallback const &Callback)
Definition LineTable.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
static const char * toString(MIToken::TokenKind TokenKind)
Definition MIParser.cpp:630
static Error addSection(const NewSectionInfo &NewSection, Object &Obj)
Register Reg
uint64_t IntrinsicInst * II
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
Defines the virtual file system interface vfs::FileSystem.
Represents either an error or a value T.
Definition ErrorOr.h:56
reference get()
Definition ErrorOr.h:149
std::error_code getError() const
Definition ErrorOr.h:152
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
Tagged union holding either a T or a Error.
Definition Error.h:485
This interface provides simple read-only access to a block of memory, and provides simple methods for...
StringMap< StringMap< Matcher > > SectionEntries
const Matcher * findMatcher(StringRef Prefix, StringRef Category) const
LLVM_ABI Section(StringRef Name, unsigned FileIdx, bool UseGlobs)
LLVM_ABI bool hasPrefix(StringRef Prefix) const
Returns true if the section has any entries for the given prefix.
LLVM_ABI unsigned getLastMatch(StringRef Prefix, StringRef Query, StringRef Category) const
LLVM_ABI bool matchName(StringRef Name) const
static constexpr std::pair< unsigned, unsigned > NotFound
LLVM_ABI std::pair< unsigned, unsigned > inSectionBlame(StringRef Section, StringRef Prefix, StringRef Query, StringRef Category=StringRef()) const
Returns the file index and the line number <FileIdx, LineNo> corresponding to the special case list e...
LLVM_ABI bool createInternal(const std::vector< std::string > &Paths, vfs::FileSystem &VFS, std::string &Error)
static LLVM_ABI std::unique_ptr< SpecialCaseList > createOrDie(const std::vector< std::string > &Paths, llvm::vfs::FileSystem &FS)
Parses the special case list entries from files.
static LLVM_ABI std::unique_ptr< SpecialCaseList > create(const std::vector< std::string > &Paths, llvm::vfs::FileSystem &FS, std::string &Error)
Parses the special case list entries from files.
LLVM_ABI bool inSection(StringRef Section, StringRef Prefix, StringRef Query, StringRef Category=StringRef()) const
Returns true, if special case list contains a line.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
Definition StringMap.h:128
StringMapIterBase< StringMap< Matcher >, true > const_iterator
Definition StringMap.h:207
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
StringRef copy(Allocator &A) const
Definition StringRef.h:160
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The virtual file system interface.
llvm::ErrorOr< std::unique_ptr< llvm::MemoryBuffer > > getBufferForFile(const Twine &Name, int64_t FileSize=-1, bool RequiresNullTerminator=true, bool IsVolatile=false, bool IsText=true)
This is a convenience method that opens a file, gets its content and then closes the file.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool match(Val *V, const Pattern &P)
LLVM_ABI StringRef remove_leading_dotslash(StringRef path LLVM_LIFETIME_BOUND, Style style=Style::native)
Remove redundant leading "./" pieces and consecutive separators.
This is an optimization pass for GlobalISel generic memory operations.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2553
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition Error.h:1321
LLVM_ABI bool consumeUnsignedInteger(StringRef &Str, unsigned Radix, unsigned long long &Result)
@ invalid_argument
Definition Errc.h:56
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1916
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:860