LLVM 22.0.0git
SpecialCaseList.cpp
Go to the documentation of this file.
1//===-- SpecialCaseList.cpp - special case list for sanitizers ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This is a utility class for instrumentation passes (like AddressSanitizer
10// or ThreadSanitizer) to avoid instrumenting some functions or global
11// variables, or to instrument some functions or global variables in a specific
12// way, based on a user-supplied list.
13//
14//===----------------------------------------------------------------------===//
15
17#include "llvm/ADT/RadixTree.h"
18#include "llvm/ADT/STLExtras.h"
20#include "llvm/ADT/StringMap.h"
21#include "llvm/ADT/StringRef.h"
26#include "llvm/Support/Regex.h"
29#include <memory>
30#include <stdio.h>
31#include <string>
32#include <system_error>
33#include <utility>
34#include <variant>
35#include <vector>
36
37namespace llvm {
38
39namespace {
40
41using Match = std::pair<StringRef, unsigned>;
42static constexpr Match NotMatched = {"", 0};
43
44// Lagacy v1 matcher.
45class RegexMatcher {
46public:
47 Error insert(StringRef Pattern, unsigned LineNumber);
48 void preprocess(bool BySize);
49
50 Match match(StringRef Query) const;
51
52 struct Reg {
53 Reg(StringRef Name, unsigned LineNo, Regex &&Rg)
54 : Name(Name), LineNo(LineNo), Rg(std::move(Rg)) {}
55 StringRef Name;
56 unsigned LineNo;
57 Regex Rg;
58 };
59
60 std::vector<Reg> RegExes;
61};
62
63class GlobMatcher {
64public:
65 Error insert(StringRef Pattern, unsigned LineNumber);
66 void preprocess(bool BySize);
67
68 Match match(StringRef Query) const;
69
70 struct Glob {
71 Glob(StringRef Name, unsigned LineNo, GlobPattern &&Pattern)
72 : Name(Name), LineNo(LineNo), Pattern(std::move(Pattern)) {}
73 StringRef Name;
74 unsigned LineNo;
75 GlobPattern Pattern;
76 };
77
78 std::vector<GlobMatcher::Glob> Globs;
79
80 RadixTree<iterator_range<StringRef::const_iterator>,
81 RadixTree<iterator_range<StringRef::const_reverse_iterator>,
83 PrefixSuffixToGlob;
84
85 RadixTree<iterator_range<StringRef::const_iterator>, SmallVector<int, 1>>
86 SubstrToGlob;
87};
88
89/// Represents a set of patterns and their line numbers
90class Matcher {
91public:
92 Matcher(bool UseGlobs, bool RemoveDotSlash);
93
94 Error insert(StringRef Pattern, unsigned LineNumber);
95 void preprocess(bool BySize);
96 Match match(StringRef Query) const;
97
98 bool matchAny(StringRef Query) const { return match(Query).second > 0; }
99
100 std::variant<RegexMatcher, GlobMatcher> M;
101 bool RemoveDotSlash;
102};
103
104Error RegexMatcher::insert(StringRef Pattern, unsigned LineNumber) {
105 if (Pattern.empty())
106 return createStringError(errc::invalid_argument,
107 "Supplied regex was blank");
108
109 // Replace * with .*
110 auto Regexp = Pattern.str();
111 for (size_t pos = 0; (pos = Regexp.find('*', pos)) != std::string::npos;
112 pos += strlen(".*")) {
113 Regexp.replace(pos, strlen("*"), ".*");
114 }
115
116 Regexp = (Twine("^(") + StringRef(Regexp) + ")$").str();
117
118 // Check that the regexp is valid.
119 Regex CheckRE(Regexp);
120 std::string REError;
121 if (!CheckRE.isValid(REError))
122 return createStringError(errc::invalid_argument, REError);
123
124 RegExes.emplace_back(Pattern, LineNumber, std::move(CheckRE));
125 return Error::success();
126}
127
128void RegexMatcher::preprocess(bool BySize) {
129 if (BySize) {
130 llvm::stable_sort(RegExes, [](const Reg &A, const Reg &B) {
131 return A.Name.size() < B.Name.size();
132 });
133 }
134}
135
136Match RegexMatcher::match(StringRef Query) const {
137 for (const auto &R : reverse(RegExes))
138 if (R.Rg.match(Query))
139 return {R.Name, R.LineNo};
140 return NotMatched;
141}
142
143Error GlobMatcher::insert(StringRef Pattern, unsigned LineNumber) {
144 if (Pattern.empty())
145 return createStringError(errc::invalid_argument, "Supplied glob was blank");
146
147 auto Res = GlobPattern::create(Pattern, /*MaxSubPatterns=*/1024);
148 if (auto Err = Res.takeError())
149 return Err;
150 Globs.emplace_back(Pattern, LineNumber, std::move(Res.get()));
151 return Error::success();
152}
153
154void GlobMatcher::preprocess(bool BySize) {
155 if (BySize) {
156 llvm::stable_sort(Globs, [](const Glob &A, const Glob &B) {
157 return A.Name.size() < B.Name.size();
158 });
159 }
160
161 for (const auto &[Idx, G] : enumerate(Globs)) {
162 StringRef Prefix = G.Pattern.prefix();
163 StringRef Suffix = G.Pattern.suffix();
164
165 if (Suffix.empty() && Prefix.empty()) {
166 // If both prefix and suffix are empty put into special tree to search by
167 // substring in a middle.
168 StringRef Substr = G.Pattern.longest_substr();
169 if (!Substr.empty()) {
170 // But only if substring is not empty. Searching this tree is more
171 // expensive.
172 auto &V = SubstrToGlob.emplace(Substr).first->second;
173 V.emplace_back(Idx);
174 continue;
175 }
176 }
177
178 auto &SToGlob = PrefixSuffixToGlob.emplace(Prefix).first->second;
179 auto &V = SToGlob.emplace(reverse(Suffix)).first->second;
180 V.emplace_back(Idx);
181 }
182}
183
184Match GlobMatcher::match(StringRef Query) const {
185 int Best = -1;
186 if (!PrefixSuffixToGlob.empty()) {
187 for (const auto &[_, SToGlob] : PrefixSuffixToGlob.find_prefixes(Query)) {
188 for (const auto &[_, V] : SToGlob.find_prefixes(reverse(Query))) {
189 for (int Idx : reverse(V)) {
190 if (Best > Idx)
191 break;
192 const GlobMatcher::Glob &G = Globs[Idx];
193 if (G.Pattern.match(Query)) {
194 Best = Idx;
195 // As soon as we find a match in the vector, we can break for this
196 // vector, since the globs are already sorted by priority within the
197 // prefix group. However, we continue searching other prefix groups
198 // in the map, as they may contain a better match overall.
199 break;
200 }
201 }
202 }
203 }
204 }
205
206 if (!SubstrToGlob.empty()) {
207 // As we don't know when substring exactly starts, we will try all
208 // possibilities. In most cases search will fail on first characters.
209 for (StringRef Q = Query; !Q.empty(); Q = Q.drop_front()) {
210 for (const auto &[_, V] : SubstrToGlob.find_prefixes(Q)) {
211 for (int Idx : reverse(V)) {
212 if (Best > Idx)
213 break;
214 const GlobMatcher::Glob &G = Globs[Idx];
215 if (G.Pattern.match(Query)) {
216 Best = Idx;
217 // As soon as we find a match in the vector, we can break for this
218 // vector, since the globs are already sorted by priority within the
219 // prefix group. However, we continue searching other prefix groups
220 // in the map, as they may contain a better match overall.
221 break;
222 }
223 }
224 }
225 }
226 }
227 if (Best < 0)
228 return NotMatched;
229 return {Globs[Best].Name, Globs[Best].LineNo};
230}
231
232Matcher::Matcher(bool UseGlobs, bool RemoveDotSlash)
233 : RemoveDotSlash(RemoveDotSlash) {
234 if (UseGlobs)
235 M.emplace<GlobMatcher>();
236 else
237 M.emplace<RegexMatcher>();
238}
239
240Error Matcher::insert(StringRef Pattern, unsigned LineNumber) {
241 return std::visit([&](auto &V) { return V.insert(Pattern, LineNumber); }, M);
242}
243
244void Matcher::preprocess(bool BySize) {
245 return std::visit([&](auto &V) { return V.preprocess(BySize); }, M);
246}
247
248Match Matcher::match(StringRef Query) const {
249 if (RemoveDotSlash)
251 return std::visit([&](auto &V) -> Match { return V.match(Query); }, M);
252}
253} // namespace
254
256public:
257 void preprocess(bool OrderBySize);
258 const Matcher *findMatcher(StringRef Prefix, StringRef Category) const;
259
261
262 explicit SectionImpl(bool UseGlobs)
263 : SectionMatcher(UseGlobs, /*RemoveDotSlash=*/false) {}
264
267};
268
269// TODO: Refactor this to return Expected<...>
270std::unique_ptr<SpecialCaseList>
271SpecialCaseList::create(const std::vector<std::string> &Paths,
272 llvm::vfs::FileSystem &FS, std::string &Error) {
273 std::unique_ptr<SpecialCaseList> SCL(new SpecialCaseList());
274 if (SCL->createInternal(Paths, FS, Error))
275 return SCL;
276 return nullptr;
277}
278
279std::unique_ptr<SpecialCaseList> SpecialCaseList::create(const MemoryBuffer *MB,
280 std::string &Error) {
281 std::unique_ptr<SpecialCaseList> SCL(new SpecialCaseList());
282 if (SCL->createInternal(MB, Error))
283 return SCL;
284 return nullptr;
285}
286
287std::unique_ptr<SpecialCaseList>
288SpecialCaseList::createOrDie(const std::vector<std::string> &Paths,
290 std::string Error;
291 if (auto SCL = create(Paths, FS, Error))
292 return SCL;
294}
295
296bool SpecialCaseList::createInternal(const std::vector<std::string> &Paths,
297 vfs::FileSystem &VFS, std::string &Error) {
298 for (size_t i = 0; i < Paths.size(); ++i) {
299 const auto &Path = Paths[i];
301 VFS.getBufferForFile(Path);
302 if (std::error_code EC = FileOrErr.getError()) {
303 Error = (Twine("can't open file '") + Path + "': " + EC.message()).str();
304 return false;
305 }
306 std::string ParseError;
307 if (!parse(i, FileOrErr.get().get(), ParseError, /*OrderBySize=*/false)) {
308 Error = (Twine("error parsing file '") + Path + "': " + ParseError).str();
309 return false;
310 }
311 }
312 return true;
313}
314
316 bool OrderBySize) {
317 if (!parse(0, MB, Error, OrderBySize))
318 return false;
319 return true;
320}
321
323SpecialCaseList::addSection(StringRef SectionStr, unsigned FileNo,
324 unsigned LineNo, bool UseGlobs) {
325 SectionStr = SectionStr.copy(StrAlloc);
326 Sections.emplace_back(SectionStr, FileNo, UseGlobs);
327 auto &Section = Sections.back();
328
329 if (auto Err = Section.Impl->SectionMatcher.insert(SectionStr, LineNo)) {
331 "malformed section at line " + Twine(LineNo) +
332 ": '" + SectionStr +
333 "': " + toString(std::move(Err)));
334 }
335
336 return &Section;
337}
338
339bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB,
340 std::string &Error, bool OrderBySize) {
341 unsigned long long Version = 2;
342
343 StringRef Header = MB->getBuffer();
344 if (Header.consume_front("#!special-case-list-v"))
345 consumeUnsignedInteger(Header, 10, Version);
346
347 // In https://reviews.llvm.org/D154014 we added glob support and planned
348 // to remove regex support in patterns. We temporarily support the
349 // original behavior using regexes if "#!special-case-list-v1" is the
350 // first line of the file. For more details, see
351 // https://discourse.llvm.org/t/use-glob-instead-of-regex-for-specialcaselists/71666
352 bool UseGlobs = Version > 1;
353
354 bool RemoveDotSlash = Version > 2;
355
356 auto ErrOrSection = addSection("*", FileIdx, 1, true);
357 if (auto Err = ErrOrSection.takeError()) {
358 Error = toString(std::move(Err));
359 return false;
360 }
361 Section::SectionImpl *CurrentImpl = ErrOrSection.get()->Impl.get();
362
363 // This is the current list of prefixes for all existing users matching file
364 // path. We may need parametrization in constructor in future.
365 constexpr StringRef PathPrefixes[] = {"src", "!src", "mainfile", "source"};
366
367 for (line_iterator LineIt(*MB, /*SkipBlanks=*/true, /*CommentMarker=*/'#');
368 !LineIt.is_at_eof(); LineIt++) {
369 unsigned LineNo = LineIt.line_number();
370 StringRef Line = LineIt->trim();
371 if (Line.empty())
372 continue;
373
374 // Save section names
375 if (Line.starts_with("[")) {
376 if (!Line.ends_with("]")) {
377 Error =
378 ("malformed section header on line " + Twine(LineNo) + ": " + Line)
379 .str();
380 return false;
381 }
382
383 auto ErrOrSection =
384 addSection(Line.drop_front().drop_back(), FileIdx, LineNo, UseGlobs);
385 if (auto Err = ErrOrSection.takeError()) {
386 Error = toString(std::move(Err));
387 return false;
388 }
389 CurrentImpl = ErrOrSection.get()->Impl.get();
390 continue;
391 }
392
393 // Get our prefix and unparsed glob.
394 auto [Prefix, Postfix] = Line.split(":");
395 if (Postfix.empty()) {
396 // Missing ':' in the line.
397 Error = ("malformed line " + Twine(LineNo) + ": '" + Line + "'").str();
398 return false;
399 }
400
401 auto [Pattern, Category] = Postfix.split("=");
402 auto [It, _] = CurrentImpl->Entries[Prefix].try_emplace(
403 Category, UseGlobs,
404 RemoveDotSlash && llvm::is_contained(PathPrefixes, Prefix));
405 Pattern = Pattern.copy(StrAlloc);
406 if (auto Err = It->second.insert(Pattern, LineNo)) {
407 Error =
408 (Twine("malformed ") + (UseGlobs ? "glob" : "regex") + " in line " +
409 Twine(LineNo) + ": '" + Pattern + "': " + toString(std::move(Err)))
410 .str();
411 return false;
412 }
413 }
414
415 for (Section &S : Sections)
416 S.Impl->preprocess(OrderBySize);
417
418 return true;
419}
420
421SpecialCaseList::~SpecialCaseList() = default;
422
424 StringRef Query, StringRef Category) const {
425 auto [FileIdx, LineNo] = inSectionBlame(Section, Prefix, Query, Category);
426 return LineNo;
427}
428
429std::pair<unsigned, unsigned>
431 StringRef Query, StringRef Category) const {
432 for (const auto &S : reverse(Sections)) {
433 if (S.Impl->SectionMatcher.matchAny(Section)) {
434 unsigned Blame = S.getLastMatch(Prefix, Query, Category);
435 if (Blame)
436 return {S.FileIdx, Blame};
437 }
438 }
439 return NotFound;
440}
441
443 bool UseGlobs)
444 : Name(Str), FileIdx(FileIdx),
445 Impl(std::make_unique<SectionImpl>(UseGlobs)) {}
446
448
450
452 return Impl->SectionMatcher.matchAny(Name);
453}
454
455const Matcher *
457 StringRef Category) const {
459 if (I == Entries.end())
460 return nullptr;
461 StringMap<Matcher>::const_iterator II = I->second.find(Category);
462 if (II == I->second.end())
463 return nullptr;
464
465 return &II->second;
466}
467
469 SectionMatcher.preprocess(false);
470 for (auto &[K1, E] : Entries)
471 for (auto &[K2, M] : E)
472 M.preprocess(OrderBySize);
473}
474
476 StringRef Query,
477 StringRef Category) const {
478 if (const Matcher *M = Impl->findMatcher(Prefix, Category))
479 return M->match(Query).second;
480 return 0;
481}
482
484 StringRef Query,
485 StringRef Category) const {
486 if (const Matcher *M = Impl->findMatcher(Prefix, Category))
487 return M->match(Query).first;
488 return {};
489}
490
492 return Impl->Entries.find(Prefix) != Impl->Entries.end();
493}
494
495} // namespace llvm
This file defines the StringMap class.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define _
static llvm::Error parse(DataExtractor &Data, uint64_t BaseAddr, LineEntryCallback const &Callback)
Definition LineTable.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
static const char * toString(MIToken::TokenKind TokenKind)
Definition MIParser.cpp:624
static Error addSection(const NewSectionInfo &NewSection, Object &Obj)
Register Reg
uint64_t IntrinsicInst * II
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
Defines the virtual file system interface vfs::FileSystem.
Represents either an error or a value T.
Definition ErrorOr.h:56
reference get()
Definition ErrorOr.h:149
std::error_code getError() const
Definition ErrorOr.h:152
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
Tagged union holding either a T or a Error.
Definition Error.h:485
This interface provides simple read-only access to a block of memory, and provides simple methods for...
StringMap< StringMap< Matcher > > SectionEntries
const Matcher * findMatcher(StringRef Prefix, StringRef Category) const
LLVM_ABI Section(StringRef Name, unsigned FileIdx, bool UseGlobs)
LLVM_ABI StringRef getLongestMatch(StringRef Prefix, StringRef Query, StringRef Category) const
LLVM_ABI bool hasPrefix(StringRef Prefix) const
Returns true if the section has any entries for the given prefix.
LLVM_ABI unsigned getLastMatch(StringRef Prefix, StringRef Query, StringRef Category) const
LLVM_ABI bool matchName(StringRef Name) const
static constexpr std::pair< unsigned, unsigned > NotFound
LLVM_ABI std::pair< unsigned, unsigned > inSectionBlame(StringRef Section, StringRef Prefix, StringRef Query, StringRef Category=StringRef()) const
Returns the file index and the line number <FileIdx, LineNo> corresponding to the special case list e...
LLVM_ABI bool createInternal(const std::vector< std::string > &Paths, vfs::FileSystem &VFS, std::string &Error)
static LLVM_ABI std::unique_ptr< SpecialCaseList > createOrDie(const std::vector< std::string > &Paths, llvm::vfs::FileSystem &FS)
Parses the special case list entries from files.
static LLVM_ABI std::unique_ptr< SpecialCaseList > create(const std::vector< std::string > &Paths, llvm::vfs::FileSystem &FS, std::string &Error)
Parses the special case list entries from files.
LLVM_ABI bool inSection(StringRef Section, StringRef Prefix, StringRef Query, StringRef Category=StringRef()) const
Returns true, if special case list contains a line.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
Definition StringMap.h:133
StringMapIterBase< StringMap< Matcher >, true > const_iterator
Definition StringMap.h:220
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
StringRef copy(Allocator &A) const
Definition StringRef.h:162
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The virtual file system interface.
llvm::ErrorOr< std::unique_ptr< llvm::MemoryBuffer > > getBufferForFile(const Twine &Name, int64_t FileSize=-1, bool RequiresNullTerminator=true, bool IsVolatile=false, bool IsText=true)
This is a convenience method that opens a file, gets its content and then closes the file.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
bool match(Val *V, const Pattern &P)
LLVM_ABI StringRef remove_leading_dotslash(StringRef path LLVM_LIFETIME_BOUND, Style style=Style::native)
Remove redundant leading "./" pieces and consecutive separators.
This is an optimization pass for GlobalISel generic memory operations.
void stable_sort(R &&Range)
Definition STLExtras.h:2058
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2472
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition Error.h:1305
LLVM_ABI bool consumeUnsignedInteger(StringRef &Str, unsigned Radix, unsigned long long &Result)
@ invalid_argument
Definition Errc.h:56
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1867
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1897
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:867