LLVM 22.0.0git
SpecialCaseList.cpp
Go to the documentation of this file.
1//===-- SpecialCaseList.cpp - special case list for sanitizers ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This is a utility class for instrumentation passes (like AddressSanitizer
10// or ThreadSanitizer) to avoid instrumenting some functions or global
11// variables, or to instrument some functions or global variables in a specific
12// way, based on a user-supplied list.
13//
14//===----------------------------------------------------------------------===//
15
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Path.h"
#include <algorithm>
#include <limits>
#include <memory>
#include <stdio.h>
#include <string>
#include <system_error>
#include <utility>
30
31namespace llvm {
32
33Error SpecialCaseList::RegexMatcher::insert(StringRef Pattern,
34 unsigned LineNumber) {
35 if (Pattern.empty())
37 "Supplied regex was blank");
38
39 // Replace * with .*
40 auto Regexp = Pattern.str();
41 for (size_t pos = 0; (pos = Regexp.find('*', pos)) != std::string::npos;
42 pos += strlen(".*")) {
43 Regexp.replace(pos, strlen("*"), ".*");
44 }
45
46 Regexp = (Twine("^(") + StringRef(Regexp) + ")$").str();
47
48 // Check that the regexp is valid.
49 Regex CheckRE(Regexp);
50 std::string REError;
51 if (!CheckRE.isValid(REError))
53
54 RegExes.emplace_back(Pattern, LineNumber, std::move(CheckRE));
55 return Error::success();
56}
57
58void SpecialCaseList::RegexMatcher::preprocess(bool BySize) {
59 if (BySize) {
60 llvm::stable_sort(RegExes, [](const Reg &A, const Reg &B) {
61 return A.Name.size() < B.Name.size();
62 });
63 }
64}
65
66void SpecialCaseList::RegexMatcher::match(
67 StringRef Query,
68 llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const {
69 for (const auto &R : reverse(RegExes))
70 if (R.Rg.match(Query))
71 return Cb(R.Name, R.LineNo);
72}
73
74Error SpecialCaseList::GlobMatcher::insert(StringRef Pattern,
75 unsigned LineNumber) {
76 if (Pattern.empty())
77 return createStringError(errc::invalid_argument, "Supplied glob was blank");
78
79 auto Res = GlobPattern::create(Pattern, /*MaxSubPatterns=*/1024);
80 if (auto Err = Res.takeError())
81 return Err;
82 Globs.emplace_back(Pattern, LineNumber, std::move(Res.get()));
83 return Error::success();
84}
85
86void SpecialCaseList::GlobMatcher::preprocess(bool BySize) {
87 if (BySize) {
88 llvm::stable_sort(Globs, [](const Glob &A, const Glob &B) {
89 return A.Name.size() < B.Name.size();
90 });
91 }
92}
93
94void SpecialCaseList::GlobMatcher::match(
95 StringRef Query,
96 llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const {
97 for (const auto &G : reverse(Globs))
98 if (G.Pattern.match(Query))
99 return Cb(G.Name, G.LineNo);
100}
101
102SpecialCaseList::Matcher::Matcher(bool UseGlobs, bool RemoveDotSlash)
103 : RemoveDotSlash(RemoveDotSlash) {
104 if (UseGlobs)
105 M.emplace<GlobMatcher>();
106 else
107 M.emplace<RegexMatcher>();
108}
109
110Error SpecialCaseList::Matcher::insert(StringRef Pattern, unsigned LineNumber) {
111 return std::visit([&](auto &V) { return V.insert(Pattern, LineNumber); }, M);
112}
113
114LLVM_ABI void SpecialCaseList::Matcher::preprocess(bool BySize) {
115 return std::visit([&](auto &V) { return V.preprocess(BySize); }, M);
116}
117
118void SpecialCaseList::Matcher::match(
119 StringRef Query,
120 llvm::function_ref<void(StringRef Rule, unsigned LineNo)> Cb) const {
121 if (RemoveDotSlash)
123 return std::visit([&](auto &V) { return V.match(Query, Cb); }, M);
124}
125
126// TODO: Refactor this to return Expected<...>
127std::unique_ptr<SpecialCaseList>
128SpecialCaseList::create(const std::vector<std::string> &Paths,
129 llvm::vfs::FileSystem &FS, std::string &Error) {
130 std::unique_ptr<SpecialCaseList> SCL(new SpecialCaseList());
131 if (SCL->createInternal(Paths, FS, Error))
132 return SCL;
133 return nullptr;
134}
135
136std::unique_ptr<SpecialCaseList> SpecialCaseList::create(const MemoryBuffer *MB,
137 std::string &Error) {
138 std::unique_ptr<SpecialCaseList> SCL(new SpecialCaseList());
139 if (SCL->createInternal(MB, Error))
140 return SCL;
141 return nullptr;
142}
143
144std::unique_ptr<SpecialCaseList>
145SpecialCaseList::createOrDie(const std::vector<std::string> &Paths,
147 std::string Error;
148 if (auto SCL = create(Paths, FS, Error))
149 return SCL;
151}
152
153bool SpecialCaseList::createInternal(const std::vector<std::string> &Paths,
154 vfs::FileSystem &VFS, std::string &Error) {
155 for (size_t i = 0; i < Paths.size(); ++i) {
156 const auto &Path = Paths[i];
158 VFS.getBufferForFile(Path);
159 if (std::error_code EC = FileOrErr.getError()) {
160 Error = (Twine("can't open file '") + Path + "': " + EC.message()).str();
161 return false;
162 }
163 std::string ParseError;
164 if (!parse(i, FileOrErr.get().get(), ParseError, /*OrderBySize=*/false)) {
165 Error = (Twine("error parsing file '") + Path + "': " + ParseError).str();
166 return false;
167 }
168 }
169 return true;
170}
171
173 bool OrderBySize) {
174 if (!parse(0, MB, Error, OrderBySize))
175 return false;
176 return true;
177}
178
180SpecialCaseList::addSection(StringRef SectionStr, unsigned FileNo,
181 unsigned LineNo, bool UseGlobs) {
182 Sections.emplace_back(SectionStr, FileNo, UseGlobs);
183 auto &Section = Sections.back();
184
185 SectionStr = SectionStr.copy(StrAlloc);
186 if (auto Err = Section.SectionMatcher.insert(SectionStr, LineNo)) {
188 "malformed section at line " + Twine(LineNo) +
189 ": '" + SectionStr +
190 "': " + toString(std::move(Err)));
191 }
192
193 return &Section;
194}
195
196bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB,
197 std::string &Error, bool OrderBySize) {
198 unsigned long long Version = 2;
199
200 StringRef Header = MB->getBuffer();
201 if (Header.consume_front("#!special-case-list-v"))
202 consumeUnsignedInteger(Header, 10, Version);
203
204 // In https://reviews.llvm.org/D154014 we added glob support and planned
205 // to remove regex support in patterns. We temporarily support the
206 // original behavior using regexes if "#!special-case-list-v1" is the
207 // first line of the file. For more details, see
208 // https://discourse.llvm.org/t/use-glob-instead-of-regex-for-specialcaselists/71666
209 bool UseGlobs = Version > 1;
210
211 bool RemoveDotSlash = Version > 2;
212
213 Section *CurrentSection;
214 if (auto Err = addSection("*", FileIdx, 1, true).moveInto(CurrentSection)) {
215 Error = toString(std::move(Err));
216 return false;
217 }
218
219 // This is the current list of prefixes for all existing users matching file
220 // path. We may need parametrization in constructor in future.
221 constexpr StringRef PathPrefixes[] = {"src", "!src", "mainfile", "source"};
222
223 for (line_iterator LineIt(*MB, /*SkipBlanks=*/true, /*CommentMarker=*/'#');
224 !LineIt.is_at_eof(); LineIt++) {
225 unsigned LineNo = LineIt.line_number();
226 StringRef Line = LineIt->trim();
227 if (Line.empty())
228 continue;
229
230 // Save section names
231 if (Line.starts_with("[")) {
232 if (!Line.ends_with("]")) {
233 Error =
234 ("malformed section header on line " + Twine(LineNo) + ": " + Line)
235 .str();
236 return false;
237 }
238
239 if (auto Err = addSection(Line.drop_front().drop_back(), FileIdx, LineNo,
240 UseGlobs)
241 .moveInto(CurrentSection)) {
242 Error = toString(std::move(Err));
243 return false;
244 }
245 continue;
246 }
247
248 // Get our prefix and unparsed glob.
249 auto [Prefix, Postfix] = Line.split(":");
250 if (Postfix.empty()) {
251 // Missing ':' in the line.
252 Error = ("malformed line " + Twine(LineNo) + ": '" + Line + "'").str();
253 return false;
254 }
255
256 auto [Pattern, Category] = Postfix.split("=");
257 auto [It, _] = CurrentSection->Entries[Prefix].try_emplace(
258 Category, UseGlobs,
259 RemoveDotSlash && llvm::is_contained(PathPrefixes, Prefix));
260 Pattern = Pattern.copy(StrAlloc);
261 if (auto Err = It->second.insert(Pattern, LineNo)) {
262 Error =
263 (Twine("malformed ") + (UseGlobs ? "glob" : "regex") + " in line " +
264 Twine(LineNo) + ": '" + Pattern + "': " + toString(std::move(Err)))
265 .str();
266 return false;
267 }
268 }
269
270 for (Section &S : Sections)
271 S.preprocess(OrderBySize);
272
273 return true;
274}
275
// Destructor is defaulted here, out of line, in the implementation file.
276SpecialCaseList::~SpecialCaseList() = default;
277
279 StringRef Query, StringRef Category) const {
280 auto [FileIdx, LineNo] = inSectionBlame(Section, Prefix, Query, Category);
281 return LineNo;
282}
283
284std::pair<unsigned, unsigned>
286 StringRef Query, StringRef Category) const {
287 for (const auto &S : reverse(Sections)) {
288 if (S.SectionMatcher.matchAny(Section)) {
289 unsigned Blame = S.getLastMatch(Prefix, Query, Category);
290 if (Blame)
291 return {S.FileIdx, Blame};
292 }
293 }
294 return NotFound;
295}
296
297const SpecialCaseList::Matcher *
298SpecialCaseList::Section::findMatcher(StringRef Prefix,
299 StringRef Category) const {
300 SectionEntries::const_iterator I = Entries.find(Prefix);
301 if (I == Entries.end())
302 return nullptr;
303 StringMap<Matcher>::const_iterator II = I->second.find(Category);
304 if (II == I->second.end())
305 return nullptr;
306
307 return &II->second;
308}
309
310LLVM_ABI void SpecialCaseList::Section::preprocess(bool OrderBySize) {
311 SectionMatcher.preprocess(false);
312 for (auto &[K1, E] : Entries)
313 for (auto &[K2, M] : E)
314 M.preprocess(OrderBySize);
315}
316
318 StringRef Query,
319 StringRef Category) const {
320 unsigned LastLine = 0;
321 if (const Matcher *M = findMatcher(Prefix, Category)) {
322 M->match(Query, [&](StringRef, unsigned LineNo) {
323 LastLine = std::max(LastLine, LineNo);
324 });
325 }
326 return LastLine;
327}
328
330 StringRef Query,
331 StringRef Category) const {
332 StringRef LongestRule;
333 if (const Matcher *M = findMatcher(Prefix, Category)) {
334 M->match(Query, [&](StringRef Rule, unsigned) {
335 if (LongestRule.size() < Rule.size())
336 LongestRule = Rule;
337 });
338 }
339 return LongestRule;
340}
341
342} // namespace llvm
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_ABI
Definition Compiler.h:213
#define _
static llvm::Error parse(DataExtractor &Data, uint64_t BaseAddr, LineEntryCallback const &Callback)
Definition LineTable.cpp:54
#define I(x, y, z)
Definition MD5.cpp:58
#define G(x, y, z)
Definition MD5.cpp:56
static const char * toString(MIToken::TokenKind TokenKind)
Definition MIParser.cpp:624
static Error addSection(const NewSectionInfo &NewSection, Object &Obj)
Register Reg
uint64_t IntrinsicInst * II
This file contains some templates that are useful if you are working with the STL at all.
This file contains some functions that are useful when dealing with strings.
Defines the virtual file system interface vfs::FileSystem.
Represents either an error or a value T.
Definition ErrorOr.h:56
reference get()
Definition ErrorOr.h:149
std::error_code getError() const
Definition ErrorOr.h:152
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static ErrorSuccess success()
Create a success value.
Definition Error.h:336
Tagged union holding either a T or a Error.
Definition Error.h:485
static LLVM_ABI Expected< GlobPattern > create(StringRef Pat, std::optional< size_t > MaxSubPatterns={})
This interface provides simple read-only access to a block of memory, and provides simple methods for...
static constexpr std::pair< unsigned, unsigned > NotFound
LLVM_ABI std::pair< unsigned, unsigned > inSectionBlame(StringRef Section, StringRef Prefix, StringRef Query, StringRef Category=StringRef()) const
Returns the file index and the line number <FileIdx, LineNo> corresponding to the special case list e...
LLVM_ABI bool createInternal(const std::vector< std::string > &Paths, vfs::FileSystem &VFS, std::string &Error)
static LLVM_ABI std::unique_ptr< SpecialCaseList > createOrDie(const std::vector< std::string > &Paths, llvm::vfs::FileSystem &FS)
Parses the special case list entries from files.
static LLVM_ABI std::unique_ptr< SpecialCaseList > create(const std::vector< std::string > &Paths, llvm::vfs::FileSystem &FS, std::string &Error)
Parses the special case list entries from files.
LLVM_ABI bool inSection(StringRef Section, StringRef Prefix, StringRef Query, StringRef Category=StringRef()) const
Returns true, if special case list contains a line.
StringMapIterBase< ValueTy, true > const_iterator
Definition StringMap.h:220
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
StringRef copy(Allocator &A) const
Definition StringRef.h:162
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
An efficient, type-erasing, non-owning reference to a callable.
The virtual file system interface.
llvm::ErrorOr< std::unique_ptr< llvm::MemoryBuffer > > getBufferForFile(const Twine &Name, int64_t FileSize=-1, bool RequiresNullTerminator=true, bool IsVolatile=false, bool IsText=true)
This is a convenience method that opens a file, gets its content and then closes the file.
LLVM_ABI StringRef remove_leading_dotslash(StringRef path LLVM_LIFETIME_BOUND, Style style=Style::native)
Remove redundant leading "./" pieces and consecutive separators.
This is an optimization pass for GlobalISel generic memory operations.
void stable_sort(R &&Range)
Definition STLExtras.h:2058
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition Error.h:1305
LLVM_ABI bool consumeUnsignedInteger(StringRef &Str, unsigned Radix, unsigned long long &Result)
@ invalid_argument
Definition Errc.h:56
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1897
LLVM_ABI StringRef getLongestMatch(StringRef Prefix, StringRef Query, StringRef Category) const
LLVM_ABI unsigned getLastMatch(StringRef Prefix, StringRef Query, StringRef Category) const