Line data Source code
1 : //===-- SpecialCaseList.h - special case list for sanitizers ----*- C++ -*-===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //===----------------------------------------------------------------------===//
8 : //
9 : // This is a utility class used to parse user-provided text files with
10 : // "special case lists" for code sanitizers. Such files are used to
11 : // define an "ABI list" for DataFlowSanitizer and blacklists for sanitizers
12 : // like AddressSanitizer or UndefinedBehaviorSanitizer.
13 : //
14 : // Empty lines and lines starting with "#" are ignored. Sections are defined
15 : // using a '[section_name]' header and can be used to specify sanitizers the
16 : // entries below it apply to. Section names are regular expressions, and
17 : // entries without a section header match all sections (i.e. a '[*]' header
18 : // is assumed).
19 : // The remaining lines should have the form:
20 : // prefix:wildcard_expression[=category]
21 : // If category is not specified, it is assumed to be empty string.
22 : // Definitions of "prefix" and "category" are sanitizer-specific. For example,
23 : // sanitizer blacklists support prefixes "src", "fun" and "global".
24 : // Wildcard expressions define, respectively, source files, functions or
25 : // globals which shouldn't be instrumented.
26 : // Examples of categories:
27 : // "functional": used in DFSan to list functions with pure functional
28 : // semantics.
29 : // "init": used in ASan blacklist to disable initialization-order bugs
30 : // detection for certain globals or source files.
31 : // Full special case list file example:
32 : // ---
33 : // [address]
34 : // # Blacklisted items:
35 : // fun:*_ZN4base6subtle*
36 : // global:*global_with_bad_access_or_initialization*
37 : // global:*global_with_initialization_issues*=init
38 : // type:*Namespace::ClassName*=init
39 : // src:file_with_tricky_code.cc
40 : // src:ignore-global-initializers-issues.cc=init
41 : //
42 : // [dataflow]
43 : // # Functions with pure functional semantics:
44 : // fun:cos=functional
45 : // fun:sin=functional
46 : // ---
47 : // Note that the wild card is in fact an llvm::Regex, but * is automatically
48 : // replaced with .*
49 : //
50 : //===----------------------------------------------------------------------===//
51 :
52 : #ifndef LLVM_SUPPORT_SPECIALCASELIST_H
53 : #define LLVM_SUPPORT_SPECIALCASELIST_H
54 :
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/TrigramIndex.h"
#include <memory>
#include <string>
#include <utility>
#include <vector>
61 :
62 : namespace llvm {
63 : class MemoryBuffer;
64 : class Regex;
65 : class StringRef;
66 :
67 : class SpecialCaseList {
68 : public:
69 : /// Parses the special case list entries from files. On failure, returns
70 : /// 0 and writes an error message to string.
71 : static std::unique_ptr<SpecialCaseList>
72 : create(const std::vector<std::string> &Paths, std::string &Error);
73 : /// Parses the special case list from a memory buffer. On failure, returns
74 : /// 0 and writes an error message to string.
75 : static std::unique_ptr<SpecialCaseList> create(const MemoryBuffer *MB,
76 : std::string &Error);
77 : /// Parses the special case list entries from files. On failure, reports a
78 : /// fatal error.
79 : static std::unique_ptr<SpecialCaseList>
80 : createOrDie(const std::vector<std::string> &Paths);
81 :
82 : ~SpecialCaseList();
83 :
84 : /// Returns true, if special case list contains a line
85 : /// \code
86 : /// @Prefix:<E>=@Category
87 : /// \endcode
88 : /// where @Query satisfies wildcard expression <E> in a given @Section.
89 : bool inSection(StringRef Section, StringRef Prefix, StringRef Query,
90 : StringRef Category = StringRef()) const;
91 :
92 : /// Returns the line number corresponding to the special case list entry if
93 : /// the special case list contains a line
94 : /// \code
95 : /// @Prefix:<E>=@Category
96 : /// \endcode
97 : /// where @Query satisfies wildcard expression <E> in a given @Section.
98 : /// Returns zero if there is no blacklist entry corresponding to this
99 : /// expression.
100 : unsigned inSectionBlame(StringRef Section, StringRef Prefix, StringRef Query,
101 : StringRef Category = StringRef()) const;
102 :
103 : protected:
104 : // Implementations of the create*() functions that can also be used by derived
105 : // classes.
106 : bool createInternal(const std::vector<std::string> &Paths,
107 : std::string &Error);
108 : bool createInternal(const MemoryBuffer *MB, std::string &Error);
109 :
110 : SpecialCaseList() = default;
111 : SpecialCaseList(SpecialCaseList const &) = delete;
112 : SpecialCaseList &operator=(SpecialCaseList const &) = delete;
113 :
114 : /// Represents a set of regular expressions. Regular expressions which are
115 : /// "literal" (i.e. no regex metacharacters) are stored in Strings. The
116 : /// reason for doing so is efficiency; StringMap is much faster at matching
117 : /// literal strings than Regex.
118 635 : class Matcher {
119 : public:
120 : bool insert(std::string Regexp, unsigned LineNumber, std::string &REError);
121 : // Returns the line number in the source file that this query matches to.
122 : // Returns zero if no match is found.
123 : unsigned match(StringRef Query) const;
124 :
125 : private:
126 : StringMap<unsigned> Strings;
127 : TrigramIndex Trigrams;
128 : std::vector<std::pair<std::unique_ptr<Regex>, unsigned>> RegExes;
129 : };
130 :
131 : using SectionEntries = StringMap<StringMap<Matcher>>;
132 :
133 274 : struct Section {
134 265 : Section(std::unique_ptr<Matcher> M) : SectionMatcher(std::move(M)){};
135 :
136 : std::unique_ptr<Matcher> SectionMatcher;
137 : SectionEntries Entries;
138 : };
139 :
140 : std::vector<Section> Sections;
141 :
142 : /// Parses just-constructed SpecialCaseList entries from a memory buffer.
143 : bool parse(const MemoryBuffer *MB, StringMap<size_t> &SectionsMap,
144 : std::string &Error);
145 :
146 : // Helper method for derived classes to search by Prefix, Query, and Category
147 : // once they have already resolved a section entry.
148 : unsigned inSectionBlame(const SectionEntries &Entries, StringRef Prefix,
149 : StringRef Query, StringRef Category) const;
150 : };
151 :
152 : } // namespace llvm
153 :
154 : #endif // LLVM_SUPPORT_SPECIALCASELIST_H
155 :
|