Bug Summary

File: build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/lld/MachO/ExportTrie.cpp
Warning: line 284, column 22
Dereference of null pointer

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ExportTrie.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/build-llvm -resource-dir /usr/lib/llvm-16/lib/clang/16.0.0 -D LLD_VENDOR="Debian" -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I tools/lld/MachO -I /build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/lld/MachO -I /build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/lld/include -I tools/lld/include -I include -I /build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/llvm/include -I /build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/llvm/../libunwind/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem 
/usr/lib/llvm-16/lib/clang/16.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/build-llvm=build-llvm -fmacro-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/= -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/build-llvm=build-llvm -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/= -O3 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/build-llvm -fdebug-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/build-llvm=build-llvm -fdebug-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/= -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-10-03-140002-15933-1 -x c++ /build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/lld/MachO/ExportTrie.cpp
1//===- ExportTrie.cpp -----------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This is a partial implementation of the Mach-O export trie format. It's
10// essentially a symbol table encoded as a compressed prefix trie, meaning that
11// the common prefixes of each symbol name are shared for a more compact
12// representation. The prefixes are stored on the edges of the trie, and one
13// edge can represent multiple characters. For example, given two exported
14// symbols _bar and _baz, we will have a trie like this (terminal nodes are
15// marked with an asterisk):
16//
17//            +-+-+
18//            |   | // root node
19//            +-+-+
20//              |
21//              | _ba
22//              |
23//            +-+-+
24//            |   |
25//            +-+-+
26//           r /   \ z
27//            /     \
28//        +-+-+     +-+-+
29//        | * |     | * |
30//        +-+-+     +-+-+
31//
32// More documentation of the format can be found in
33// llvm/tools/obj2yaml/macho2yaml.cpp.
34//
35//===----------------------------------------------------------------------===//
36
37#include "ExportTrie.h"
38#include "Symbols.h"
39
40#include "lld/Common/ErrorHandler.h"
41#include "lld/Common/Memory.h"
42#include "llvm/ADT/Optional.h"
43#include "llvm/BinaryFormat/MachO.h"
44#include "llvm/Support/LEB128.h"
45
46using namespace llvm;
47using namespace lld;
48using namespace lld::macho;
49
50namespace {
51
52struct Edge {
53 Edge(StringRef s, TrieNode *node) : substring(s), child(node) {}
54
55 StringRef substring;
56 struct TrieNode *child;
57};
58
59struct ExportInfo {
60 uint64_t address;
61 uint8_t flags = 0;
62 ExportInfo(const Symbol &sym, uint64_t imageBase)
63 : address(sym.getVA() - imageBase) {
64 using namespace llvm::MachO;
65 // Set the symbol type.
66 if (sym.isWeakDef())
67 flags |= EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION;
68 // TODO: Add proper support for re-exports & stub-and-resolver flags.
69
70 // Set the symbol kind.
71 if (sym.isTlv()) {
72 flags |= EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL;
73 } else if (auto *defined = dyn_cast<Defined>(&sym)) {
74 if (defined->isAbsolute())
75 flags |= EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE;
76 }
77 }
78};
79
80} // namespace
81
82struct macho::TrieNode {
83 std::vector<Edge> edges;
84 Optional<ExportInfo> info;
85 // Estimated offset from the start of the serialized trie to the current node.
86 // This will converge to the true offset when updateOffset() is run to a
87 // fixpoint.
88 size_t offset = 0;
89
90 // Returns whether the new estimated offset differs from the old one.
91 bool updateOffset(size_t &nextOffset);
92 void writeTo(uint8_t *buf) const;
93};
94
95bool TrieNode::updateOffset(size_t &nextOffset) {
96 // Size of the whole node (including the terminalSize and the outgoing edges.)
97 // In contrast, terminalSize only records the size of the other data in the
98 // node.
99 size_t nodeSize;
100 if (info) {
101 uint32_t terminalSize =
102 getULEB128Size(info->flags) + getULEB128Size(info->address);
103 // Overall node size so far is the uleb128 size of the length of the symbol
104 // info + the symbol info itself.
105 nodeSize = terminalSize + getULEB128Size(terminalSize);
106 } else {
107 nodeSize = 1; // Size of terminalSize (which has a value of 0)
108 }
109 // Compute size of all child edges.
110 ++nodeSize; // Byte for number of children.
111 for (const Edge &edge : edges) {
112 nodeSize += edge.substring.size() + 1 // String length.
113 + getULEB128Size(edge.child->offset); // Offset len.
114 }
115 // On input, 'nextOffset' is the new preferred location for this node.
116 bool result = (offset != nextOffset);
117 // Store new location in node object for use by parents.
118 offset = nextOffset;
119 nextOffset += nodeSize;
120 return result;
121}
122
123void TrieNode::writeTo(uint8_t *buf) const {
124 buf += offset;
125 if (info) {
126 // TrieNodes with Symbol info: size, flags address
127 uint32_t terminalSize =
128 getULEB128Size(info->flags) + getULEB128Size(info->address);
129 buf += encodeULEB128(terminalSize, buf);
130 buf += encodeULEB128(info->flags, buf);
131 buf += encodeULEB128(info->address, buf);
132 } else {
133 // TrieNode with no Symbol info.
134 *buf++ = 0; // terminalSize
135 }
136 // Add number of children. TODO: Handle case where we have more than 256.
137 assert(edges.size() < 256)(static_cast <bool> (edges.size() < 256) ? void (0) :
__assert_fail ("edges.size() < 256", "lld/MachO/ExportTrie.cpp"
, 137, __extension__ __PRETTY_FUNCTION__))
;
138 *buf++ = edges.size();
139 // Append each child edge substring and node offset.
140 for (const Edge &edge : edges) {
141 memcpy(buf, edge.substring.data(), edge.substring.size());
142 buf += edge.substring.size();
143 *buf++ = '\0';
144 buf += encodeULEB128(edge.child->offset, buf);
145 }
146}
147
148TrieBuilder::~TrieBuilder() {
149 for (TrieNode *node : nodes)
150 delete node;
151}
152
153TrieNode *TrieBuilder::makeNode() {
154 auto *node = new TrieNode();
155 nodes.emplace_back(node);
156 return node;
157}
158
159static int charAt(const Symbol *sym, size_t pos) {
160 StringRef str = sym->getName();
161 if (pos >= str.size())
162 return -1;
163 return str[pos];
164}
165
166// Build the trie by performing a three-way radix quicksort: We start by sorting
167// the strings by their first characters, then sort the strings with the same
168// first characters by their second characters, and so on recursively. Each
169// time the prefixes diverge, we add a node to the trie.
170//
171// node: The most recently created node along this path in the trie (i.e.
172// the furthest from the root.)
173// lastPos: The prefix length of the most recently created node, i.e. the number
174// of characters along its path from the root.
175// pos: The string index we are currently sorting on. Note that each symbol
176// S contained in vec has the same prefix S[0...pos).
177void TrieBuilder::sortAndBuild(MutableArrayRef<const Symbol *> vec,
178 TrieNode *node, size_t lastPos, size_t pos) {
179tailcall:
180 if (vec.empty())
181 return;
182
183 // Partition items so that items in [0, i) are less than the pivot,
184 // [i, j) are the same as the pivot, and [j, vec.size()) are greater than
185 // the pivot.
186 const Symbol *pivotSymbol = vec[vec.size() / 2];
187 int pivot = charAt(pivotSymbol, pos);
188 size_t i = 0;
189 size_t j = vec.size();
190 for (size_t k = 0; k < j;) {
191 int c = charAt(vec[k], pos);
192 if (c < pivot)
193 std::swap(vec[i++], vec[k++]);
194 else if (c > pivot)
195 std::swap(vec[--j], vec[k]);
196 else
197 k++;
198 }
199
200 bool isTerminal = pivot == -1;
201 bool prefixesDiverge = i != 0 || j != vec.size();
202 if (lastPos != pos && (isTerminal || prefixesDiverge)) {
203 TrieNode *newNode = makeNode();
204 node->edges.emplace_back(pivotSymbol->getName().slice(lastPos, pos),
205 newNode);
206 node = newNode;
207 lastPos = pos;
208 }
209
210 sortAndBuild(vec.slice(0, i), node, lastPos, pos);
211 sortAndBuild(vec.slice(j), node, lastPos, pos);
212
213 if (isTerminal) {
214 assert(j - i == 1)(static_cast <bool> (j - i == 1) ? void (0) : __assert_fail
("j - i == 1", "lld/MachO/ExportTrie.cpp", 214, __extension__
__PRETTY_FUNCTION__))
; // no duplicate symbols
215 node->info = ExportInfo(*pivotSymbol, imageBase);
216 } else {
217 // This is the tail-call-optimized version of the following:
218 // sortAndBuild(vec.slice(i, j - i), node, lastPos, pos + 1);
219 vec = vec.slice(i, j - i);
220 ++pos;
221 goto tailcall;
222 }
223}
224
225size_t TrieBuilder::build() {
226 if (exported.empty())
227 return 0;
228
229 TrieNode *root = makeNode();
230 sortAndBuild(exported, root, 0, 0);
231
232 // Assign each node in the vector an offset in the trie stream, iterating
233 // until all uleb128 sizes have stabilized.
234 size_t offset;
235 bool more;
236 do {
237 offset = 0;
238 more = false;
239 for (TrieNode *node : nodes)
240 more |= node->updateOffset(offset);
241 } while (more);
242
243 return offset;
244}
245
246void TrieBuilder::writeTo(uint8_t *buf) const {
247 for (TrieNode *node : nodes)
248 node->writeTo(buf);
249}
250
251namespace {
252
253// Parse a serialized trie and invoke a callback for each entry.
254class TrieParser {
255public:
256 TrieParser(const uint8_t *buf, size_t size, const TrieEntryCallback &callback)
257 : start(buf), end(start + size), callback(callback) {}
258
259 void parse(const uint8_t *buf, const Twine &cumulativeString);
260
261 void parse() { parse(start, ""); }
4
Calling 'TrieParser::parse'
262
263 const uint8_t *start;
264 const uint8_t *end;
265 const TrieEntryCallback &callback;
266};
267
268} // namespace
269
270void TrieParser::parse(const uint8_t *buf, const Twine &cumulativeString) {
271 if (buf >= end)
5
Assuming 'buf' is < field 'end'
6
Taking false branch
272 fatal("Node offset points outside export section");
273
274 unsigned ulebSize;
275 uint64_t terminalSize = decodeULEB128(buf, &ulebSize);
276 buf += ulebSize;
277 uint64_t flags = 0;
278 size_t offset;
279 if (terminalSize
6.1
'terminalSize' is equal to 0
!= 0) {
7
Taking false branch
280 flags = decodeULEB128(buf, &ulebSize);
281 callback(cumulativeString, flags);
282 }
283 buf += terminalSize;
284 uint8_t numEdges = *buf++;
8
Null pointer value stored to 'buf'
9
Dereference of null pointer
285 for (uint8_t i = 0; i < numEdges; ++i) {
286 const char *cbuf = reinterpret_cast<const char *>(buf);
287 StringRef substring = StringRef(cbuf, strnlen(cbuf, end - buf));
288 buf += substring.size() + 1;
289 offset = decodeULEB128(buf, &ulebSize);
290 buf += ulebSize;
291 parse(start + offset, cumulativeString + substring);
292 }
293}
294
295void macho::parseTrie(const uint8_t *buf, size_t size,
296 const TrieEntryCallback &callback) {
297 if (size == 0)
1
Assuming 'size' is not equal to 0
2
Taking false branch
298 return;
299
300 TrieParser(buf, size, callback).parse();
3
Calling 'TrieParser::parse'
301}