File: | build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/lld/MachO/ExportTrie.cpp |
Warning: | line 284, column 22 Dereference of null pointer |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===- ExportTrie.cpp -----------------------------------------------------===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | // | |||
9 | // This is a partial implementation of the Mach-O export trie format. It's | |||
10 | // essentially a symbol table encoded as a compressed prefix trie, meaning that | |||
11 | // the common prefixes of each symbol name are shared for a more compact | |||
12 | // representation. The prefixes are stored on the edges of the trie, and one | |||
13 | // edge can represent multiple characters. For example, given two exported | |||
14 | // symbols _bar and _baz, we will have a trie like this (terminal nodes are | |||
15 | // marked with an asterisk): | |||
16 | // | |||
17 | // +-+-+ | |||
18 | // | | // root node | |||
19 | // +-+-+ | |||
20 | // | | |||
21 | // | _ba | |||
22 | // | | |||
23 | // +-+-+ | |||
24 | // | | | |||
25 | // +-+-+ | |||
26 | // r / \ z | |||
27 | // / \ | |||
28 | // +-+-+ +-+-+ | |||
29 | // | * | | * | | |||
30 | // +-+-+ +-+-+ | |||
31 | // | |||
32 | // More documentation of the format can be found in | |||
33 | // llvm/tools/obj2yaml/macho2yaml.cpp. | |||
34 | // | |||
35 | //===----------------------------------------------------------------------===// | |||
36 | ||||
37 | #include "ExportTrie.h" | |||
38 | #include "Symbols.h" | |||
39 | ||||
40 | #include "lld/Common/ErrorHandler.h" | |||
41 | #include "lld/Common/Memory.h" | |||
42 | #include "llvm/ADT/Optional.h" | |||
43 | #include "llvm/BinaryFormat/MachO.h" | |||
44 | #include "llvm/Support/LEB128.h" | |||
45 | ||||
46 | using namespace llvm; | |||
47 | using namespace lld; | |||
48 | using namespace lld::macho; | |||
49 | ||||
50 | namespace { | |||
51 | ||||
52 | struct Edge { | |||
53 | Edge(StringRef s, TrieNode *node) : substring(s), child(node) {} | |||
54 | ||||
55 | StringRef substring; | |||
56 | struct TrieNode *child; | |||
57 | }; | |||
58 | ||||
59 | struct ExportInfo { | |||
60 | uint64_t address; | |||
61 | uint8_t flags = 0; | |||
62 | ExportInfo(const Symbol &sym, uint64_t imageBase) | |||
63 | : address(sym.getVA() - imageBase) { | |||
64 | using namespace llvm::MachO; | |||
65 | // Set the symbol type. | |||
66 | if (sym.isWeakDef()) | |||
67 | flags |= EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION; | |||
68 | // TODO: Add proper support for re-exports & stub-and-resolver flags. | |||
69 | ||||
70 | // Set the symbol kind. | |||
71 | if (sym.isTlv()) { | |||
72 | flags |= EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL; | |||
73 | } else if (auto *defined = dyn_cast<Defined>(&sym)) { | |||
74 | if (defined->isAbsolute()) | |||
75 | flags |= EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE; | |||
76 | } | |||
77 | } | |||
78 | }; | |||
79 | ||||
80 | } // namespace | |||
81 | ||||
82 | struct macho::TrieNode { | |||
83 | std::vector<Edge> edges; | |||
84 | Optional<ExportInfo> info; | |||
85 | // Estimated offset from the start of the serialized trie to the current node. | |||
86 | // This will converge to the true offset when updateOffset() is run to a | |||
87 | // fixpoint. | |||
88 | size_t offset = 0; | |||
89 | ||||
90 | // Returns whether the new estimated offset differs from the old one. | |||
91 | bool updateOffset(size_t &nextOffset); | |||
92 | void writeTo(uint8_t *buf) const; | |||
93 | }; | |||
94 | ||||
95 | bool TrieNode::updateOffset(size_t &nextOffset) { | |||
96 | // Size of the whole node (including the terminalSize and the outgoing edges.) | |||
97 | // In contrast, terminalSize only records the size of the other data in the | |||
98 | // node. | |||
99 | size_t nodeSize; | |||
100 | if (info) { | |||
101 | uint32_t terminalSize = | |||
102 | getULEB128Size(info->flags) + getULEB128Size(info->address); | |||
103 | // Overall node size so far is the uleb128 size of the length of the symbol | |||
104 | // info + the symbol info itself. | |||
105 | nodeSize = terminalSize + getULEB128Size(terminalSize); | |||
106 | } else { | |||
107 | nodeSize = 1; // Size of terminalSize (which has a value of 0) | |||
108 | } | |||
109 | // Compute size of all child edges. | |||
110 | ++nodeSize; // Byte for number of children. | |||
111 | for (const Edge &edge : edges) { | |||
112 | nodeSize += edge.substring.size() + 1 // String length. | |||
113 | + getULEB128Size(edge.child->offset); // Offset len. | |||
114 | } | |||
115 | // On input, 'nextOffset' is the new preferred location for this node. | |||
116 | bool result = (offset != nextOffset); | |||
117 | // Store new location in node object for use by parents. | |||
118 | offset = nextOffset; | |||
119 | nextOffset += nodeSize; | |||
120 | return result; | |||
121 | } | |||
122 | ||||
123 | void TrieNode::writeTo(uint8_t *buf) const { | |||
124 | buf += offset; | |||
125 | if (info) { | |||
126 | // TrieNodes with Symbol info: size, flags address | |||
127 | uint32_t terminalSize = | |||
128 | getULEB128Size(info->flags) + getULEB128Size(info->address); | |||
129 | buf += encodeULEB128(terminalSize, buf); | |||
130 | buf += encodeULEB128(info->flags, buf); | |||
131 | buf += encodeULEB128(info->address, buf); | |||
132 | } else { | |||
133 | // TrieNode with no Symbol info. | |||
134 | *buf++ = 0; // terminalSize | |||
135 | } | |||
136 | // Add number of children. TODO: Handle case where we have more than 256. | |||
137 | assert(edges.size() < 256)(static_cast <bool> (edges.size() < 256) ? void (0) : __assert_fail ("edges.size() < 256", "lld/MachO/ExportTrie.cpp" , 137, __extension__ __PRETTY_FUNCTION__)); | |||
138 | *buf++ = edges.size(); | |||
139 | // Append each child edge substring and node offset. | |||
140 | for (const Edge &edge : edges) { | |||
141 | memcpy(buf, edge.substring.data(), edge.substring.size()); | |||
142 | buf += edge.substring.size(); | |||
143 | *buf++ = '\0'; | |||
144 | buf += encodeULEB128(edge.child->offset, buf); | |||
145 | } | |||
146 | } | |||
147 | ||||
148 | TrieBuilder::~TrieBuilder() { | |||
149 | for (TrieNode *node : nodes) | |||
150 | delete node; | |||
151 | } | |||
152 | ||||
153 | TrieNode *TrieBuilder::makeNode() { | |||
154 | auto *node = new TrieNode(); | |||
155 | nodes.emplace_back(node); | |||
156 | return node; | |||
157 | } | |||
158 | ||||
159 | static int charAt(const Symbol *sym, size_t pos) { | |||
160 | StringRef str = sym->getName(); | |||
161 | if (pos >= str.size()) | |||
162 | return -1; | |||
163 | return str[pos]; | |||
164 | } | |||
165 | ||||
166 | // Build the trie by performing a three-way radix quicksort: We start by sorting | |||
167 | // the strings by their first characters, then sort the strings with the same | |||
168 | // first characters by their second characters, and so on recursively. Each | |||
169 | // time the prefixes diverge, we add a node to the trie. | |||
170 | // | |||
171 | // node: The most recently created node along this path in the trie (i.e. | |||
172 | // the furthest from the root.) | |||
173 | // lastPos: The prefix length of the most recently created node, i.e. the number | |||
174 | // of characters along its path from the root. | |||
175 | // pos: The string index we are currently sorting on. Note that each symbol | |||
176 | // S contained in vec has the same prefix S[0...pos). | |||
177 | void TrieBuilder::sortAndBuild(MutableArrayRef<const Symbol *> vec, | |||
178 | TrieNode *node, size_t lastPos, size_t pos) { | |||
179 | tailcall: | |||
180 | if (vec.empty()) | |||
181 | return; | |||
182 | ||||
183 | // Partition items so that items in [0, i) are less than the pivot, | |||
184 | // [i, j) are the same as the pivot, and [j, vec.size()) are greater than | |||
185 | // the pivot. | |||
186 | const Symbol *pivotSymbol = vec[vec.size() / 2]; | |||
187 | int pivot = charAt(pivotSymbol, pos); | |||
188 | size_t i = 0; | |||
189 | size_t j = vec.size(); | |||
190 | for (size_t k = 0; k < j;) { | |||
191 | int c = charAt(vec[k], pos); | |||
192 | if (c < pivot) | |||
193 | std::swap(vec[i++], vec[k++]); | |||
194 | else if (c > pivot) | |||
195 | std::swap(vec[--j], vec[k]); | |||
196 | else | |||
197 | k++; | |||
198 | } | |||
199 | ||||
200 | bool isTerminal = pivot == -1; | |||
201 | bool prefixesDiverge = i != 0 || j != vec.size(); | |||
202 | if (lastPos != pos && (isTerminal || prefixesDiverge)) { | |||
203 | TrieNode *newNode = makeNode(); | |||
204 | node->edges.emplace_back(pivotSymbol->getName().slice(lastPos, pos), | |||
205 | newNode); | |||
206 | node = newNode; | |||
207 | lastPos = pos; | |||
208 | } | |||
209 | ||||
210 | sortAndBuild(vec.slice(0, i), node, lastPos, pos); | |||
211 | sortAndBuild(vec.slice(j), node, lastPos, pos); | |||
212 | ||||
213 | if (isTerminal) { | |||
214 | assert(j - i == 1)(static_cast <bool> (j - i == 1) ? void (0) : __assert_fail ("j - i == 1", "lld/MachO/ExportTrie.cpp", 214, __extension__ __PRETTY_FUNCTION__)); // no duplicate symbols | |||
215 | node->info = ExportInfo(*pivotSymbol, imageBase); | |||
216 | } else { | |||
217 | // This is the tail-call-optimized version of the following: | |||
218 | // sortAndBuild(vec.slice(i, j - i), node, lastPos, pos + 1); | |||
219 | vec = vec.slice(i, j - i); | |||
220 | ++pos; | |||
221 | goto tailcall; | |||
222 | } | |||
223 | } | |||
224 | ||||
225 | size_t TrieBuilder::build() { | |||
226 | if (exported.empty()) | |||
227 | return 0; | |||
228 | ||||
229 | TrieNode *root = makeNode(); | |||
230 | sortAndBuild(exported, root, 0, 0); | |||
231 | ||||
232 | // Assign each node in the vector an offset in the trie stream, iterating | |||
233 | // until all uleb128 sizes have stabilized. | |||
234 | size_t offset; | |||
235 | bool more; | |||
236 | do { | |||
237 | offset = 0; | |||
238 | more = false; | |||
239 | for (TrieNode *node : nodes) | |||
240 | more |= node->updateOffset(offset); | |||
241 | } while (more); | |||
242 | ||||
243 | return offset; | |||
244 | } | |||
245 | ||||
246 | void TrieBuilder::writeTo(uint8_t *buf) const { | |||
247 | for (TrieNode *node : nodes) | |||
248 | node->writeTo(buf); | |||
249 | } | |||
250 | ||||
251 | namespace { | |||
252 | ||||
253 | // Parse a serialized trie and invoke a callback for each entry. | |||
254 | class TrieParser { | |||
255 | public: | |||
256 | TrieParser(const uint8_t *buf, size_t size, const TrieEntryCallback &callback) | |||
257 | : start(buf), end(start + size), callback(callback) {} | |||
258 | ||||
259 | void parse(const uint8_t *buf, const Twine &cumulativeString); | |||
260 | ||||
261 | void parse() { parse(start, ""); } | |||
262 | ||||
263 | const uint8_t *start; | |||
264 | const uint8_t *end; | |||
265 | const TrieEntryCallback &callback; | |||
266 | }; | |||
267 | ||||
268 | } // namespace | |||
269 | ||||
270 | void TrieParser::parse(const uint8_t *buf, const Twine &cumulativeString) { | |||
271 | if (buf >= end) | |||
272 | fatal("Node offset points outside export section"); | |||
273 | ||||
274 | unsigned ulebSize; | |||
275 | uint64_t terminalSize = decodeULEB128(buf, &ulebSize); | |||
276 | buf += ulebSize; | |||
277 | uint64_t flags = 0; | |||
278 | size_t offset; | |||
279 | if (terminalSize
| |||
280 | flags = decodeULEB128(buf, &ulebSize); | |||
281 | callback(cumulativeString, flags); | |||
282 | } | |||
283 | buf += terminalSize; | |||
284 | uint8_t numEdges = *buf++; | |||
| ||||
285 | for (uint8_t i = 0; i < numEdges; ++i) { | |||
286 | const char *cbuf = reinterpret_cast<const char *>(buf); | |||
287 | StringRef substring = StringRef(cbuf, strnlen(cbuf, end - buf)); | |||
288 | buf += substring.size() + 1; | |||
289 | offset = decodeULEB128(buf, &ulebSize); | |||
290 | buf += ulebSize; | |||
291 | parse(start + offset, cumulativeString + substring); | |||
292 | } | |||
293 | } | |||
294 | ||||
295 | void macho::parseTrie(const uint8_t *buf, size_t size, | |||
296 | const TrieEntryCallback &callback) { | |||
297 | if (size == 0) | |||
| ||||
298 | return; | |||
299 | ||||
300 | TrieParser(buf, size, callback).parse(); | |||
301 | } |