| File: | build/source/lld/MachO/ExportTrie.cpp |
| Warning: | line 328, column 22 Dereference of null pointer |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
| 1 | //===- ExportTrie.cpp -----------------------------------------------------===// | |||
| 2 | // | |||
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
| 4 | // See https://llvm.org/LICENSE.txt for license information. | |||
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
| 6 | // | |||
| 7 | //===----------------------------------------------------------------------===// | |||
| 8 | // | |||
| 9 | // This is a partial implementation of the Mach-O export trie format. It's | |||
| 10 | // essentially a symbol table encoded as a compressed prefix trie, meaning that | |||
| 11 | // the common prefixes of each symbol name are shared for a more compact | |||
| 12 | // representation. The prefixes are stored on the edges of the trie, and one | |||
| 13 | // edge can represent multiple characters. For example, given two exported | |||
| 14 | // symbols _bar and _baz, we will have a trie like this (terminal nodes are | |||
| 15 | // marked with an asterisk): | |||
| 16 | // | |||
| 17 | // +-+-+ | |||
| 18 | // | | // root node | |||
| 19 | // +-+-+ | |||
| 20 | // | | |||
| 21 | // | _ba | |||
| 22 | // | | |||
| 23 | // +-+-+ | |||
| 24 | // | | | |||
| 25 | // +-+-+ | |||
| 26 | // r / \ z | |||
| 27 | // / \ | |||
| 28 | // +-+-+ +-+-+ | |||
| 29 | // | * | | * | | |||
| 30 | // +-+-+ +-+-+ | |||
| 31 | // | |||
| 32 | // More documentation of the format can be found in | |||
| 33 | // llvm/tools/obj2yaml/macho2yaml.cpp. | |||
| 34 | // | |||
| 35 | //===----------------------------------------------------------------------===// | |||
| 36 | ||||
| 37 | #include "ExportTrie.h" | |||
| 38 | #include "Symbols.h" | |||
| 39 | ||||
| 40 | #include "lld/Common/ErrorHandler.h" | |||
| 41 | #include "lld/Common/Memory.h" | |||
| 42 | #include "llvm/BinaryFormat/MachO.h" | |||
| 43 | #include "llvm/Support/LEB128.h" | |||
| 44 | #include <optional> | |||
| 45 | ||||
| 46 | using namespace llvm; | |||
| 47 | using namespace lld; | |||
| 48 | using namespace lld::macho; | |||
| 49 | ||||
| 50 | namespace { | |||
| 51 | ||||
| 52 | struct Edge { | |||
| 53 | Edge(StringRef s, TrieNode *node) : substring(s), child(node) {} | |||
| 54 | ||||
| 55 | StringRef substring; | |||
| 56 | struct TrieNode *child; | |||
| 57 | }; | |||
| 58 | ||||
| 59 | struct ExportInfo { | |||
| 60 | uint64_t address; | |||
| 61 | uint64_t ordinal = 0; | |||
| 62 | uint8_t flags = 0; | |||
| 63 | ExportInfo(const Symbol &sym, uint64_t imageBase) | |||
| 64 | : address(sym.getVA() - imageBase) { | |||
| 65 | using namespace llvm::MachO; | |||
| 66 | if (sym.isWeakDef()) | |||
| 67 | flags |= EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION; | |||
| 68 | if (sym.isTlv()) | |||
| 69 | flags |= EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL; | |||
| 70 | // TODO: Add proper support for stub-and-resolver flags. | |||
| 71 | ||||
| 72 | if (auto *defined = dyn_cast<Defined>(&sym)) { | |||
| 73 | if (defined->isAbsolute()) | |||
| 74 | flags |= EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE; | |||
| 75 | } else if (auto *dysym = dyn_cast<DylibSymbol>(&sym)) { | |||
| 76 | flags |= EXPORT_SYMBOL_FLAGS_REEXPORT; | |||
| 77 | if (!dysym->isDynamicLookup()) | |||
| 78 | ordinal = dysym->getFile()->ordinal; | |||
| 79 | } | |||
| 80 | } | |||
| 81 | }; | |||
| 82 | ||||
| 83 | } // namespace | |||
| 84 | ||||
| 85 | struct macho::TrieNode { | |||
| 86 | std::vector<Edge> edges; | |||
| 87 | std::optional<ExportInfo> info; | |||
| 88 | // Estimated offset from the start of the serialized trie to the current node. | |||
| 89 | // This will converge to the true offset when updateOffset() is run to a | |||
| 90 | // fixpoint. | |||
| 91 | size_t offset = 0; | |||
| 92 | ||||
| 93 | uint32_t getTerminalSize() const; | |||
| 94 | // Returns whether the new estimated offset differs from the old one. | |||
| 95 | bool updateOffset(size_t &nextOffset); | |||
| 96 | void writeTo(uint8_t *buf) const; | |||
| 97 | }; | |||
| 98 | ||||
| 99 | // For regular symbols, the node layout (excluding the children) is | |||
| 100 | // | |||
| 101 | // uleb128 terminalSize; | |||
| 102 | // uleb128 flags; | |||
| 103 | // uleb128 address; | |||
| 104 | // | |||
| 105 | // For re-exported symbols, the layout is | |||
| 106 | // | |||
| 107 | // uleb128 terminalSize; | |||
| 108 | // uleb128 flags; | |||
| 109 | // uleb128 ordinal; | |||
| 110 | // char[] originalName; | |||
| 111 | // | |||
| 112 | // If libfoo.dylib is linked against libbar.dylib, and libfoo exports an alias | |||
| 113 | // _foo to a symbol _bar in libbar, then originalName will be "_bar". If libfoo | |||
| 114 | // re-exports _bar directly (i.e. not via an alias), then originalName will be | |||
| 115 | // the empty string. | |||
| 116 | // | |||
| 117 | // TODO: Support aliased re-exports. (Since we don't yet support these, | |||
| 118 | // originalName will always be the empty string.) | |||
| 119 | // | |||
| 120 | // For stub-and-resolver nodes, the layout is | |||
| 121 | // | |||
| 122 | // uleb128 terminalSize; | |||
| 123 | // uleb128 flags; | |||
| 124 | // uleb128 stubAddress; | |||
| 125 | // uleb128 resolverAddress; | |||
| 126 | // | |||
| 127 | // TODO: Support stub-and-resolver nodes. | |||
| 128 | uint32_t TrieNode::getTerminalSize() const { | |||
| 129 | uint32_t size = getULEB128Size(info->flags); | |||
| 130 | if (info->flags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT) | |||
| 131 | size += getULEB128Size(info->ordinal) + 1; // + 1 for the null-terminator | |||
| 132 | else | |||
| 133 | size += getULEB128Size(info->address); | |||
| 134 | return size; | |||
| 135 | } | |||
| 136 | ||||
| 137 | bool TrieNode::updateOffset(size_t &nextOffset) { | |||
| 138 | // Size of the whole node (including the terminalSize and the outgoing edges.) | |||
| 139 | // In contrast, terminalSize only records the size of the other data in the | |||
| 140 | // node. | |||
| 141 | size_t nodeSize; | |||
| 142 | if (info) { | |||
| 143 | uint32_t terminalSize = getTerminalSize(); | |||
| 144 | // Overall node size so far is the uleb128 size of the length of the symbol | |||
| 145 | // info + the symbol info itself. | |||
| 146 | nodeSize = terminalSize + getULEB128Size(terminalSize); | |||
| 147 | } else { | |||
| 148 | nodeSize = 1; // Size of terminalSize (which has a value of 0) | |||
| 149 | } | |||
| 150 | // Compute size of all child edges. | |||
| 151 | ++nodeSize; // Byte for number of children. | |||
| 152 | for (const Edge &edge : edges) { | |||
| 153 | nodeSize += edge.substring.size() + 1 // String length. | |||
| 154 | + getULEB128Size(edge.child->offset); // Offset len. | |||
| 155 | } | |||
| 156 | // On input, 'nextOffset' is the new preferred location for this node. | |||
| 157 | bool result = (offset != nextOffset); | |||
| 158 | // Store new location in node object for use by parents. | |||
| 159 | offset = nextOffset; | |||
| 160 | nextOffset += nodeSize; | |||
| 161 | return result; | |||
| 162 | } | |||
| 163 | ||||
| 164 | void TrieNode::writeTo(uint8_t *buf) const { | |||
| 165 | buf += offset; | |||
| 166 | if (info) { | |||
| 167 | uint32_t terminalSize = getTerminalSize(); | |||
| 168 | buf += encodeULEB128(terminalSize, buf); | |||
| 169 | buf += encodeULEB128(info->flags, buf); | |||
| 170 | if (info->flags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT) { | |||
| 171 | buf += encodeULEB128(info->ordinal, buf); | |||
| 172 | *buf++ = 0; // empty originalName string | |||
| 173 | } else { | |||
| 174 | buf += encodeULEB128(info->address, buf); | |||
| 175 | } | |||
| 176 | } else { | |||
| 177 | // TrieNode with no Symbol info. | |||
| 178 | *buf++ = 0; // terminalSize | |||
| 179 | } | |||
| 180 | // Add number of children. TODO: Handle case where we have more than 256. | |||
| 181 | assert(edges.size() < 256)(static_cast <bool> (edges.size() < 256) ? void (0) : __assert_fail ("edges.size() < 256", "lld/MachO/ExportTrie.cpp" , 181, __extension__ __PRETTY_FUNCTION__)); | |||
| 182 | *buf++ = edges.size(); | |||
| 183 | // Append each child edge substring and node offset. | |||
| 184 | for (const Edge &edge : edges) { | |||
| 185 | memcpy(buf, edge.substring.data(), edge.substring.size()); | |||
| 186 | buf += edge.substring.size(); | |||
| 187 | *buf++ = '\0'; | |||
| 188 | buf += encodeULEB128(edge.child->offset, buf); | |||
| 189 | } | |||
| 190 | } | |||
| 191 | ||||
| 192 | TrieBuilder::~TrieBuilder() { | |||
| 193 | for (TrieNode *node : nodes) | |||
| 194 | delete node; | |||
| 195 | } | |||
| 196 | ||||
| 197 | TrieNode *TrieBuilder::makeNode() { | |||
| 198 | auto *node = new TrieNode(); | |||
| 199 | nodes.emplace_back(node); | |||
| 200 | return node; | |||
| 201 | } | |||
| 202 | ||||
| 203 | static int charAt(const Symbol *sym, size_t pos) { | |||
| 204 | StringRef str = sym->getName(); | |||
| 205 | if (pos >= str.size()) | |||
| 206 | return -1; | |||
| 207 | return str[pos]; | |||
| 208 | } | |||
| 209 | ||||
| 210 | // Build the trie by performing a three-way radix quicksort: We start by sorting | |||
| 211 | // the strings by their first characters, then sort the strings with the same | |||
| 212 | // first characters by their second characters, and so on recursively. Each | |||
| 213 | // time the prefixes diverge, we add a node to the trie. | |||
| 214 | // | |||
| 215 | // node: The most recently created node along this path in the trie (i.e. | |||
| 216 | // the furthest from the root.) | |||
| 217 | // lastPos: The prefix length of the most recently created node, i.e. the number | |||
| 218 | // of characters along its path from the root. | |||
| 219 | // pos: The string index we are currently sorting on. Note that each symbol | |||
| 220 | // S contained in vec has the same prefix S[0...pos). | |||
| 221 | void TrieBuilder::sortAndBuild(MutableArrayRef<const Symbol *> vec, | |||
| 222 | TrieNode *node, size_t lastPos, size_t pos) { | |||
| 223 | tailcall: | |||
| 224 | if (vec.empty()) | |||
| 225 | return; | |||
| 226 | ||||
| 227 | // Partition items so that items in [0, i) are less than the pivot, | |||
| 228 | // [i, j) are the same as the pivot, and [j, vec.size()) are greater than | |||
| 229 | // the pivot. | |||
| 230 | const Symbol *pivotSymbol = vec[vec.size() / 2]; | |||
| 231 | int pivot = charAt(pivotSymbol, pos); | |||
| 232 | size_t i = 0; | |||
| 233 | size_t j = vec.size(); | |||
| 234 | for (size_t k = 0; k < j;) { | |||
| 235 | int c = charAt(vec[k], pos); | |||
| 236 | if (c < pivot) | |||
| 237 | std::swap(vec[i++], vec[k++]); | |||
| 238 | else if (c > pivot) | |||
| 239 | std::swap(vec[--j], vec[k]); | |||
| 240 | else | |||
| 241 | k++; | |||
| 242 | } | |||
| 243 | ||||
| 244 | bool isTerminal = pivot == -1; | |||
| 245 | bool prefixesDiverge = i != 0 || j != vec.size(); | |||
| 246 | if (lastPos != pos && (isTerminal || prefixesDiverge)) { | |||
| 247 | TrieNode *newNode = makeNode(); | |||
| 248 | node->edges.emplace_back(pivotSymbol->getName().slice(lastPos, pos), | |||
| 249 | newNode); | |||
| 250 | node = newNode; | |||
| 251 | lastPos = pos; | |||
| 252 | } | |||
| 253 | ||||
| 254 | sortAndBuild(vec.slice(0, i), node, lastPos, pos); | |||
| 255 | sortAndBuild(vec.slice(j), node, lastPos, pos); | |||
| 256 | ||||
| 257 | if (isTerminal) { | |||
| 258 | assert(j - i == 1)(static_cast <bool> (j - i == 1) ? void (0) : __assert_fail ("j - i == 1", "lld/MachO/ExportTrie.cpp", 258, __extension__ __PRETTY_FUNCTION__)); // no duplicate symbols | |||
| 259 | node->info = ExportInfo(*pivotSymbol, imageBase); | |||
| 260 | } else { | |||
| 261 | // This is the tail-call-optimized version of the following: | |||
| 262 | // sortAndBuild(vec.slice(i, j - i), node, lastPos, pos + 1); | |||
| 263 | vec = vec.slice(i, j - i); | |||
| 264 | ++pos; | |||
| 265 | goto tailcall; | |||
| 266 | } | |||
| 267 | } | |||
| 268 | ||||
| 269 | size_t TrieBuilder::build() { | |||
| 270 | if (exported.empty()) | |||
| 271 | return 0; | |||
| 272 | ||||
| 273 | TrieNode *root = makeNode(); | |||
| 274 | sortAndBuild(exported, root, 0, 0); | |||
| 275 | ||||
| 276 | // Assign each node in the vector an offset in the trie stream, iterating | |||
| 277 | // until all uleb128 sizes have stabilized. | |||
| 278 | size_t offset; | |||
| 279 | bool more; | |||
| 280 | do { | |||
| 281 | offset = 0; | |||
| 282 | more = false; | |||
| 283 | for (TrieNode *node : nodes) | |||
| 284 | more |= node->updateOffset(offset); | |||
| 285 | } while (more); | |||
| 286 | ||||
| 287 | return offset; | |||
| 288 | } | |||
| 289 | ||||
| 290 | void TrieBuilder::writeTo(uint8_t *buf) const { | |||
| 291 | for (TrieNode *node : nodes) | |||
| 292 | node->writeTo(buf); | |||
| 293 | } | |||
| 294 | ||||
| 295 | namespace { | |||
| 296 | ||||
| 297 | // Parse a serialized trie and invoke a callback for each entry. | |||
| 298 | class TrieParser { | |||
| 299 | public: | |||
| 300 | TrieParser(const uint8_t *buf, size_t size, const TrieEntryCallback &callback) | |||
| 301 | : start(buf), end(start + size), callback(callback) {} | |||
| 302 | ||||
| 303 | void parse(const uint8_t *buf, const Twine &cumulativeString); | |||
| 304 | ||||
| 305 | void parse() { parse(start, ""); } | |||
| 306 | ||||
| 307 | const uint8_t *start; | |||
| 308 | const uint8_t *end; | |||
| 309 | const TrieEntryCallback &callback; | |||
| 310 | }; | |||
| 311 | ||||
| 312 | } // namespace | |||
| 313 | ||||
| 314 | void TrieParser::parse(const uint8_t *buf, const Twine &cumulativeString) { | |||
| 315 | if (buf >= end) | |||
| 316 | fatal("Node offset points outside export section"); | |||
| 317 | ||||
| 318 | unsigned ulebSize; | |||
| 319 | uint64_t terminalSize = decodeULEB128(buf, &ulebSize); | |||
| 320 | buf += ulebSize; | |||
| 321 | uint64_t flags = 0; | |||
| 322 | size_t offset; | |||
| 323 | if (terminalSize
| |||
| 324 | flags = decodeULEB128(buf, &ulebSize); | |||
| 325 | callback(cumulativeString, flags); | |||
| 326 | } | |||
| 327 | buf += terminalSize; | |||
| 328 | uint8_t numEdges = *buf++; | |||
| ||||
| 329 | for (uint8_t i = 0; i < numEdges; ++i) { | |||
| 330 | const char *cbuf = reinterpret_cast<const char *>(buf); | |||
| 331 | StringRef substring = StringRef(cbuf, strnlen(cbuf, end - buf)); | |||
| 332 | buf += substring.size() + 1; | |||
| 333 | offset = decodeULEB128(buf, &ulebSize); | |||
| 334 | buf += ulebSize; | |||
| 335 | parse(start + offset, cumulativeString + substring); | |||
| 336 | } | |||
| 337 | } | |||
| 338 | ||||
| 339 | void macho::parseTrie(const uint8_t *buf, size_t size, | |||
| 340 | const TrieEntryCallback &callback) { | |||
| 341 | if (size == 0) | |||
| ||||
| 342 | return; | |||
| 343 | ||||
| 344 | TrieParser(buf, size, callback).parse(); | |||
| 345 | } |