LLVM 18.0.0git
MachOLayoutBuilder.cpp
Go to the documentation of this file.
1//===- MachOLayoutBuilder.cpp -----------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
13
14using namespace llvm;
15using namespace llvm::objcopy::macho;
16
18MachOLayoutBuilder::getStringTableBuilderKind(const Object &O, bool Is64Bit) {
19 if (O.Header.FileType == MachO::HeaderFileType::MH_OBJECT)
23}
24
25uint32_t MachOLayoutBuilder::computeSizeOfCmds() const {
26 uint32_t Size = 0;
27 for (const LoadCommand &LC : O.LoadCommands) {
29 auto cmd = MLC.load_command_data.cmd;
30 switch (cmd) {
31 case MachO::LC_SEGMENT:
32 Size += sizeof(MachO::segment_command) +
33 sizeof(MachO::section) * LC.Sections.size();
34 continue;
35 case MachO::LC_SEGMENT_64:
37 sizeof(MachO::section_64) * LC.Sections.size();
38 continue;
39 }
40
41 switch (cmd) {
42#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \
43 case MachO::LCName: \
44 Size += sizeof(MachO::LCStruct) + LC.Payload.size(); \
45 break;
46#include "llvm/BinaryFormat/MachO.def"
47#undef HANDLE_LOAD_COMMAND
48 }
49 }
50
51 return Size;
52}
53
54void MachOLayoutBuilder::constructStringTable() {
55 for (std::unique_ptr<SymbolEntry> &Sym : O.SymTable.Symbols)
56 StrTableBuilder.add(Sym->Name);
57 StrTableBuilder.finalize();
58}
59
60void MachOLayoutBuilder::updateSymbolIndexes() {
61 uint32_t Index = 0;
62 for (auto &Symbol : O.SymTable.Symbols)
63 Symbol->Index = Index++;
64}
65
66// Updates the index and the number of local/external/undefined symbols.
67void MachOLayoutBuilder::updateDySymTab(MachO::macho_load_command &MLC) {
68 assert(MLC.load_command_data.cmd == MachO::LC_DYSYMTAB);
69 // Make sure that nlist entries in the symbol table are sorted by the those
70 // types. The order is: local < defined external < undefined external.
71 assert(llvm::is_sorted(O.SymTable.Symbols,
72 [](const std::unique_ptr<SymbolEntry> &A,
73 const std::unique_ptr<SymbolEntry> &B) {
74 bool AL = A->isLocalSymbol(),
75 BL = B->isLocalSymbol();
76 if (AL != BL)
77 return AL;
78 return !AL && !A->isUndefinedSymbol() &&
79 B->isUndefinedSymbol();
80 }) &&
81 "Symbols are not sorted by their types.");
82
83 uint32_t NumLocalSymbols = 0;
84 auto Iter = O.SymTable.Symbols.begin();
85 auto End = O.SymTable.Symbols.end();
86 for (; Iter != End; ++Iter) {
87 if ((*Iter)->isExternalSymbol())
88 break;
89
90 ++NumLocalSymbols;
91 }
92
93 uint32_t NumExtDefSymbols = 0;
94 for (; Iter != End; ++Iter) {
95 if ((*Iter)->isUndefinedSymbol())
96 break;
97
98 ++NumExtDefSymbols;
99 }
100
101 MLC.dysymtab_command_data.ilocalsym = 0;
102 MLC.dysymtab_command_data.nlocalsym = NumLocalSymbols;
103 MLC.dysymtab_command_data.iextdefsym = NumLocalSymbols;
104 MLC.dysymtab_command_data.nextdefsym = NumExtDefSymbols;
105 MLC.dysymtab_command_data.iundefsym = NumLocalSymbols + NumExtDefSymbols;
106 MLC.dysymtab_command_data.nundefsym =
107 O.SymTable.Symbols.size() - (NumLocalSymbols + NumExtDefSymbols);
108}
109
110// Recomputes and updates offset and size fields in load commands and sections
111// since they could be modified.
112uint64_t MachOLayoutBuilder::layoutSegments() {
113 auto HeaderSize =
114 Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
115 const bool IsObjectFile =
116 O.Header.FileType == MachO::HeaderFileType::MH_OBJECT;
117 uint64_t Offset = IsObjectFile ? (HeaderSize + O.Header.SizeOfCmds) : 0;
118 for (LoadCommand &LC : O.LoadCommands) {
119 auto &MLC = LC.MachOLoadCommand;
120 StringRef Segname;
121 uint64_t SegmentVmAddr;
122 uint64_t SegmentVmSize;
123 switch (MLC.load_command_data.cmd) {
124 case MachO::LC_SEGMENT:
125 SegmentVmAddr = MLC.segment_command_data.vmaddr;
126 SegmentVmSize = MLC.segment_command_data.vmsize;
127 Segname = StringRef(MLC.segment_command_data.segname,
128 strnlen(MLC.segment_command_data.segname,
129 sizeof(MLC.segment_command_data.segname)));
130 break;
131 case MachO::LC_SEGMENT_64:
132 SegmentVmAddr = MLC.segment_command_64_data.vmaddr;
133 SegmentVmSize = MLC.segment_command_64_data.vmsize;
134 Segname = StringRef(MLC.segment_command_64_data.segname,
135 strnlen(MLC.segment_command_64_data.segname,
136 sizeof(MLC.segment_command_64_data.segname)));
137 break;
138 default:
139 continue;
140 }
141
142 if (Segname == "__LINKEDIT") {
143 // We update the __LINKEDIT segment later (in layoutTail).
144 assert(LC.Sections.empty() && "__LINKEDIT segment has sections");
145 LinkEditLoadCommand = &MLC;
146 continue;
147 }
148
149 // Update file offsets and sizes of sections.
150 uint64_t SegOffset = Offset;
151 uint64_t SegFileSize = 0;
152 uint64_t VMSize = 0;
153 for (std::unique_ptr<Section> &Sec : LC.Sections) {
154 assert(SegmentVmAddr <= Sec->Addr &&
155 "Section's address cannot be smaller than Segment's one");
156 uint32_t SectOffset = Sec->Addr - SegmentVmAddr;
157 if (IsObjectFile) {
158 if (!Sec->hasValidOffset()) {
159 Sec->Offset = 0;
160 } else {
161 uint64_t PaddingSize =
162 offsetToAlignment(SegFileSize, Align(1ull << Sec->Align));
163 Sec->Offset = SegOffset + SegFileSize + PaddingSize;
164 Sec->Size = Sec->Content.size();
165 SegFileSize += PaddingSize + Sec->Size;
166 }
167 } else {
168 if (!Sec->hasValidOffset()) {
169 Sec->Offset = 0;
170 } else {
171 Sec->Offset = SegOffset + SectOffset;
172 Sec->Size = Sec->Content.size();
173 SegFileSize = std::max(SegFileSize, SectOffset + Sec->Size);
174 }
175 }
176 VMSize = std::max(VMSize, SectOffset + Sec->Size);
177 }
178
179 if (IsObjectFile) {
180 Offset += SegFileSize;
181 } else {
182 Offset = alignTo(Offset + SegFileSize, PageSize);
183 SegFileSize = alignTo(SegFileSize, PageSize);
184 // Use the original vmsize if the segment is __PAGEZERO.
185 VMSize =
186 Segname == "__PAGEZERO" ? SegmentVmSize : alignTo(VMSize, PageSize);
187 }
188
189 switch (MLC.load_command_data.cmd) {
190 case MachO::LC_SEGMENT:
191 MLC.segment_command_data.cmdsize =
192 sizeof(MachO::segment_command) +
193 sizeof(MachO::section) * LC.Sections.size();
194 MLC.segment_command_data.nsects = LC.Sections.size();
195 MLC.segment_command_data.fileoff = SegOffset;
196 MLC.segment_command_data.vmsize = VMSize;
197 MLC.segment_command_data.filesize = SegFileSize;
198 break;
199 case MachO::LC_SEGMENT_64:
200 MLC.segment_command_64_data.cmdsize =
202 sizeof(MachO::section_64) * LC.Sections.size();
203 MLC.segment_command_64_data.nsects = LC.Sections.size();
204 MLC.segment_command_64_data.fileoff = SegOffset;
205 MLC.segment_command_64_data.vmsize = VMSize;
206 MLC.segment_command_64_data.filesize = SegFileSize;
207 break;
208 }
209 }
210
211 return Offset;
212}
213
214uint64_t MachOLayoutBuilder::layoutRelocations(uint64_t Offset) {
215 for (LoadCommand &LC : O.LoadCommands)
216 for (std::unique_ptr<Section> &Sec : LC.Sections) {
217 Sec->RelOff = Sec->Relocations.empty() ? 0 : Offset;
218 Sec->NReloc = Sec->Relocations.size();
219 Offset += sizeof(MachO::any_relocation_info) * Sec->NReloc;
220 }
221
222 return Offset;
223}
224
225Error MachOLayoutBuilder::layoutTail(uint64_t Offset) {
226 // If we are building the layout of an executable or dynamic library
227 // which does not have any segments other than __LINKEDIT,
228 // the Offset can be equal to zero by this time. It happens because of the
229 // convention that in such cases the file offsets specified by LC_SEGMENT
230 // start with zero (unlike the case of a relocatable object file).
231 const uint64_t HeaderSize =
232 Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
233 assert((!(O.Header.FileType == MachO::HeaderFileType::MH_OBJECT) ||
234 Offset >= HeaderSize + O.Header.SizeOfCmds) &&
235 "Incorrect tail offset");
236 Offset = std::max(Offset, HeaderSize + O.Header.SizeOfCmds);
237
238 // The exports trie can be in either LC_DYLD_INFO or in
239 // LC_DYLD_EXPORTS_TRIE, but not both.
240 size_t DyldInfoExportsTrieSize = 0;
241 size_t DyldExportsTrieSize = 0;
242 for (const auto &LC : O.LoadCommands) {
243 switch (LC.MachOLoadCommand.load_command_data.cmd) {
244 case MachO::LC_DYLD_INFO:
245 case MachO::LC_DYLD_INFO_ONLY:
246 DyldInfoExportsTrieSize = O.Exports.Trie.size();
247 break;
248 case MachO::LC_DYLD_EXPORTS_TRIE:
249 DyldExportsTrieSize = O.Exports.Trie.size();
250 break;
251 default:
252 break;
253 }
254 }
255 assert((DyldInfoExportsTrieSize == 0 || DyldExportsTrieSize == 0) &&
256 "Export trie in both LCs");
257
258 uint64_t NListSize = Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist);
259 uint64_t StartOfLinkEdit = Offset;
260
261 // The order of LINKEDIT elements is as follows:
262 // rebase info, binding info, weak binding info, lazy binding info, export
263 // trie, chained fixups, dyld exports trie, function starts, data-in-code,
264 // symbol table, indirect symbol table, symbol table strings,
265 // dylib codesign drs, and code signature.
266 auto updateOffset = [&Offset](size_t Size) {
267 uint64_t PreviousOffset = Offset;
268 Offset += Size;
269 return PreviousOffset;
270 };
271
272 uint64_t StartOfRebaseInfo = updateOffset(O.Rebases.Opcodes.size());
273 uint64_t StartOfBindingInfo = updateOffset(O.Binds.Opcodes.size());
274 uint64_t StartOfWeakBindingInfo = updateOffset(O.WeakBinds.Opcodes.size());
275 uint64_t StartOfLazyBindingInfo = updateOffset(O.LazyBinds.Opcodes.size());
276 uint64_t StartOfExportTrie = updateOffset(DyldInfoExportsTrieSize);
277 uint64_t StartOfChainedFixups = updateOffset(O.ChainedFixups.Data.size());
278 uint64_t StartOfDyldExportsTrie = updateOffset(DyldExportsTrieSize);
279 uint64_t StartOfFunctionStarts = updateOffset(O.FunctionStarts.Data.size());
280 uint64_t StartOfDataInCode = updateOffset(O.DataInCode.Data.size());
281 uint64_t StartOfLinkerOptimizationHint =
282 updateOffset(O.LinkerOptimizationHint.Data.size());
283 uint64_t StartOfSymbols = updateOffset(NListSize * O.SymTable.Symbols.size());
284 uint64_t StartOfIndirectSymbols =
285 updateOffset(sizeof(uint32_t) * O.IndirectSymTable.Symbols.size());
286 uint64_t StartOfSymbolStrings = updateOffset(StrTableBuilder.getSize());
287 uint64_t StartOfDylibCodeSignDRs = updateOffset(O.DylibCodeSignDRs.Data.size());
288
289 uint64_t StartOfCodeSignature = Offset;
290 uint32_t CodeSignatureSize = 0;
291 if (O.CodeSignatureCommandIndex) {
292 StartOfCodeSignature = alignTo(StartOfCodeSignature, 16);
293
294 // Note: These calculations are to be kept in sync with the same
295 // calculations performed in LLD's CodeSignatureSection.
296 const uint32_t AllHeadersSize =
297 alignTo(CodeSignature.FixedHeadersSize + OutputFileName.size() + 1,
298 CodeSignature.Align);
299 const uint32_t BlockCount =
300 (StartOfCodeSignature + CodeSignature.BlockSize - 1) /
301 CodeSignature.BlockSize;
302 const uint32_t Size =
303 alignTo(AllHeadersSize + BlockCount * CodeSignature.HashSize,
304 CodeSignature.Align);
305
306 CodeSignature.StartOffset = StartOfCodeSignature;
307 CodeSignature.AllHeadersSize = AllHeadersSize;
308 CodeSignature.BlockCount = BlockCount;
309 CodeSignature.OutputFileName = OutputFileName;
310 CodeSignature.Size = Size;
311 CodeSignatureSize = Size;
312 }
313 uint64_t LinkEditSize =
314 StartOfCodeSignature + CodeSignatureSize - StartOfLinkEdit;
315
316 // Now we have determined the layout of the contents of the __LINKEDIT
317 // segment. Update its load command.
318 if (LinkEditLoadCommand) {
319 MachO::macho_load_command *MLC = LinkEditLoadCommand;
320 switch (LinkEditLoadCommand->load_command_data.cmd) {
321 case MachO::LC_SEGMENT:
322 MLC->segment_command_data.cmdsize = sizeof(MachO::segment_command);
323 MLC->segment_command_data.fileoff = StartOfLinkEdit;
324 MLC->segment_command_data.vmsize = alignTo(LinkEditSize, PageSize);
325 MLC->segment_command_data.filesize = LinkEditSize;
326 break;
327 case MachO::LC_SEGMENT_64:
328 MLC->segment_command_64_data.cmdsize = sizeof(MachO::segment_command_64);
329 MLC->segment_command_64_data.fileoff = StartOfLinkEdit;
330 MLC->segment_command_64_data.vmsize = alignTo(LinkEditSize, PageSize);
331 MLC->segment_command_64_data.filesize = LinkEditSize;
332 break;
333 }
334 }
335
336 for (LoadCommand &LC : O.LoadCommands) {
337 auto &MLC = LC.MachOLoadCommand;
338 auto cmd = MLC.load_command_data.cmd;
339 switch (cmd) {
340 case MachO::LC_CODE_SIGNATURE:
341 MLC.linkedit_data_command_data.dataoff = StartOfCodeSignature;
342 MLC.linkedit_data_command_data.datasize = CodeSignatureSize;
343 break;
344 case MachO::LC_DYLIB_CODE_SIGN_DRS:
345 MLC.linkedit_data_command_data.dataoff = StartOfDylibCodeSignDRs;
346 MLC.linkedit_data_command_data.datasize = O.DylibCodeSignDRs.Data.size();
347 break;
348 case MachO::LC_SYMTAB:
349 MLC.symtab_command_data.symoff = StartOfSymbols;
350 MLC.symtab_command_data.nsyms = O.SymTable.Symbols.size();
351 MLC.symtab_command_data.stroff = StartOfSymbolStrings;
352 MLC.symtab_command_data.strsize = StrTableBuilder.getSize();
353 break;
354 case MachO::LC_DYSYMTAB: {
355 if (MLC.dysymtab_command_data.ntoc != 0 ||
356 MLC.dysymtab_command_data.nmodtab != 0 ||
357 MLC.dysymtab_command_data.nextrefsyms != 0 ||
358 MLC.dysymtab_command_data.nlocrel != 0 ||
359 MLC.dysymtab_command_data.nextrel != 0)
361 "shared library is not yet supported");
362
363 if (!O.IndirectSymTable.Symbols.empty()) {
364 MLC.dysymtab_command_data.indirectsymoff = StartOfIndirectSymbols;
365 MLC.dysymtab_command_data.nindirectsyms =
366 O.IndirectSymTable.Symbols.size();
367 }
368
369 updateDySymTab(MLC);
370 break;
371 }
372 case MachO::LC_DATA_IN_CODE:
373 MLC.linkedit_data_command_data.dataoff = StartOfDataInCode;
374 MLC.linkedit_data_command_data.datasize = O.DataInCode.Data.size();
375 break;
376 case MachO::LC_LINKER_OPTIMIZATION_HINT:
377 MLC.linkedit_data_command_data.dataoff = StartOfLinkerOptimizationHint;
378 MLC.linkedit_data_command_data.datasize =
379 O.LinkerOptimizationHint.Data.size();
380 break;
381 case MachO::LC_FUNCTION_STARTS:
382 MLC.linkedit_data_command_data.dataoff = StartOfFunctionStarts;
383 MLC.linkedit_data_command_data.datasize = O.FunctionStarts.Data.size();
384 break;
385 case MachO::LC_DYLD_CHAINED_FIXUPS:
386 MLC.linkedit_data_command_data.dataoff = StartOfChainedFixups;
387 MLC.linkedit_data_command_data.datasize = O.ChainedFixups.Data.size();
388 break;
389 case MachO::LC_DYLD_EXPORTS_TRIE:
390 MLC.linkedit_data_command_data.dataoff = StartOfDyldExportsTrie;
391 MLC.linkedit_data_command_data.datasize = DyldExportsTrieSize;
392 break;
393 case MachO::LC_DYLD_INFO:
394 case MachO::LC_DYLD_INFO_ONLY:
395 MLC.dyld_info_command_data.rebase_off =
396 O.Rebases.Opcodes.empty() ? 0 : StartOfRebaseInfo;
397 MLC.dyld_info_command_data.rebase_size = O.Rebases.Opcodes.size();
398 MLC.dyld_info_command_data.bind_off =
399 O.Binds.Opcodes.empty() ? 0 : StartOfBindingInfo;
400 MLC.dyld_info_command_data.bind_size = O.Binds.Opcodes.size();
401 MLC.dyld_info_command_data.weak_bind_off =
402 O.WeakBinds.Opcodes.empty() ? 0 : StartOfWeakBindingInfo;
403 MLC.dyld_info_command_data.weak_bind_size = O.WeakBinds.Opcodes.size();
404 MLC.dyld_info_command_data.lazy_bind_off =
405 O.LazyBinds.Opcodes.empty() ? 0 : StartOfLazyBindingInfo;
406 MLC.dyld_info_command_data.lazy_bind_size = O.LazyBinds.Opcodes.size();
407 MLC.dyld_info_command_data.export_off =
408 O.Exports.Trie.empty() ? 0 : StartOfExportTrie;
409 MLC.dyld_info_command_data.export_size = DyldInfoExportsTrieSize;
410 break;
411 // Note that LC_ENCRYPTION_INFO.cryptoff despite its name and the comment in
412 // <mach-o/loader.h> is not an offset in the binary file, instead, it is a
413 // relative virtual address. At the moment modification of the __TEXT
414 // segment of executables isn't supported anyway (e.g. data in code entries
415 // are not recalculated). Moreover, in general
416 // LC_ENCRYPT_INFO/LC_ENCRYPTION_INFO_64 are nontrivial to update because
417 // without making additional assumptions (e.g. that the entire __TEXT
418 // segment should be encrypted) we do not know how to recalculate the
419 // boundaries of the encrypted part. For now just copy over these load
420 // commands until we encounter a real world usecase where
421 // LC_ENCRYPT_INFO/LC_ENCRYPTION_INFO_64 need to be adjusted.
422 case MachO::LC_ENCRYPTION_INFO:
423 case MachO::LC_ENCRYPTION_INFO_64:
424 case MachO::LC_LOAD_DYLINKER:
425 case MachO::LC_MAIN:
426 case MachO::LC_RPATH:
427 case MachO::LC_SEGMENT:
428 case MachO::LC_SEGMENT_64:
429 case MachO::LC_VERSION_MIN_MACOSX:
430 case MachO::LC_VERSION_MIN_IPHONEOS:
431 case MachO::LC_VERSION_MIN_TVOS:
432 case MachO::LC_VERSION_MIN_WATCHOS:
433 case MachO::LC_BUILD_VERSION:
434 case MachO::LC_ID_DYLIB:
435 case MachO::LC_LOAD_DYLIB:
436 case MachO::LC_LOAD_WEAK_DYLIB:
437 case MachO::LC_UUID:
438 case MachO::LC_SOURCE_VERSION:
439 case MachO::LC_THREAD:
440 case MachO::LC_UNIXTHREAD:
441 case MachO::LC_SUB_FRAMEWORK:
442 case MachO::LC_SUB_UMBRELLA:
443 case MachO::LC_SUB_CLIENT:
444 case MachO::LC_SUB_LIBRARY:
445 case MachO::LC_LINKER_OPTION:
446 // Nothing to update.
447 break;
448 default:
449 // Abort if it's unsupported in order to prevent corrupting the object.
451 "unsupported load command (cmd=0x%x)", cmd);
452 }
453 }
454
455 return Error::success();
456}
457
459 O.Header.NCmds = O.LoadCommands.size();
460 O.Header.SizeOfCmds = computeSizeOfCmds();
461 constructStringTable();
462 updateSymbolIndexes();
463 uint64_t Offset = layoutSegments();
464 Offset = layoutRelocations(Offset);
465 return layoutTail(Offset);
466}
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
uint64_t Addr
uint64_t Size
bool End
Definition: ELF_riscv.cpp:478
Symbol * Sym
Definition: ELF_riscv.cpp:477
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Lightweight error class with error context and mandatory checking.
Definition: Error.h:160
static ErrorSuccess success()
Create a success value.
Definition: Error.h:334
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
size_t add(CachedHashStringRef S)
Add a string to the builder.
void finalize()
Analyze the strings and build the final table.
@ HeaderSize
Definition: BTF.h:61
@ MH_OBJECT
Definition: MachO.h:43
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:440
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition: Error.h:1244
bool is_sorted(R &&Range, Compare C)
Wrapper function around std::is_sorted to check if elements in a range R are sorted with respect to a...
Definition: STLExtras.h:1906
uint64_t offsetToAlignment(uint64_t Value, Align Alignment)
Returns the offset to the next integer (mod 2**64) that is greater than or equal to Value and is a mu...
Definition: Alignment.h:197
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
static constexpr uint32_t FixedHeadersSize
MachO::macho_load_command MachOLoadCommand
Definition: MachOObject.h:82
std::vector< std::unique_ptr< Section > > Sections
Definition: MachOObject.h:93