LLVM 20.0.0git
MachOLayoutBuilder.cpp
Go to the documentation of this file.
1//===- MachOLayoutBuilder.cpp -----------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
11#include "llvm/Support/Errc.h"
14
15using namespace llvm;
16using namespace llvm::objcopy::macho;
17
19MachOLayoutBuilder::getStringTableBuilderKind(const Object &O, bool Is64Bit) {
20 if (O.Header.FileType == MachO::HeaderFileType::MH_OBJECT)
24}
25
26uint32_t MachOLayoutBuilder::computeSizeOfCmds() const {
27 uint32_t Size = 0;
28 for (const LoadCommand &LC : O.LoadCommands) {
30 auto cmd = MLC.load_command_data.cmd;
31 switch (cmd) {
32 case MachO::LC_SEGMENT:
33 Size += sizeof(MachO::segment_command) +
34 sizeof(MachO::section) * LC.Sections.size();
35 continue;
36 case MachO::LC_SEGMENT_64:
38 sizeof(MachO::section_64) * LC.Sections.size();
39 continue;
40 }
41
42 switch (cmd) {
43#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \
44 case MachO::LCName: \
45 Size += sizeof(MachO::LCStruct) + LC.Payload.size(); \
46 break;
47#include "llvm/BinaryFormat/MachO.def"
48#undef HANDLE_LOAD_COMMAND
49 }
50 }
51
52 return Size;
53}
54
55void MachOLayoutBuilder::constructStringTable() {
56 for (std::unique_ptr<SymbolEntry> &Sym : O.SymTable.Symbols)
57 StrTableBuilder.add(Sym->Name);
58 StrTableBuilder.finalize();
59}
60
61void MachOLayoutBuilder::updateSymbolIndexes() {
62 uint32_t Index = 0;
63 for (auto &Symbol : O.SymTable.Symbols)
64 Symbol->Index = Index++;
65}
66
67// Updates the index and the number of local/external/undefined symbols.
68void MachOLayoutBuilder::updateDySymTab(MachO::macho_load_command &MLC) {
69 assert(MLC.load_command_data.cmd == MachO::LC_DYSYMTAB);
70 // Make sure that nlist entries in the symbol table are sorted by the those
71 // types. The order is: local < defined external < undefined external.
72 assert(llvm::is_sorted(O.SymTable.Symbols,
73 [](const std::unique_ptr<SymbolEntry> &A,
74 const std::unique_ptr<SymbolEntry> &B) {
75 bool AL = A->isLocalSymbol(),
76 BL = B->isLocalSymbol();
77 if (AL != BL)
78 return AL;
79 return !AL && !A->isUndefinedSymbol() &&
80 B->isUndefinedSymbol();
81 }) &&
82 "Symbols are not sorted by their types.");
83
84 uint32_t NumLocalSymbols = 0;
85 auto Iter = O.SymTable.Symbols.begin();
86 auto End = O.SymTable.Symbols.end();
87 for (; Iter != End; ++Iter) {
88 if ((*Iter)->isExternalSymbol())
89 break;
90
91 ++NumLocalSymbols;
92 }
93
94 uint32_t NumExtDefSymbols = 0;
95 for (; Iter != End; ++Iter) {
96 if ((*Iter)->isUndefinedSymbol())
97 break;
98
99 ++NumExtDefSymbols;
100 }
101
102 MLC.dysymtab_command_data.ilocalsym = 0;
103 MLC.dysymtab_command_data.nlocalsym = NumLocalSymbols;
104 MLC.dysymtab_command_data.iextdefsym = NumLocalSymbols;
105 MLC.dysymtab_command_data.nextdefsym = NumExtDefSymbols;
106 MLC.dysymtab_command_data.iundefsym = NumLocalSymbols + NumExtDefSymbols;
107 MLC.dysymtab_command_data.nundefsym =
108 O.SymTable.Symbols.size() - (NumLocalSymbols + NumExtDefSymbols);
109}
110
111// Recomputes and updates offset and size fields in load commands and sections
112// since they could be modified.
113uint64_t MachOLayoutBuilder::layoutSegments() {
114 auto HeaderSize =
115 Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
116 const bool IsObjectFile =
117 O.Header.FileType == MachO::HeaderFileType::MH_OBJECT;
118 uint64_t Offset = IsObjectFile ? (HeaderSize + O.Header.SizeOfCmds) : 0;
119 for (LoadCommand &LC : O.LoadCommands) {
120 auto &MLC = LC.MachOLoadCommand;
121 StringRef Segname;
122 uint64_t SegmentVmAddr;
123 uint64_t SegmentVmSize;
124 switch (MLC.load_command_data.cmd) {
125 case MachO::LC_SEGMENT:
126 SegmentVmAddr = MLC.segment_command_data.vmaddr;
127 SegmentVmSize = MLC.segment_command_data.vmsize;
128 Segname = StringRef(MLC.segment_command_data.segname,
129 strnlen(MLC.segment_command_data.segname,
130 sizeof(MLC.segment_command_data.segname)));
131 break;
132 case MachO::LC_SEGMENT_64:
133 SegmentVmAddr = MLC.segment_command_64_data.vmaddr;
134 SegmentVmSize = MLC.segment_command_64_data.vmsize;
135 Segname = StringRef(MLC.segment_command_64_data.segname,
136 strnlen(MLC.segment_command_64_data.segname,
137 sizeof(MLC.segment_command_64_data.segname)));
138 break;
139 default:
140 continue;
141 }
142
143 if (Segname == "__LINKEDIT") {
144 // We update the __LINKEDIT segment later (in layoutTail).
145 assert(LC.Sections.empty() && "__LINKEDIT segment has sections");
146 LinkEditLoadCommand = &MLC;
147 continue;
148 }
149
150 // Update file offsets and sizes of sections.
151 uint64_t SegOffset = Offset;
152 uint64_t SegFileSize = 0;
153 uint64_t VMSize = 0;
154 for (std::unique_ptr<Section> &Sec : LC.Sections) {
155 assert(SegmentVmAddr <= Sec->Addr &&
156 "Section's address cannot be smaller than Segment's one");
157 uint32_t SectOffset = Sec->Addr - SegmentVmAddr;
158 if (IsObjectFile) {
159 if (!Sec->hasValidOffset()) {
160 Sec->Offset = 0;
161 } else {
162 uint64_t PaddingSize =
163 offsetToAlignment(SegFileSize, Align(1ull << Sec->Align));
164 Sec->Offset = SegOffset + SegFileSize + PaddingSize;
165 Sec->Size = Sec->Content.size();
166 SegFileSize += PaddingSize + Sec->Size;
167 }
168 } else {
169 if (!Sec->hasValidOffset()) {
170 Sec->Offset = 0;
171 } else {
172 Sec->Offset = SegOffset + SectOffset;
173 Sec->Size = Sec->Content.size();
174 SegFileSize = std::max(SegFileSize, SectOffset + Sec->Size);
175 }
176 }
177 VMSize = std::max(VMSize, SectOffset + Sec->Size);
178 }
179
180 if (IsObjectFile) {
181 Offset += SegFileSize;
182 } else {
183 Offset = alignTo(Offset + SegFileSize, PageSize);
184 SegFileSize = alignTo(SegFileSize, PageSize);
185 // Use the original vmsize if the segment is __PAGEZERO.
186 VMSize =
187 Segname == "__PAGEZERO" ? SegmentVmSize : alignTo(VMSize, PageSize);
188 }
189
190 switch (MLC.load_command_data.cmd) {
191 case MachO::LC_SEGMENT:
192 MLC.segment_command_data.cmdsize =
193 sizeof(MachO::segment_command) +
194 sizeof(MachO::section) * LC.Sections.size();
195 MLC.segment_command_data.nsects = LC.Sections.size();
196 MLC.segment_command_data.fileoff = SegOffset;
197 MLC.segment_command_data.vmsize = VMSize;
198 MLC.segment_command_data.filesize = SegFileSize;
199 break;
200 case MachO::LC_SEGMENT_64:
201 MLC.segment_command_64_data.cmdsize =
203 sizeof(MachO::section_64) * LC.Sections.size();
204 MLC.segment_command_64_data.nsects = LC.Sections.size();
205 MLC.segment_command_64_data.fileoff = SegOffset;
206 MLC.segment_command_64_data.vmsize = VMSize;
207 MLC.segment_command_64_data.filesize = SegFileSize;
208 break;
209 }
210 }
211
212 return Offset;
213}
214
215uint64_t MachOLayoutBuilder::layoutRelocations(uint64_t Offset) {
216 for (LoadCommand &LC : O.LoadCommands)
217 for (std::unique_ptr<Section> &Sec : LC.Sections) {
218 Sec->RelOff = Sec->Relocations.empty() ? 0 : Offset;
219 Sec->NReloc = Sec->Relocations.size();
220 Offset += sizeof(MachO::any_relocation_info) * Sec->NReloc;
221 }
222
223 return Offset;
224}
225
226Error MachOLayoutBuilder::layoutTail(uint64_t Offset) {
227 // If we are building the layout of an executable or dynamic library
228 // which does not have any segments other than __LINKEDIT,
229 // the Offset can be equal to zero by this time. It happens because of the
230 // convention that in such cases the file offsets specified by LC_SEGMENT
231 // start with zero (unlike the case of a relocatable object file).
232 const uint64_t HeaderSize =
233 Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
234 assert((!(O.Header.FileType == MachO::HeaderFileType::MH_OBJECT) ||
235 Offset >= HeaderSize + O.Header.SizeOfCmds) &&
236 "Incorrect tail offset");
237 Offset = std::max(Offset, HeaderSize + O.Header.SizeOfCmds);
238
239 // The exports trie can be in either LC_DYLD_INFO or in
240 // LC_DYLD_EXPORTS_TRIE, but not both.
241 size_t DyldInfoExportsTrieSize = 0;
242 size_t DyldExportsTrieSize = 0;
243 for (const auto &LC : O.LoadCommands) {
244 switch (LC.MachOLoadCommand.load_command_data.cmd) {
245 case MachO::LC_DYLD_INFO:
246 case MachO::LC_DYLD_INFO_ONLY:
247 DyldInfoExportsTrieSize = O.Exports.Trie.size();
248 break;
249 case MachO::LC_DYLD_EXPORTS_TRIE:
250 DyldExportsTrieSize = O.Exports.Trie.size();
251 break;
252 default:
253 break;
254 }
255 }
256 assert((DyldInfoExportsTrieSize == 0 || DyldExportsTrieSize == 0) &&
257 "Export trie in both LCs");
258
259 uint64_t NListSize = Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist);
260 uint64_t StartOfLinkEdit = Offset;
261
262 // The order of LINKEDIT elements is as follows:
263 // rebase info, binding info, weak binding info, lazy binding info, export
264 // trie, chained fixups, dyld exports trie, function starts, data-in-code,
265 // symbol table, indirect symbol table, symbol table strings,
266 // dylib codesign drs, and code signature.
267 auto updateOffset = [&Offset](size_t Size) {
268 uint64_t PreviousOffset = Offset;
269 Offset += Size;
270 return PreviousOffset;
271 };
272
273 uint64_t StartOfRebaseInfo = updateOffset(O.Rebases.Opcodes.size());
274 uint64_t StartOfBindingInfo = updateOffset(O.Binds.Opcodes.size());
275 uint64_t StartOfWeakBindingInfo = updateOffset(O.WeakBinds.Opcodes.size());
276 uint64_t StartOfLazyBindingInfo = updateOffset(O.LazyBinds.Opcodes.size());
277 uint64_t StartOfExportTrie = updateOffset(DyldInfoExportsTrieSize);
278 uint64_t StartOfChainedFixups = updateOffset(O.ChainedFixups.Data.size());
279 uint64_t StartOfDyldExportsTrie = updateOffset(DyldExportsTrieSize);
280 uint64_t StartOfFunctionStarts = updateOffset(O.FunctionStarts.Data.size());
281 uint64_t StartOfDataInCode = updateOffset(O.DataInCode.Data.size());
282 uint64_t StartOfLinkerOptimizationHint =
283 updateOffset(O.LinkerOptimizationHint.Data.size());
284 uint64_t StartOfSymbols = updateOffset(NListSize * O.SymTable.Symbols.size());
285 uint64_t StartOfIndirectSymbols =
286 updateOffset(sizeof(uint32_t) * O.IndirectSymTable.Symbols.size());
287 uint64_t StartOfSymbolStrings = updateOffset(StrTableBuilder.getSize());
288 uint64_t StartOfDylibCodeSignDRs = updateOffset(O.DylibCodeSignDRs.Data.size());
289
290 uint64_t StartOfCodeSignature = Offset;
291 uint32_t CodeSignatureSize = 0;
292 if (O.CodeSignatureCommandIndex) {
293 StartOfCodeSignature = alignTo(StartOfCodeSignature, 16);
294
295 // Note: These calculations are to be kept in sync with the same
296 // calculations performed in LLD's CodeSignatureSection.
297 const uint32_t AllHeadersSize =
298 alignTo(CodeSignature.FixedHeadersSize + OutputFileName.size() + 1,
299 CodeSignature.Align);
300 const uint32_t BlockCount =
301 (StartOfCodeSignature + CodeSignature.BlockSize - 1) /
302 CodeSignature.BlockSize;
303 const uint32_t Size =
304 alignTo(AllHeadersSize + BlockCount * CodeSignature.HashSize,
305 CodeSignature.Align);
306
307 CodeSignature.StartOffset = StartOfCodeSignature;
308 CodeSignature.AllHeadersSize = AllHeadersSize;
309 CodeSignature.BlockCount = BlockCount;
310 CodeSignature.OutputFileName = OutputFileName;
311 CodeSignature.Size = Size;
312 CodeSignatureSize = Size;
313 }
314 uint64_t LinkEditSize =
315 StartOfCodeSignature + CodeSignatureSize - StartOfLinkEdit;
316
317 // Now we have determined the layout of the contents of the __LINKEDIT
318 // segment. Update its load command.
319 if (LinkEditLoadCommand) {
320 MachO::macho_load_command *MLC = LinkEditLoadCommand;
321 switch (LinkEditLoadCommand->load_command_data.cmd) {
322 case MachO::LC_SEGMENT:
323 MLC->segment_command_data.cmdsize = sizeof(MachO::segment_command);
324 MLC->segment_command_data.fileoff = StartOfLinkEdit;
325 MLC->segment_command_data.vmsize = alignTo(LinkEditSize, PageSize);
326 MLC->segment_command_data.filesize = LinkEditSize;
327 break;
328 case MachO::LC_SEGMENT_64:
329 MLC->segment_command_64_data.cmdsize = sizeof(MachO::segment_command_64);
330 MLC->segment_command_64_data.fileoff = StartOfLinkEdit;
331 MLC->segment_command_64_data.vmsize = alignTo(LinkEditSize, PageSize);
332 MLC->segment_command_64_data.filesize = LinkEditSize;
333 break;
334 }
335 }
336
337 for (LoadCommand &LC : O.LoadCommands) {
338 auto &MLC = LC.MachOLoadCommand;
339 auto cmd = MLC.load_command_data.cmd;
340 switch (cmd) {
341 case MachO::LC_CODE_SIGNATURE:
342 MLC.linkedit_data_command_data.dataoff = StartOfCodeSignature;
343 MLC.linkedit_data_command_data.datasize = CodeSignatureSize;
344 break;
345 case MachO::LC_DYLIB_CODE_SIGN_DRS:
346 MLC.linkedit_data_command_data.dataoff = StartOfDylibCodeSignDRs;
347 MLC.linkedit_data_command_data.datasize = O.DylibCodeSignDRs.Data.size();
348 break;
349 case MachO::LC_SYMTAB:
350 MLC.symtab_command_data.symoff = StartOfSymbols;
351 MLC.symtab_command_data.nsyms = O.SymTable.Symbols.size();
352 MLC.symtab_command_data.stroff = StartOfSymbolStrings;
353 MLC.symtab_command_data.strsize = StrTableBuilder.getSize();
354 break;
355 case MachO::LC_DYSYMTAB: {
356 if (MLC.dysymtab_command_data.ntoc != 0 ||
357 MLC.dysymtab_command_data.nmodtab != 0 ||
358 MLC.dysymtab_command_data.nextrefsyms != 0 ||
359 MLC.dysymtab_command_data.nlocrel != 0 ||
360 MLC.dysymtab_command_data.nextrel != 0)
362 "shared library is not yet supported");
363 MLC.dysymtab_command_data.indirectsymoff =
364 O.IndirectSymTable.Symbols.size() ? StartOfIndirectSymbols : 0;
365 MLC.dysymtab_command_data.nindirectsyms =
366 O.IndirectSymTable.Symbols.size();
367 updateDySymTab(MLC);
368 break;
369 }
370 case MachO::LC_DATA_IN_CODE:
371 MLC.linkedit_data_command_data.dataoff = StartOfDataInCode;
372 MLC.linkedit_data_command_data.datasize = O.DataInCode.Data.size();
373 break;
374 case MachO::LC_LINKER_OPTIMIZATION_HINT:
375 MLC.linkedit_data_command_data.dataoff = StartOfLinkerOptimizationHint;
376 MLC.linkedit_data_command_data.datasize =
377 O.LinkerOptimizationHint.Data.size();
378 break;
379 case MachO::LC_FUNCTION_STARTS:
380 MLC.linkedit_data_command_data.dataoff = StartOfFunctionStarts;
381 MLC.linkedit_data_command_data.datasize = O.FunctionStarts.Data.size();
382 break;
383 case MachO::LC_DYLD_CHAINED_FIXUPS:
384 MLC.linkedit_data_command_data.dataoff = StartOfChainedFixups;
385 MLC.linkedit_data_command_data.datasize = O.ChainedFixups.Data.size();
386 break;
387 case MachO::LC_DYLD_EXPORTS_TRIE:
388 MLC.linkedit_data_command_data.dataoff = StartOfDyldExportsTrie;
389 MLC.linkedit_data_command_data.datasize = DyldExportsTrieSize;
390 break;
391 case MachO::LC_DYLD_INFO:
392 case MachO::LC_DYLD_INFO_ONLY:
393 MLC.dyld_info_command_data.rebase_off =
394 O.Rebases.Opcodes.empty() ? 0 : StartOfRebaseInfo;
395 MLC.dyld_info_command_data.rebase_size = O.Rebases.Opcodes.size();
396 MLC.dyld_info_command_data.bind_off =
397 O.Binds.Opcodes.empty() ? 0 : StartOfBindingInfo;
398 MLC.dyld_info_command_data.bind_size = O.Binds.Opcodes.size();
399 MLC.dyld_info_command_data.weak_bind_off =
400 O.WeakBinds.Opcodes.empty() ? 0 : StartOfWeakBindingInfo;
401 MLC.dyld_info_command_data.weak_bind_size = O.WeakBinds.Opcodes.size();
402 MLC.dyld_info_command_data.lazy_bind_off =
403 O.LazyBinds.Opcodes.empty() ? 0 : StartOfLazyBindingInfo;
404 MLC.dyld_info_command_data.lazy_bind_size = O.LazyBinds.Opcodes.size();
405 MLC.dyld_info_command_data.export_off =
406 O.Exports.Trie.empty() ? 0 : StartOfExportTrie;
407 MLC.dyld_info_command_data.export_size = DyldInfoExportsTrieSize;
408 break;
409 // Note that LC_ENCRYPTION_INFO.cryptoff despite its name and the comment in
410 // <mach-o/loader.h> is not an offset in the binary file, instead, it is a
411 // relative virtual address. At the moment modification of the __TEXT
412 // segment of executables isn't supported anyway (e.g. data in code entries
413 // are not recalculated). Moreover, in general
414 // LC_ENCRYPT_INFO/LC_ENCRYPTION_INFO_64 are nontrivial to update because
415 // without making additional assumptions (e.g. that the entire __TEXT
416 // segment should be encrypted) we do not know how to recalculate the
417 // boundaries of the encrypted part. For now just copy over these load
418 // commands until we encounter a real world usecase where
419 // LC_ENCRYPT_INFO/LC_ENCRYPTION_INFO_64 need to be adjusted.
420 case MachO::LC_ENCRYPTION_INFO:
421 case MachO::LC_ENCRYPTION_INFO_64:
422 case MachO::LC_LOAD_DYLINKER:
423 case MachO::LC_MAIN:
424 case MachO::LC_RPATH:
425 case MachO::LC_SEGMENT:
426 case MachO::LC_SEGMENT_64:
427 case MachO::LC_VERSION_MIN_MACOSX:
428 case MachO::LC_VERSION_MIN_IPHONEOS:
429 case MachO::LC_VERSION_MIN_TVOS:
430 case MachO::LC_VERSION_MIN_WATCHOS:
431 case MachO::LC_BUILD_VERSION:
432 case MachO::LC_ID_DYLIB:
433 case MachO::LC_LOAD_DYLIB:
434 case MachO::LC_LOAD_WEAK_DYLIB:
435 case MachO::LC_UUID:
436 case MachO::LC_SOURCE_VERSION:
437 case MachO::LC_THREAD:
438 case MachO::LC_UNIXTHREAD:
439 case MachO::LC_SUB_FRAMEWORK:
440 case MachO::LC_SUB_UMBRELLA:
441 case MachO::LC_SUB_CLIENT:
442 case MachO::LC_SUB_LIBRARY:
443 case MachO::LC_LINKER_OPTION:
444 // Nothing to update.
445 break;
446 default:
447 // Abort if it's unsupported in order to prevent corrupting the object.
449 "unsupported load command (cmd=0x%x)", cmd);
450 }
451 }
452
453 return Error::success();
454}
455
457 O.Header.NCmds = O.LoadCommands.size();
458 O.Header.SizeOfCmds = computeSizeOfCmds();
459 constructStringTable();
460 updateSymbolIndexes();
461 uint64_t Offset = layoutSegments();
462 Offset = layoutRelocations(Offset);
463 return layoutTail(Offset);
464}
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
uint64_t Addr
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
Symbol * Sym
Definition: ELF_riscv.cpp:479
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Lightweight error class with error context and mandatory checking.
Definition: Error.h:160
static ErrorSuccess success()
Create a success value.
Definition: Error.h:337
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:150
size_t add(CachedHashStringRef S)
Add a string to the builder.
void finalize()
Analyze the strings and build the final table.
@ HeaderSize
Definition: BTF.h:61
@ MH_OBJECT
Definition: MachO.h:43
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition: Error.h:1291
bool is_sorted(R &&Range, Compare C)
Wrapper function around std::is_sorted to check if elements in a range R are sorted with respect to a...
Definition: STLExtras.h:1926
uint64_t offsetToAlignment(uint64_t Value, Align Alignment)
Returns the offset to the next integer (mod 2**64) that is greater than or equal to Value and is a mu...
Definition: Alignment.h:197
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
static constexpr uint32_t FixedHeadersSize
MachO::macho_load_command MachOLoadCommand
Definition: MachOObject.h:82
std::vector< std::unique_ptr< Section > > Sections
Definition: MachOObject.h:93