LLVM 20.0.0git
MachOObject.cpp
Go to the documentation of this file.
1//===- MachOObject.cpp - Mach-O object file model ---------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "MachOObject.h"
12#include <unordered_set>
13
14using namespace llvm;
15using namespace llvm::objcopy::macho;
16
18 : Segname(SegName), Sectname(SectName),
19 CanonicalName((Twine(SegName) + Twine(',') + SectName).str()) {}
20
22 : Segname(SegName), Sectname(SectName),
23 CanonicalName((Twine(SegName) + Twine(',') + SectName).str()),
25
27 assert(Index < Symbols.size() && "invalid symbol index");
28 return Symbols[Index].get();
29}
30
32 return const_cast<SymbolEntry *>(
33 static_cast<const SymbolTable *>(this)->getSymbolByIndex(Index));
34}
35
37 for (auto &Sym : Symbols)
38 Callable(*Sym);
39
40 // Partition symbols: local < defined external < undefined external.
41 auto ExternalBegin = std::stable_partition(
42 std::begin(Symbols), std::end(Symbols),
43 [](const auto &Sym) { return Sym->isLocalSymbol(); });
44 std::stable_partition(ExternalBegin, std::end(Symbols), [](const auto &Sym) {
45 return !Sym->isUndefinedSymbol();
46 });
47}
48
50 function_ref<bool(const std::unique_ptr<SymbolEntry> &)> ToRemove) {
52}
53
55 static constexpr char TextSegmentName[] = "__TEXT";
56 // Update indices of special load commands
57 for (size_t Index = 0, Size = LoadCommands.size(); Index < Size; ++Index) {
58 LoadCommand &LC = LoadCommands[Index];
59 switch (LC.MachOLoadCommand.load_command_data.cmd) {
60 case MachO::LC_CODE_SIGNATURE:
62 break;
63 case MachO::LC_SEGMENT:
64 if (StringRef(LC.MachOLoadCommand.segment_command_data.segname) ==
65 TextSegmentName)
67 break;
68 case MachO::LC_SEGMENT_64:
69 if (StringRef(LC.MachOLoadCommand.segment_command_64_data.segname) ==
70 TextSegmentName)
72 break;
73 case MachO::LC_SYMTAB:
74 SymTabCommandIndex = Index;
75 break;
76 case MachO::LC_DYSYMTAB:
78 break;
79 case MachO::LC_DYLD_INFO:
80 case MachO::LC_DYLD_INFO_ONLY:
82 break;
83 case MachO::LC_DATA_IN_CODE:
85 break;
86 case MachO::LC_LINKER_OPTIMIZATION_HINT:
88 break;
89 case MachO::LC_FUNCTION_STARTS:
91 break;
92 case MachO::LC_DYLIB_CODE_SIGN_DRS:
94 break;
95 case MachO::LC_DYLD_CHAINED_FIXUPS:
97 break;
98 case MachO::LC_DYLD_EXPORTS_TRIE:
100 break;
101 case MachO::LC_ENCRYPTION_INFO:
102 case MachO::LC_ENCRYPTION_INFO_64:
104 break;
105 }
106 }
107}
108
110 function_ref<bool(const LoadCommand &)> ToRemove) {
111 auto It = std::stable_partition(
112 LoadCommands.begin(), LoadCommands.end(),
113 [&](const LoadCommand &LC) { return !ToRemove(LC); });
114 LoadCommands.erase(It, LoadCommands.end());
115
117 return Error::success();
118}
119
121 function_ref<bool(const std::unique_ptr<Section> &)> ToRemove) {
122 DenseMap<uint32_t, const Section *> OldIndexToSection;
123 uint32_t NextSectionIndex = 1;
124 for (LoadCommand &LC : LoadCommands) {
125 auto It = std::stable_partition(
126 std::begin(LC.Sections), std::end(LC.Sections),
127 [&](const std::unique_ptr<Section> &Sec) { return !ToRemove(Sec); });
128 for (auto I = LC.Sections.begin(), End = It; I != End; ++I) {
129 OldIndexToSection[(*I)->Index] = I->get();
130 (*I)->Index = NextSectionIndex++;
131 }
132 LC.Sections.erase(It, LC.Sections.end());
133 }
134
135 auto IsDead = [&](const std::unique_ptr<SymbolEntry> &S) -> bool {
136 std::optional<uint32_t> Section = S->section();
137 return (Section && !OldIndexToSection.count(*Section));
138 };
139
141 for (const std::unique_ptr<SymbolEntry> &Sym : SymTable.Symbols)
142 if (IsDead(Sym))
143 DeadSymbols.insert(Sym.get());
144
145 for (const LoadCommand &LC : LoadCommands)
146 for (const std::unique_ptr<Section> &Sec : LC.Sections)
147 for (const RelocationInfo &R : Sec->Relocations)
148 if (R.Symbol && *R.Symbol && DeadSymbols.count(*R.Symbol))
149 return createStringError(std::errc::invalid_argument,
150 "symbol '%s' defined in section with index "
151 "'%u' cannot be removed because it is "
152 "referenced by a relocation in section '%s'",
153 (*R.Symbol)->Name.c_str(),
154 *((*R.Symbol)->section()),
155 Sec->CanonicalName.c_str());
157 for (std::unique_ptr<SymbolEntry> &S : SymTable.Symbols)
158 if (S->section())
159 S->n_sect = OldIndexToSection[S->n_sect]->Index;
160 return Error::success();
161}
162
164 uint64_t HeaderSize =
166 uint64_t Addr = HeaderSize + Header.SizeOfCmds;
167 for (const LoadCommand &LC : LoadCommands) {
168 const MachO::macho_load_command &MLC = LC.MachOLoadCommand;
169 switch (MLC.load_command_data.cmd) {
170 case MachO::LC_SEGMENT:
171 Addr = std::max(Addr,
172 static_cast<uint64_t>(MLC.segment_command_data.vmaddr) +
173 MLC.segment_command_data.vmsize);
174 break;
175 case MachO::LC_SEGMENT_64:
176 Addr = std::max(Addr, MLC.segment_command_64_data.vmaddr +
177 MLC.segment_command_64_data.vmsize);
178 break;
179 default:
180 continue;
181 }
182 }
183 return Addr;
184}
185
186template <typename SegmentType>
187static void
189 StringRef SegName, uint64_t SegVMAddr, uint64_t SegVMSize) {
190 assert(SegName.size() <= sizeof(Seg.segname) && "too long segment name");
191 memset(&Seg, 0, sizeof(SegmentType));
192 Seg.cmd = CmdType;
193 strncpy(Seg.segname, SegName.data(), SegName.size());
194 Seg.maxprot |=
196 Seg.initprot |=
198 Seg.vmaddr = SegVMAddr;
199 Seg.vmsize = SegVMSize;
200}
201
203 LoadCommand LC;
204 const uint64_t SegVMAddr = nextAvailableSegmentAddress();
205 if (is64Bit())
206 constructSegment(LC.MachOLoadCommand.segment_command_64_data,
207 MachO::LC_SEGMENT_64, SegName, SegVMAddr, SegVMSize);
208 else
209 constructSegment(LC.MachOLoadCommand.segment_command_data,
210 MachO::LC_SEGMENT, SegName, SegVMAddr, SegVMSize);
211
212 LoadCommands.push_back(std::move(LC));
213 return LoadCommands.back();
214}
215
216/// Extracts a segment name from a string which is possibly non-null-terminated.
217static StringRef extractSegmentName(const char *SegName) {
218 return StringRef(SegName,
219 strnlen(SegName, sizeof(MachO::segment_command::segname)));
220}
221
222std::optional<StringRef> LoadCommand::getSegmentName() const {
224 switch (MLC.load_command_data.cmd) {
225 case MachO::LC_SEGMENT:
226 return extractSegmentName(MLC.segment_command_data.segname);
227 case MachO::LC_SEGMENT_64:
228 return extractSegmentName(MLC.segment_command_64_data.segname);
229 default:
230 return std::nullopt;
231 }
232}
233
234std::optional<uint64_t> LoadCommand::getSegmentVMAddr() const {
236 switch (MLC.load_command_data.cmd) {
237 case MachO::LC_SEGMENT:
238 return MLC.segment_command_data.vmaddr;
239 case MachO::LC_SEGMENT_64:
240 return MLC.segment_command_64_data.vmaddr;
241 default:
242 return std::nullopt;
243 }
244}
ReachingDefAnalysis InstSet & ToRemove
T Content
uint64_t Addr
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
Symbol * Sym
Definition: ELF_riscv.cpp:479
#define I(x, y, z)
Definition: MD5.cpp:58
static void constructSegment(SegmentType &Seg, llvm::MachO::LoadCommandType CmdType, StringRef SegName, uint64_t SegVMAddr, uint64_t SegVMSize)
static StringRef extractSegmentName(const char *SegName)
Extracts a segment name from a string which is possibly non-null-terminated.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool IsDead
This file defines the SmallPtrSet class.
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:152
Lightweight error class with error context and mandatory checking.
Definition: Error.h:160
static ErrorSuccess success()
Create a success value.
Definition: Error.h:337
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:452
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:150
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:144
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
An efficient, type-erasing, non-owning reference to a callable.
@ VM_PROT_EXECUTE
Definition: MachO.h:497
@ VM_PROT_READ
Definition: MachO.h:497
@ VM_PROT_WRITE
Definition: MachO.h:497
LoadCommandType
Definition: MachO.h:98
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition: Error.h:1291
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2099
MachO::macho_load_command MachOLoadCommand
Definition: MachOObject.h:82
std::optional< StringRef > getSegmentName() const
std::optional< uint64_t > getSegmentVMAddr() const
std::optional< size_t > FunctionStartsCommandIndex
The index of the LC_FUNCTION_STARTS load command if present.
Definition: MachOObject.h:336
Error removeLoadCommands(function_ref< bool(const LoadCommand &)> ToRemove)
std::optional< size_t > ChainedFixupsCommandIndex
The index of the LC_DYLD_CHAINED_FIXUPS load command if present.
Definition: MachOObject.h:338
std::optional< size_t > ExportsTrieCommandIndex
The index of the LC_DYLD_EXPORTS_TRIE load command if present.
Definition: MachOObject.h:340
Error removeSections(function_ref< bool(const std::unique_ptr< Section > &)> ToRemove)
std::optional< size_t > DylibCodeSignDRsIndex
The index of LC_DYLIB_CODE_SIGN_DRS load command if present.
Definition: MachOObject.h:324
std::optional< size_t > SymTabCommandIndex
The index of LC_SYMTAB load command if present.
Definition: MachOObject.h:326
std::optional< size_t > DyLdInfoCommandIndex
The index of LC_DYLD_INFO or LC_DYLD_INFO_ONLY load command if present.
Definition: MachOObject.h:328
std::vector< LoadCommand > LoadCommands
Definition: MachOObject.h:301
std::optional< size_t > DataInCodeCommandIndex
The index of the LC_DATA_IN_CODE load command if present.
Definition: MachOObject.h:332
std::optional< size_t > EncryptionInfoCommandIndex
The index of the LC_ENCRYPTION_INFO or LC_ENCRYPTION_INFO_64 load command if present.
Definition: MachOObject.h:346
std::optional< size_t > DySymTabCommandIndex
The index of the LC_DYSYMTAB load command if present.
Definition: MachOObject.h:330
std::optional< size_t > TextSegmentCommandIndex
The index of the LC_SEGMENT or LC_SEGMENT_64 load command corresponding to the __TEXT segment.
Definition: MachOObject.h:343
uint64_t nextAvailableSegmentAddress() const
std::optional< size_t > CodeSignatureCommandIndex
The index of LC_CODE_SIGNATURE load command if present.
Definition: MachOObject.h:322
LoadCommand & addSegment(StringRef SegName, uint64_t SegVMSize)
Creates a new segment load command in the object and returns a reference to the newly created load command.
std::optional< size_t > LinkerOptimizationHintCommandIndex
The index of the LC_LINKER_OPTIMIZATION_HINT load command if present.
Definition: MachOObject.h:334
Section(StringRef SegName, StringRef SectName)
Definition: MachOObject.cpp:17
The location of the symbol table inside the binary is described by LC_SYMTAB load command.
Definition: MachOObject.h:134
void updateSymbols(function_ref< void(SymbolEntry &)> Callable)
Definition: MachOObject.cpp:36
const SymbolEntry * getSymbolByIndex(uint32_t Index) const
Definition: MachOObject.cpp:26
std::vector< std::unique_ptr< SymbolEntry > > Symbols
Definition: MachOObject.h:135
void removeSymbols(function_ref< bool(const std::unique_ptr< SymbolEntry > &)> ToRemove)
Definition: MachOObject.cpp:49