LLVM  17.0.0git
MachOObject.cpp
Go to the documentation of this file.
1 //===- MachOObject.cpp - Mach-O object file model ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "MachOObject.h"
10 #include "llvm/ADT/SmallPtrSet.h"
11 #include <unordered_set>
12 
13 using namespace llvm;
14 using namespace llvm::objcopy::macho;
15 
17  : Segname(SegName), Sectname(SectName),
18  CanonicalName((Twine(SegName) + Twine(',') + SectName).str()) {}
19 
21  : Segname(SegName), Sectname(SectName),
22  CanonicalName((Twine(SegName) + Twine(',') + SectName).str()),
23  Content(Content) {}
24 
26  assert(Index < Symbols.size() && "invalid symbol index");
27  return Symbols[Index].get();
28 }
29 
31  return const_cast<SymbolEntry *>(
32  static_cast<const SymbolTable *>(this)->getSymbolByIndex(Index));
33 }
34 
36  function_ref<bool(const std::unique_ptr<SymbolEntry> &)> ToRemove) {
38 }
39 
41  static constexpr char TextSegmentName[] = "__TEXT";
42  // Update indices of special load commands
43  for (size_t Index = 0, Size = LoadCommands.size(); Index < Size; ++Index) {
45  switch (LC.MachOLoadCommand.load_command_data.cmd) {
46  case MachO::LC_CODE_SIGNATURE:
48  break;
49  case MachO::LC_SEGMENT:
50  if (StringRef(LC.MachOLoadCommand.segment_command_data.segname) ==
51  TextSegmentName)
53  break;
54  case MachO::LC_SEGMENT_64:
55  if (StringRef(LC.MachOLoadCommand.segment_command_64_data.segname) ==
56  TextSegmentName)
58  break;
59  case MachO::LC_SYMTAB:
61  break;
62  case MachO::LC_DYSYMTAB:
64  break;
65  case MachO::LC_DYLD_INFO:
66  case MachO::LC_DYLD_INFO_ONLY:
68  break;
69  case MachO::LC_DATA_IN_CODE:
71  break;
72  case MachO::LC_LINKER_OPTIMIZATION_HINT:
74  break;
75  case MachO::LC_FUNCTION_STARTS:
77  break;
78  case MachO::LC_DYLIB_CODE_SIGN_DRS:
80  break;
81  case MachO::LC_DYLD_CHAINED_FIXUPS:
83  break;
84  case MachO::LC_DYLD_EXPORTS_TRIE:
86  break;
87  }
88  }
89 }
90 
92  function_ref<bool(const LoadCommand &)> ToRemove) {
93  auto It = std::stable_partition(
94  LoadCommands.begin(), LoadCommands.end(),
95  [&](const LoadCommand &LC) { return !ToRemove(LC); });
96  LoadCommands.erase(It, LoadCommands.end());
97 
99  return Error::success();
100 }
101 
103  function_ref<bool(const std::unique_ptr<Section> &)> ToRemove) {
104  DenseMap<uint32_t, const Section *> OldIndexToSection;
105  uint32_t NextSectionIndex = 1;
106  for (LoadCommand &LC : LoadCommands) {
107  auto It = std::stable_partition(
108  std::begin(LC.Sections), std::end(LC.Sections),
109  [&](const std::unique_ptr<Section> &Sec) { return !ToRemove(Sec); });
110  for (auto I = LC.Sections.begin(), End = It; I != End; ++I) {
111  OldIndexToSection[(*I)->Index] = I->get();
112  (*I)->Index = NextSectionIndex++;
113  }
114  LC.Sections.erase(It, LC.Sections.end());
115  }
116 
117  auto IsDead = [&](const std::unique_ptr<SymbolEntry> &S) -> bool {
118  std::optional<uint32_t> Section = S->section();
119  return (Section && !OldIndexToSection.count(*Section));
120  };
121 
123  for (const std::unique_ptr<SymbolEntry> &Sym : SymTable.Symbols)
124  if (IsDead(Sym))
125  DeadSymbols.insert(Sym.get());
126 
127  for (const LoadCommand &LC : LoadCommands)
128  for (const std::unique_ptr<Section> &Sec : LC.Sections)
129  for (const RelocationInfo &R : Sec->Relocations)
130  if (R.Symbol && *R.Symbol && DeadSymbols.count(*R.Symbol))
131  return createStringError(std::errc::invalid_argument,
132  "symbol '%s' defined in section with index "
133  "'%u' cannot be removed because it is "
134  "referenced by a relocation in section '%s'",
135  (*R.Symbol)->Name.c_str(),
136  *((*R.Symbol)->section()),
137  Sec->CanonicalName.c_str());
139  for (std::unique_ptr<SymbolEntry> &S : SymTable.Symbols)
140  if (S->section())
141  S->n_sect = OldIndexToSection[S->n_sect]->Index;
142  return Error::success();
143 }
144 
147  is64Bit() ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
149  for (const LoadCommand &LC : LoadCommands) {
150  const MachO::macho_load_command &MLC = LC.MachOLoadCommand;
151  switch (MLC.load_command_data.cmd) {
152  case MachO::LC_SEGMENT:
153  Addr = std::max(Addr,
154  static_cast<uint64_t>(MLC.segment_command_data.vmaddr) +
155  MLC.segment_command_data.vmsize);
156  break;
157  case MachO::LC_SEGMENT_64:
158  Addr = std::max(Addr, MLC.segment_command_64_data.vmaddr +
159  MLC.segment_command_64_data.vmsize);
160  break;
161  default:
162  continue;
163  }
164  }
165  return Addr;
166 }
167 
168 template <typename SegmentType>
169 static void
171  StringRef SegName, uint64_t SegVMAddr, uint64_t SegVMSize) {
172  assert(SegName.size() <= sizeof(Seg.segname) && "too long segment name");
173  memset(&Seg, 0, sizeof(SegmentType));
174  Seg.cmd = CmdType;
175  strncpy(Seg.segname, SegName.data(), SegName.size());
176  Seg.maxprot |=
178  Seg.initprot |=
180  Seg.vmaddr = SegVMAddr;
181  Seg.vmsize = SegVMSize;
182 }
183 
185  LoadCommand LC;
186  const uint64_t SegVMAddr = nextAvailableSegmentAddress();
187  if (is64Bit())
188  constructSegment(LC.MachOLoadCommand.segment_command_64_data,
189  MachO::LC_SEGMENT_64, SegName, SegVMAddr, SegVMSize);
190  else
191  constructSegment(LC.MachOLoadCommand.segment_command_data,
192  MachO::LC_SEGMENT, SegName, SegVMAddr, SegVMSize);
193 
194  LoadCommands.push_back(std::move(LC));
195  return LoadCommands.back();
196 }
197 
198 /// Extracts a segment name from a string which is possibly non-null-terminated.
199 static StringRef extractSegmentName(const char *SegName) {
200  return StringRef(SegName,
201  strnlen(SegName, sizeof(MachO::segment_command::segname)));
202 }
203 
204 std::optional<StringRef> LoadCommand::getSegmentName() const {
206  switch (MLC.load_command_data.cmd) {
207  case MachO::LC_SEGMENT:
208  return extractSegmentName(MLC.segment_command_data.segname);
209  case MachO::LC_SEGMENT_64:
210  return extractSegmentName(MLC.segment_command_64_data.segname);
211  default:
212  return std::nullopt;
213  }
214 }
215 
216 std::optional<uint64_t> LoadCommand::getSegmentVMAddr() const {
218  switch (MLC.load_command_data.cmd) {
219  case MachO::LC_SEGMENT:
220  return MLC.segment_command_data.vmaddr;
221  case MachO::LC_SEGMENT_64:
222  return MLC.segment_command_64_data.vmaddr;
223  default:
224  return std::nullopt;
225  }
226 }
llvm::objcopy::macho::Object::DyLdInfoCommandIndex
std::optional< size_t > DyLdInfoCommandIndex
The index of LC_DYLD_INFO or LC_DYLD_INFO_ONLY load command if present.
Definition: MachOObject.h:327
IsDead
bool IsDead
Definition: SILowerControlFlow.cpp:168
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::objcopy::macho::Object::LinkerOptimizationHintCommandIndex
std::optional< size_t > LinkerOptimizationHintCommandIndex
The index of LC_LINKER_OPTIMIZATION_HINT load command if present.
Definition: MachOObject.h:333
llvm::objcopy::macho::Object::SymTabCommandIndex
std::optional< size_t > SymTabCommandIndex
The index of LC_SYMTAB load command if present.
Definition: MachOObject.h:325
constructSegment
static void constructSegment(SegmentType &Seg, llvm::MachO::LoadCommandType CmdType, StringRef SegName, uint64_t SegVMAddr, uint64_t SegVMSize)
Definition: MachOObject.cpp:170
llvm::objcopy::macho::SymbolTable
The location of the symbol table inside the binary is described by LC_SYMTAB load command.
Definition: MachOObject.h:134
ToRemove
ReachingDefAnalysis InstSet & ToRemove
Definition: ARMLowOverheadLoops.cpp:547
llvm::Error::success
static ErrorSuccess success()
Create a success value.
Definition: Error.h:330
llvm::erase_if
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:1998
Content
T Content
Definition: ELFObjHandler.cpp:89
llvm::objcopy::macho::SymbolTable::removeSymbols
void removeSymbols(function_ref< bool(const std::unique_ptr< SymbolEntry > &)> ToRemove)
Definition: MachOObject.cpp:35
llvm::objcopy::macho::RelocationInfo
Definition: MachOObject.h:172
llvm::sys::path::end
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:235
llvm::sys::path::begin
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:226
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::count
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:145
llvm::MachO::LoadCommandType
LoadCommandType
Definition: MachO.h:98
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
llvm::objcopy::macho
Definition: MachOObjcopy.h:26
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
llvm::objcopy::macho::Object::DySymTabCommandIndex
std::optional< size_t > DySymTabCommandIndex
The index of LC_DYSYMTAB load command if present.
Definition: MachOObject.h:329
llvm::objcopy::macho::MachHeader::SizeOfCmds
uint32_t SizeOfCmds
Definition: MachOObject.h:32
llvm::objcopy::macho::Object::DylibCodeSignDRsIndex
std::optional< size_t > DylibCodeSignDRsIndex
The index of LC_DYLIB_CODE_SIGN_DRS load command if present.
Definition: MachOObject.h:323
llvm::objcopy::macho::Object::nextAvailableSegmentAddress
uint64_t nextAvailableSegmentAddress() const
Definition: MachOObject.cpp:145
llvm::objcopy::macho::Object::removeLoadCommands
Error removeLoadCommands(function_ref< bool(const LoadCommand &)> ToRemove)
Definition: MachOObject.cpp:91
llvm::objcopy::macho::LoadCommand
Definition: MachOObject.h:78
llvm::dwarf::Index
Index
Definition: Dwarf.h:550
llvm::MachO::segment_command::segname
char segname[16]
Definition: MachO.h:545
llvm::StringRef::data
const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:131
SmallPtrSet.h
llvm::objcopy::macho::Object::updateLoadCommandIndexes
void updateLoadCommandIndexes()
Definition: MachOObject.cpp:40
llvm::objcopy::macho::Object::FunctionStartsCommandIndex
std::optional< size_t > FunctionStartsCommandIndex
The index of LC_FUNCTION_STARTS load command if present.
Definition: MachOObject.h:335
llvm::objcopy::macho::Object::SymTable
SymbolTable SymTable
Definition: MachOObject.h:302
llvm::objcopy::macho::LoadCommand::getSegmentName
std::optional< StringRef > getSegmentName() const
Definition: MachOObject.cpp:204
llvm::BTF::HeaderSize
@ HeaderSize
Definition: BTF.h:60
llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLFunctionalExtras.h:36
llvm::MachO::mach_header_64
Definition: MachO.h:526
llvm::MachO::VM_PROT_EXECUTE
@ VM_PROT_EXECUTE
Definition: MachO.h:494
llvm::MachO::VM_PROT_WRITE
@ VM_PROT_WRITE
Definition: MachO.h:494
Index
uint32_t Index
Definition: ELFObjHandler.cpp:83
uint64_t
llvm::objcopy::macho::Object::removeSections
Error removeSections(function_ref< bool(const std::unique_ptr< Section > &)> ToRemove)
Definition: MachOObject.cpp:102
Addr
uint64_t Addr
Definition: ELFObjHandler.cpp:79
llvm::objcopy::macho::Object::LoadCommands
std::vector< LoadCommand > LoadCommands
Definition: MachOObject.h:300
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::DenseMap
Definition: DenseMap.h:714
llvm::objcopy::macho::LoadCommand::getSegmentVMAddr
std::optional< uint64_t > getSegmentVMAddr() const
Definition: MachOObject.cpp:216
llvm::objcopy::macho::LoadCommand::MachOLoadCommand
MachO::macho_load_command MachOLoadCommand
Definition: MachOObject.h:82
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::objcopy::macho::Object::addSegment
LoadCommand & addSegment(StringRef SegName, uint64_t SegVMSize)
Creates a new segment load command in the object and returns a reference to the newly created load co...
Definition: MachOObject.cpp:184
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::objcopy::macho::SymbolTable::getSymbolByIndex
const SymbolEntry * getSymbolByIndex(uint32_t Index) const
Definition: MachOObject.cpp:25
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
llvm::objcopy::macho::SymbolEntry
Definition: MachOObject.h:104
llvm::objcopy::macho::Object::DataInCodeCommandIndex
std::optional< size_t > DataInCodeCommandIndex
The index of LC_DATA_IN_CODE load command if present.
Definition: MachOObject.h:331
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
uint32_t
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
MachOObject.h
extractSegmentName
static StringRef extractSegmentName(const char *SegName)
Extracts a segment name from a string which is possibly non-null-terminated.
Definition: MachOObject.cpp:199
llvm::StringRef::size
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
llvm::createStringError
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition: Error.h:1246
llvm::objcopy::macho::Object::Header
MachHeader Header
Definition: MachOObject.h:299
llvm::Error
Lightweight error class with error context and mandatory checking.
Definition: Error.h:156
llvm::MachO::mach_header
Definition: MachO.h:516
llvm::objcopy::macho::Object::TextSegmentCommandIndex
std::optional< size_t > TextSegmentCommandIndex
The index of the LC_SEGMENT or LC_SEGMENT_64 load command corresponding to the __TEXT segment.
Definition: MachOObject.h:342
llvm::objcopy::macho::Object::ChainedFixupsCommandIndex
std::optional< size_t > ChainedFixupsCommandIndex
The index of LC_DYLD_CHAINED_FIXUPS load command if present.
Definition: MachOObject.h:337
llvm::objcopy::macho::Section::Section
Section(StringRef SegName, StringRef SectName)
Definition: MachOObject.cpp:16
llvm::MachO::VM_PROT_READ
@ VM_PROT_READ
Definition: MachO.h:494
llvm::objcopy::macho::SymbolTable::Symbols
std::vector< std::unique_ptr< SymbolEntry > > Symbols
Definition: MachOObject.h:135
llvm::objcopy::macho::Section
Definition: MachOObject.h:38
llvm::objcopy::macho::Object::ExportsTrieCommandIndex
std::optional< size_t > ExportsTrieCommandIndex
The index of LC_DYLD_EXPORTS_TRIE load command if present.
Definition: MachOObject.h:339
llvm::objcopy::macho::Object::CodeSignatureCommandIndex
std::optional< size_t > CodeSignatureCommandIndex
The index of LC_CODE_SIGNATURE load command if present.
Definition: MachOObject.h:321
llvm::objcopy::macho::Object::is64Bit
bool is64Bit() const
Definition: MachOObject.h:361
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
llvm::MachO::macho_load_command
Definition: MachO.h:2147