LLVM  4.0.0
ThinLTOBitcodeWriter.cpp
Go to the documentation of this file.
1 //===- ThinLTOBitcodeWriter.cpp - Bitcode writing pass for ThinLTO --------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass prepares a module containing type metadata for ThinLTO by splitting
11 // it into regular and thin LTO parts if possible, and writing both parts to
12 // a multi-module bitcode file. Modules that do not contain type metadata are
13 // written unmodified as a single module.
14 //
15 //===----------------------------------------------------------------------===//
16 
#include "llvm/Transforms/IPO.h"
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Transforms/Utils/Cloning.h"
28 using namespace llvm;
29 
30 namespace {
31 
32 // Produce a unique identifier for this module by taking the MD5 sum of the
33 // names of the module's strong external symbols. This identifier is
34 // normally guaranteed to be unique, or the program would fail to link due to
35 // multiply defined symbols.
36 //
37 // If the module has no strong external symbols (such a module may still have a
38 // semantic effect if it performs global initialization), we cannot produce a
39 // unique identifier for this module, so we return the empty string, which
40 // causes the entire module to be written as a regular LTO module.
41 std::string getModuleId(Module *M) {
42  MD5 Md5;
43  bool ExportsSymbols = false;
44  auto AddGlobal = [&](GlobalValue &GV) {
45  if (GV.isDeclaration() || GV.getName().startswith("llvm.") ||
46  !GV.hasExternalLinkage())
47  return;
48  ExportsSymbols = true;
49  Md5.update(GV.getName());
50  Md5.update(ArrayRef<uint8_t>{0});
51  };
52 
53  for (auto &F : *M)
54  AddGlobal(F);
55  for (auto &GV : M->globals())
56  AddGlobal(GV);
57  for (auto &GA : M->aliases())
58  AddGlobal(GA);
59  for (auto &IF : M->ifuncs())
60  AddGlobal(IF);
61 
62  if (!ExportsSymbols)
63  return "";
64 
66  Md5.final(R);
67 
68  SmallString<32> Str;
69  MD5::stringifyResult(R, Str);
70  return ("$" + Str).str();
71 }
72 
73 // Promote each local-linkage entity defined by ExportM and used by ImportM by
74 // changing visibility and appending the given ModuleId.
75 void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) {
76  auto PromoteInternal = [&](GlobalValue &ExportGV) {
77  if (!ExportGV.hasLocalLinkage())
78  return;
79 
80  GlobalValue *ImportGV = ImportM.getNamedValue(ExportGV.getName());
81  if (!ImportGV || ImportGV->use_empty())
82  return;
83 
84  std::string NewName = (ExportGV.getName() + ModuleId).str();
85 
86  ExportGV.setName(NewName);
87  ExportGV.setLinkage(GlobalValue::ExternalLinkage);
88  ExportGV.setVisibility(GlobalValue::HiddenVisibility);
89 
90  ImportGV->setName(NewName);
92  };
93 
94  for (auto &F : ExportM)
95  PromoteInternal(F);
96  for (auto &GV : ExportM.globals())
97  PromoteInternal(GV);
98  for (auto &GA : ExportM.aliases())
99  PromoteInternal(GA);
100  for (auto &IF : ExportM.ifuncs())
101  PromoteInternal(IF);
102 }
103 
104 // Promote all internal (i.e. distinct) type ids used by the module by replacing
105 // them with external type ids formed using the module id.
106 //
107 // Note that this needs to be done before we clone the module because each clone
108 // will receive its own set of distinct metadata nodes.
109 void promoteTypeIds(Module &M, StringRef ModuleId) {
110  DenseMap<Metadata *, Metadata *> LocalToGlobal;
111  auto ExternalizeTypeId = [&](CallInst *CI, unsigned ArgNo) {
112  Metadata *MD =
113  cast<MetadataAsValue>(CI->getArgOperand(ArgNo))->getMetadata();
114 
115  if (isa<MDNode>(MD) && cast<MDNode>(MD)->isDistinct()) {
116  Metadata *&GlobalMD = LocalToGlobal[MD];
117  if (!GlobalMD) {
118  std::string NewName =
119  (to_string(LocalToGlobal.size()) + ModuleId).str();
120  GlobalMD = MDString::get(M.getContext(), NewName);
121  }
122 
123  CI->setArgOperand(ArgNo,
124  MetadataAsValue::get(M.getContext(), GlobalMD));
125  }
126  };
127 
128  if (Function *TypeTestFunc =
129  M.getFunction(Intrinsic::getName(Intrinsic::type_test))) {
130  for (const Use &U : TypeTestFunc->uses()) {
131  auto CI = cast<CallInst>(U.getUser());
132  ExternalizeTypeId(CI, 1);
133  }
134  }
135 
136  if (Function *TypeCheckedLoadFunc =
137  M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load))) {
138  for (const Use &U : TypeCheckedLoadFunc->uses()) {
139  auto CI = cast<CallInst>(U.getUser());
140  ExternalizeTypeId(CI, 2);
141  }
142  }
143 
144  for (GlobalObject &GO : M.global_objects()) {
146  GO.getMetadata(LLVMContext::MD_type, MDs);
147 
148  GO.eraseMetadata(LLVMContext::MD_type);
149  for (auto MD : MDs) {
150  auto I = LocalToGlobal.find(MD->getOperand(1));
151  if (I == LocalToGlobal.end()) {
152  GO.addMetadata(LLVMContext::MD_type, *MD);
153  continue;
154  }
155  GO.addMetadata(
157  *MDNode::get(M.getContext(),
158  ArrayRef<Metadata *>{MD->getOperand(0), I->second}));
159  }
160  }
161 }
162 
163 // Drop unused globals, and drop type information from function declarations.
164 // FIXME: If we made functions typeless then there would be no need to do this.
165 void simplifyExternals(Module &M) {
166  FunctionType *EmptyFT =
168 
169  for (auto I = M.begin(), E = M.end(); I != E;) {
170  Function &F = *I++;
171  if (F.isDeclaration() && F.use_empty()) {
172  F.eraseFromParent();
173  continue;
174  }
175 
176  if (!F.isDeclaration() || F.getFunctionType() == EmptyFT)
177  continue;
178 
179  Function *NewF =
181  NewF->setVisibility(F.getVisibility());
182  NewF->takeName(&F);
184  F.eraseFromParent();
185  }
186 
187  for (auto I = M.global_begin(), E = M.global_end(); I != E;) {
188  GlobalVariable &GV = *I++;
189  if (GV.isDeclaration() && GV.use_empty()) {
190  GV.eraseFromParent();
191  continue;
192  }
193  }
194 }
195 
196 void filterModule(
197  Module *M, std::function<bool(const GlobalValue *)> ShouldKeepDefinition) {
198  for (Function &F : *M) {
199  if (ShouldKeepDefinition(&F))
200  continue;
201 
202  F.deleteBody();
203  F.clearMetadata();
204  }
205 
206  for (GlobalVariable &GV : M->globals()) {
207  if (ShouldKeepDefinition(&GV))
208  continue;
209 
210  GV.setInitializer(nullptr);
212  GV.clearMetadata();
213  }
214 
215  for (Module::alias_iterator I = M->alias_begin(), E = M->alias_end();
216  I != E;) {
217  GlobalAlias *GA = &*I++;
218  if (ShouldKeepDefinition(GA))
219  continue;
220 
221  GlobalObject *GO;
222  if (I->getValueType()->isFunctionTy())
223  GO = Function::Create(cast<FunctionType>(GA->getValueType()),
225  else
226  GO = new GlobalVariable(
227  *M, GA->getValueType(), false, GlobalValue::ExternalLinkage,
228  (Constant *)nullptr, "", (GlobalVariable *)nullptr,
229  GA->getThreadLocalMode(), GA->getType()->getAddressSpace());
230  GO->takeName(GA);
231  GA->replaceAllUsesWith(GO);
232  GA->eraseFromParent();
233  }
234 }
235 
236 // If it's possible to split M into regular and thin LTO parts, do so and write
237 // a multi-module bitcode file with the two parts to OS. Otherwise, write only a
238 // regular LTO bitcode file to OS.
239 void splitAndWriteThinLTOBitcode(raw_ostream &OS, Module &M) {
240  std::string ModuleId = getModuleId(&M);
241  if (ModuleId.empty()) {
242  // We couldn't generate a module ID for this module, just write it out as a
243  // regular LTO module.
244  WriteBitcodeToFile(&M, OS);
245  return;
246  }
247 
248  promoteTypeIds(M, ModuleId);
249 
250  auto IsInMergedM = [&](const GlobalValue *GV) {
251  auto *GVar = dyn_cast<GlobalVariable>(GV->getBaseObject());
252  if (!GVar)
253  return false;
254 
256  GVar->getMetadata(LLVMContext::MD_type, MDs);
257  return !MDs.empty();
258  };
259 
260  ValueToValueMapTy VMap;
261  std::unique_ptr<Module> MergedM(CloneModule(&M, VMap, IsInMergedM));
262 
263  filterModule(&M, [&](const GlobalValue *GV) { return !IsInMergedM(GV); });
264 
265  promoteInternals(*MergedM, M, ModuleId);
266  promoteInternals(M, *MergedM, ModuleId);
267 
268  simplifyExternals(*MergedM);
269 
270  SmallVector<char, 0> Buffer;
271  BitcodeWriter W(Buffer);
272 
273  // FIXME: Try to re-use BSI and PFI from the original module here.
274  ModuleSummaryIndex Index = buildModuleSummaryIndex(M, nullptr, nullptr);
275  W.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index,
276  /*GenerateHash=*/true);
277 
278  W.writeModule(MergedM.get());
279 
280  OS << Buffer;
281 }
282 
283 // Returns whether this module needs to be split because it uses type metadata.
284 bool requiresSplit(Module &M) {
286  for (auto &GO : M.global_objects()) {
287  GO.getMetadata(LLVMContext::MD_type, MDs);
288  if (!MDs.empty())
289  return true;
290  }
291 
292  return false;
293 }
294 
295 void writeThinLTOBitcode(raw_ostream &OS, Module &M,
296  const ModuleSummaryIndex *Index) {
297  // See if this module has any type metadata. If so, we need to split it.
298  if (requiresSplit(M))
299  return splitAndWriteThinLTOBitcode(OS, M);
300 
301  // Otherwise we can just write it out as a regular module.
302  WriteBitcodeToFile(&M, OS, /*ShouldPreserveUseListOrder=*/false, Index,
303  /*GenerateHash=*/true);
304 }
305 
306 class WriteThinLTOBitcode : public ModulePass {
307  raw_ostream &OS; // raw_ostream to print on
308 
309 public:
310  static char ID; // Pass identification, replacement for typeid
311  WriteThinLTOBitcode() : ModulePass(ID), OS(dbgs()) {
313  }
314 
315  explicit WriteThinLTOBitcode(raw_ostream &o)
316  : ModulePass(ID), OS(o) {
318  }
319 
320  StringRef getPassName() const override { return "ThinLTO Bitcode Writer"; }
321 
322  bool runOnModule(Module &M) override {
323  const ModuleSummaryIndex *Index =
324  &(getAnalysis<ModuleSummaryIndexWrapperPass>().getIndex());
325  writeThinLTOBitcode(OS, M, Index);
326  return true;
327  }
328  void getAnalysisUsage(AnalysisUsage &AU) const override {
329  AU.setPreservesAll();
331  }
332 };
333 } // anonymous namespace
334 
335 char WriteThinLTOBitcode::ID = 0;
336 INITIALIZE_PASS_BEGIN(WriteThinLTOBitcode, "write-thinlto-bitcode",
337  "Write ThinLTO Bitcode", false, true)
339 INITIALIZE_PASS_END(WriteThinLTOBitcode, "write-thinlto-bitcode",
340  "Write ThinLTO Bitcode", false, true)
341 
343  return new WriteThinLTOBitcode(Str);
344 }
void setVisibility(VisibilityTypes V)
Definition: GlobalValue.h:225
iterator_range< use_iterator > uses()
Definition: Value.h:326
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
VisibilityTypes getVisibility() const
Definition: GlobalValue.h:219
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:52
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:414
This is the interface to build a ModuleSummaryIndex for a module.
Type * getValueType() const
Definition: GlobalValue.h:261
This class represents a function call, abstracting a target machine's calling convention.
write thinlto bitcode
Externally visible function.
Definition: GlobalValue.h:49
FunctionType * getType(LLVMContext &Context, ID id, ArrayRef< Type * > Tys=None)
Return the function type for an intrinsic.
Definition: Function.cpp:905
ModuleSummaryIndex buildModuleSummaryIndex(const Module &M, std::function< BlockFrequencyInfo *(const Function &F)> GetBFICallback, ProfileSummaryInfo *PSI)
Direct function to compute a ModuleSummaryIndex from a given module.
iterator_range< global_object_iterator > global_objects()
Definition: Module.h:599
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:53
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Function.cpp:555
void setInitializer(Constant *InitVal)
setInitializer - Sets the initializer for this global variable, removing any existing initializer if ...
Definition: Globals.cpp:323
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:257
void eraseFromParent() override
eraseFromParent - This method unlinks 'this' from the containing module and deletes it...
Definition: Globals.cpp:319
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:60
global_iterator global_begin()
Definition: Module.h:518
Class to represent function types.
Definition: DerivedTypes.h:102
#define F(x, y, z)
Definition: MD5.cpp:51
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
Definition: Type.cpp:291
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:401
void update(ArrayRef< uint8_t > Data)
Updates the hash for the byte stream provided.
Definition: MD5.cpp:187
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:263
Class to hold module path string table and global value map, and encapsulate methods for operating on...
static GCRegistry::Add< CoreCLRGC > E("coreclr","CoreCLR-compatible GC")
Function * getFunction(StringRef Name) const
Look up the specified function in the module symbol table.
Definition: Module.cpp:196
static MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition: Metadata.cpp:74
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:1695
write thinlto Write ThinLTO Bitcode
void deleteBody()
deleteBody - This method deletes the body of the function, and converts the linkage to external...
Definition: Function.h:475
This is an important base class in LLVM.
Definition: Constant.h:42
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static void stringifyResult(MD5Result &Result, SmallString< 32 > &Str)
Translates the bytes in Res to a hex string that is deposited into Str.
Definition: MD5.cpp:262
Represent the analysis usage information of a pass.
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:154
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE,"Assign register bank of generic virtual registers", false, false) RegBankSelect
std::unique_ptr< Module > CloneModule(const Module *M)
Return an exact copy of the specified module.
Definition: CloneModule.cpp:27
static void write(bool isBE, void *P, T V)
global_iterator global_end()
Definition: Module.h:520
Iterator for intrusive lists based on ilist_node.
void initializeWriteThinLTOBitcodePass(PassRegistry &)
uint8_t MD5Result[16]
Definition: MD5.h:49
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:843
Module.h This file contains the declarations for the Module class.
void final(MD5Result &Result)
Finishes off the hash and puts the result in result.
Definition: MD5.cpp:232
void setLinkage(LinkageTypes LT)
Definition: GlobalValue.h:424
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
Value * getArgOperand(unsigned i) const
getArgOperand/setArgOperand - Return/set the i-th call argument.
void setPreservesAll()
Set by analyses that do not transform their input at all.
const GlobalObject * getBaseObject() const
Definition: GlobalValue.h:517
void eraseFromParent() override
eraseFromParent - This method unlinks 'this' from the containing module and deletes it...
Definition: Function.cpp:246
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1132
Basic Alias true
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:259
iterator end()
Definition: Module.h:537
void WriteBitcodeToFile(const Module *M, raw_ostream &Out, bool ShouldPreserveUseListOrder=false, const ModuleSummaryIndex *Index=nullptr, bool GenerateHash=false)
Write the specified module to the specified raw output stream.
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:188
#define I(x, y, z)
Definition: MD5.cpp:54
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.cpp:230
void setArgOperand(unsigned i, Value *v)
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:235
iterator begin()
Definition: Module.h:535
Definition: MD5.h:39
ModulePass * createWriteThinLTOBitcodePass(raw_ostream &Str)
Write ThinLTO-ready bitcode to Str.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:287
const std::string to_string(const T &Value)
Definition: ScopedPrinter.h:62
bool use_empty() const
Definition: Value.h:299
write thinlto Write ThinLTO false
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:44
print Print MemDeps of function
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:47
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, const Twine &N="", Module *M=nullptr)
Definition: Function.h:117
This header defines various interfaces for pass management in LLVM.
Root of the metadata hierarchy.
Definition: Metadata.h:55
GlobalValue * getNamedValue(StringRef Name) const
Return the global value in the module with the specified name, of arbitrary type. ...
Definition: Module.cpp:93
INITIALIZE_PASS_BEGIN(WriteThinLTOBitcode,"write-thinlto-bitcode","Write ThinLTO Bitcode", false, true) INITIALIZE_PASS_END(WriteThinLTOBitcode
LLVMContext & getContext() const
Get the global data context.
Definition: Module.h:222
Legacy wrapper pass to provide the ModuleSummaryIndex object.