LLVM  4.0.0
SplitModule.cpp
Go to the documentation of this file.
1 //===- SplitModule.cpp - Split a module into partitions -------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the function llvm::SplitModule, which splits a module
11 // into multiple linkable partitions. It can be used to implement parallel code
12 // generation for link-time optimization.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #define DEBUG_TYPE "split-module"
17 
20 #include "llvm/ADT/Hashing.h"
21 #include "llvm/ADT/MapVector.h"
22 #include "llvm/ADT/SetVector.h"
23 #include "llvm/IR/Constants.h"
24 #include "llvm/IR/Function.h"
25 #include "llvm/IR/GlobalAlias.h"
26 #include "llvm/IR/GlobalObject.h"
27 #include "llvm/IR/GlobalValue.h"
28 #include "llvm/IR/Module.h"
29 #include "llvm/Support/Debug.h"
30 #include "llvm/Support/MD5.h"
33 #include <queue>
34 
35 using namespace llvm;
36 
37 namespace {
38 typedef EquivalenceClasses<const GlobalValue *> ClusterMapType;
39 typedef DenseMap<const Comdat *, const GlobalValue *> ComdatMembersType;
40 typedef DenseMap<const GlobalValue *, unsigned> ClusterIDMapType;
41 }
42 
43 static void addNonConstUser(ClusterMapType &GVtoClusterMap,
44  const GlobalValue *GV, const User *U) {
45  assert((!isa<Constant>(U) || isa<GlobalValue>(U)) && "Bad user");
46 
47  if (const Instruction *I = dyn_cast<Instruction>(U)) {
48  const GlobalValue *F = I->getParent()->getParent();
49  GVtoClusterMap.unionSets(GV, F);
50  } else if (isa<GlobalIndirectSymbol>(U) || isa<Function>(U) ||
51  isa<GlobalVariable>(U)) {
52  GVtoClusterMap.unionSets(GV, cast<GlobalValue>(U));
53  } else {
54  llvm_unreachable("Underimplemented use case");
55  }
56 }
57 
58 // Adds all GlobalValue users of V to the same cluster as GV.
59 static void addAllGlobalValueUsers(ClusterMapType &GVtoClusterMap,
60  const GlobalValue *GV, const Value *V) {
61  for (auto *U : V->users()) {
63  Worklist.push_back(U);
64  while (!Worklist.empty()) {
65  const User *UU = Worklist.pop_back_val();
66  // For each constant that is not a GV (a pure const) recurse.
67  if (isa<Constant>(UU) && !isa<GlobalValue>(UU)) {
68  Worklist.append(UU->user_begin(), UU->user_end());
69  continue;
70  }
71  addNonConstUser(GVtoClusterMap, GV, UU);
72  }
73  }
74 }
75 
76 // Find partitions for module in the way that no locals need to be
77 // globalized.
78 // Try to balance pack those partitions into N files since this roughly equals
79 // thread balancing for the backend codegen step.
80 static void findPartitions(Module *M, ClusterIDMapType &ClusterIDMap,
81  unsigned N) {
82  // At this point module should have the proper mix of globals and locals.
83  // As we attempt to partition this module, we must not change any
84  // locals to globals.
85  DEBUG(dbgs() << "Partition module with (" << M->size() << ")functions\n");
86  ClusterMapType GVtoClusterMap;
87  ComdatMembersType ComdatMembers;
88 
89  auto recordGVSet = [&GVtoClusterMap, &ComdatMembers](GlobalValue &GV) {
90  if (GV.isDeclaration())
91  return;
92 
93  if (!GV.hasName())
94  GV.setName("__llvmsplit_unnamed");
95 
96  // Comdat groups must not be partitioned. For comdat groups that contain
97  // locals, record all their members here so we can keep them together.
98  // Comdat groups that only contain external globals are already handled by
99  // the MD5-based partitioning.
100  if (const Comdat *C = GV.getComdat()) {
101  auto &Member = ComdatMembers[C];
102  if (Member)
103  GVtoClusterMap.unionSets(Member, &GV);
104  else
105  Member = &GV;
106  }
107 
108  // For aliases we should not separate them from their aliasees regardless
109  // of linkage.
110  if (auto *GIS = dyn_cast<GlobalIndirectSymbol>(&GV)) {
111  if (const GlobalObject *Base = GIS->getBaseObject())
112  GVtoClusterMap.unionSets(&GV, Base);
113  }
114 
115  if (const Function *F = dyn_cast<Function>(&GV)) {
116  for (const BasicBlock &BB : *F) {
118  if (!BA || !BA->isConstantUsed())
119  continue;
120  addAllGlobalValueUsers(GVtoClusterMap, F, BA);
121  }
122  }
123 
124  if (GV.hasLocalLinkage())
125  addAllGlobalValueUsers(GVtoClusterMap, &GV, &GV);
126  };
127 
128  std::for_each(M->begin(), M->end(), recordGVSet);
129  std::for_each(M->global_begin(), M->global_end(), recordGVSet);
130  std::for_each(M->alias_begin(), M->alias_end(), recordGVSet);
131 
132  // Assigned all GVs to merged clusters while balancing number of objects in
133  // each.
134  auto CompareClusters = [](const std::pair<unsigned, unsigned> &a,
135  const std::pair<unsigned, unsigned> &b) {
136  if (a.second || b.second)
137  return a.second > b.second;
138  else
139  return a.first > b.first;
140  };
141 
142  std::priority_queue<std::pair<unsigned, unsigned>,
143  std::vector<std::pair<unsigned, unsigned>>,
144  decltype(CompareClusters)>
145  BalancinQueue(CompareClusters);
146  // Pre-populate priority queue with N slot blanks.
147  for (unsigned i = 0; i < N; ++i)
148  BalancinQueue.push(std::make_pair(i, 0));
149 
150  typedef std::pair<unsigned, ClusterMapType::iterator> SortType;
153 
154  // To guarantee determinism, we have to sort SCC according to size.
155  // When size is the same, use leader's name.
156  for (ClusterMapType::iterator I = GVtoClusterMap.begin(),
157  E = GVtoClusterMap.end(); I != E; ++I)
158  if (I->isLeader())
159  Sets.push_back(
160  std::make_pair(std::distance(GVtoClusterMap.member_begin(I),
161  GVtoClusterMap.member_end()), I));
162 
163  std::sort(Sets.begin(), Sets.end(), [](const SortType &a, const SortType &b) {
164  if (a.first == b.first)
165  return a.second->getData()->getName() > b.second->getData()->getName();
166  else
167  return a.first > b.first;
168  });
169 
170  for (auto &I : Sets) {
171  unsigned CurrentClusterID = BalancinQueue.top().first;
172  unsigned CurrentClusterSize = BalancinQueue.top().second;
173  BalancinQueue.pop();
174 
175  DEBUG(dbgs() << "Root[" << CurrentClusterID << "] cluster_size(" << I.first
176  << ") ----> " << I.second->getData()->getName() << "\n");
177 
178  for (ClusterMapType::member_iterator MI =
179  GVtoClusterMap.findLeader(I.second);
180  MI != GVtoClusterMap.member_end(); ++MI) {
181  if (!Visited.insert(*MI).second)
182  continue;
183  DEBUG(dbgs() << "----> " << (*MI)->getName()
184  << ((*MI)->hasLocalLinkage() ? " l " : " e ") << "\n");
185  Visited.insert(*MI);
186  ClusterIDMap[*MI] = CurrentClusterID;
187  CurrentClusterSize++;
188  }
189  // Add this set size to the number of entries in this cluster.
190  BalancinQueue.push(std::make_pair(CurrentClusterID, CurrentClusterSize));
191  }
192 }
193 
194 static void externalize(GlobalValue *GV) {
195  if (GV->hasLocalLinkage()) {
198  }
199 
200  // Unnamed entities must be named consistently between modules. setName will
201  // give a distinct name to each such entity.
202  if (!GV->hasName())
203  GV->setName("__llvmsplit_unnamed");
204 }
205 
206 // Returns whether GV should be in partition (0-based) I of N.
207 static bool isInPartition(const GlobalValue *GV, unsigned I, unsigned N) {
208  if (auto *GIS = dyn_cast<GlobalIndirectSymbol>(GV))
209  if (const GlobalObject *Base = GIS->getBaseObject())
210  GV = Base;
211 
212  StringRef Name;
213  if (const Comdat *C = GV->getComdat())
214  Name = C->getName();
215  else
216  Name = GV->getName();
217 
218  // Partition by MD5 hash. We only need a few bits for evenness as the number
219  // of partitions will generally be in the 1-2 figure range; the low 16 bits
220  // are enough.
221  MD5 H;
222  MD5::MD5Result R;
223  H.update(Name);
224  H.final(R);
225  return (R[0] | (R[1] << 8)) % N == I;
226 }
227 
229  std::unique_ptr<Module> M, unsigned N,
230  function_ref<void(std::unique_ptr<Module> MPart)> ModuleCallback,
231  bool PreserveLocals) {
232  if (!PreserveLocals) {
233  for (Function &F : *M)
234  externalize(&F);
235  for (GlobalVariable &GV : M->globals())
236  externalize(&GV);
237  for (GlobalAlias &GA : M->aliases())
238  externalize(&GA);
239  for (GlobalIFunc &GIF : M->ifuncs())
240  externalize(&GIF);
241  }
242 
243  // This performs splitting without a need for externalization, which might not
244  // always be possible.
245  ClusterIDMapType ClusterIDMap;
246  findPartitions(M.get(), ClusterIDMap, N);
247 
248  // FIXME: We should be able to reuse M as the last partition instead of
249  // cloning it.
250  for (unsigned I = 0; I < N; ++I) {
251  ValueToValueMapTy VMap;
252  std::unique_ptr<Module> MPart(
253  CloneModule(M.get(), VMap, [&](const GlobalValue *GV) {
254  if (ClusterIDMap.count(GV))
255  return (ClusterIDMap[GV] == I);
256  else
257  return isInPartition(GV, I, N);
258  }));
259  if (I != 0)
260  MPart->setModuleInlineAsm("");
261  ModuleCallback(std::move(MPart));
262  }
263 }
void setVisibility(VisibilityTypes V)
Definition: GlobalValue.h:225
bool hasName() const
Definition: Value.h:236
size_t i
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:52
void SplitModule(std::unique_ptr< Module > M, unsigned N, function_ref< void(std::unique_ptr< Module > MPart)> ModuleCallback, bool PreserveLocals=false)
Splits the module M into N linkable partitions.
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:83
Externally visible function.
Definition: GlobalValue.h:49
static void addAllGlobalValueUsers(ClusterMapType &GVtoClusterMap, const GlobalValue *GV, const Value *V)
Definition: SplitModule.cpp:59
static void externalize(GlobalValue *GV)
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:191
The address of a basic block.
Definition: Constants.h:822
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:257
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:60
global_iterator global_begin()
Definition: Module.h:518
#define F(x, y, z)
Definition: MD5.cpp:51
bool isConstantUsed() const
Return true if the constant has users other than constant expressions and other dangling things...
Definition: Constants.cpp:403
void update(ArrayRef< uint8_t > Data)
Updates the hash for the byte stream provided.
Definition: MD5.cpp:187
static GCRegistry::Add< CoreCLRGC > E("coreclr","CoreCLR-compatible GC")
static void findPartitions(Module *M, ClusterIDMapType &ClusterIDMap, unsigned N)
Definition: SplitModule.cpp:80
alias_iterator alias_end()
Definition: Module.h:559
LLVM Basic Block Representation.
Definition: BasicBlock.h:51
static void addNonConstUser(ClusterMapType &GVtoClusterMap, const GlobalValue *GV, const User *U)
Definition: SplitModule.cpp:43
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:115
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define H(x, y, z)
Definition: MD5.cpp:53
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:368
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
std::unique_ptr< Module > CloneModule(const Module *M)
Return an exact copy of the specified module.
Definition: CloneModule.cpp:27
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:392
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
global_iterator global_end()
Definition: Module.h:520
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:425
Comdat * getComdat()
Definition: Globals.cpp:155
uint8_t MD5Result[16]
Definition: MD5.h:49
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:843
Module.h This file contains the declarations for the Module class.
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:382
alias_iterator alias_begin()
Definition: Module.h:557
void final(MD5Result &Result)
Finishes off the hash and puts the result in result.
Definition: MD5.cpp:232
void setLinkage(LinkageTypes LT)
Definition: GlobalValue.h:424
static GCRegistry::Add< ShadowStackGC > C("shadow-stack","Very portable GC for uncooperative code generators")
static BlockAddress * lookup(const BasicBlock *BB)
Lookup an existing BlockAddress constant for the given BasicBlock.
Definition: Constants.cpp:1374
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
iterator_range< user_iterator > users()
Definition: Value.h:370
size_t size() const
Definition: Module.h:543
const GlobalObject * getBaseObject() const
Definition: GlobalValue.h:517
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:119
iterator end()
Definition: Module.h:537
#define I(x, y, z)
Definition: MD5.cpp:54
#define N
iterator begin()
Definition: Module.h:535
Definition: MD5.h:39
static bool isInPartition(const GlobalValue *GV, unsigned I, unsigned N)
bool hasLocalLinkage() const
Definition: GlobalValue.h:415
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
user_iterator user_begin()
Definition: Value.h:346
LLVM Value Representation.
Definition: Value.h:71
#define DEBUG(X)
Definition: Debug.h:100
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:47
user_iterator user_end()
Definition: Value.h:354