LLVM  6.0.0svn
SplitModule.cpp
Go to the documentation of this file.
1 //===- SplitModule.cpp - Split a module into partitions -------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the function llvm::SplitModule, which splits a module
11 // into multiple linkable partitions. It can be used to implement parallel code
12 // generation for link-time optimization.
13 //
14 //===----------------------------------------------------------------------===//
15 
17 #include "llvm/ADT/DenseMap.h"
19 #include "llvm/ADT/SmallPtrSet.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/IR/Comdat.h"
23 #include "llvm/IR/Constant.h"
24 #include "llvm/IR/Constants.h"
25 #include "llvm/IR/Function.h"
26 #include "llvm/IR/GlobalAlias.h"
27 #include "llvm/IR/GlobalObject.h"
29 #include "llvm/IR/GlobalValue.h"
30 #include "llvm/IR/GlobalVariable.h"
31 #include "llvm/IR/Instruction.h"
32 #include "llvm/IR/Module.h"
33 #include "llvm/IR/User.h"
34 #include "llvm/IR/Value.h"
35 #include "llvm/Support/Casting.h"
36 #include "llvm/Support/Debug.h"
38 #include "llvm/Support/MD5.h"
42 #include <algorithm>
43 #include <cassert>
44 #include <iterator>
45 #include <memory>
46 #include <queue>
47 #include <utility>
48 #include <vector>
49 
50 using namespace llvm;
51 
52 #define DEBUG_TYPE "split-module"
53 
54 namespace {
55 
56 using ClusterMapType = EquivalenceClasses<const GlobalValue *>;
57 using ComdatMembersType = DenseMap<const Comdat *, const GlobalValue *>;
58 using ClusterIDMapType = DenseMap<const GlobalValue *, unsigned>;
59 
60 } // end anonymous namespace
61 
62 static void addNonConstUser(ClusterMapType &GVtoClusterMap,
63  const GlobalValue *GV, const User *U) {
64  assert((!isa<Constant>(U) || isa<GlobalValue>(U)) && "Bad user");
65 
66  if (const Instruction *I = dyn_cast<Instruction>(U)) {
67  const GlobalValue *F = I->getParent()->getParent();
68  GVtoClusterMap.unionSets(GV, F);
69  } else if (isa<GlobalIndirectSymbol>(U) || isa<Function>(U) ||
70  isa<GlobalVariable>(U)) {
71  GVtoClusterMap.unionSets(GV, cast<GlobalValue>(U));
72  } else {
73  llvm_unreachable("Underimplemented use case");
74  }
75 }
76 
77 // Adds all GlobalValue users of V to the same cluster as GV.
78 static void addAllGlobalValueUsers(ClusterMapType &GVtoClusterMap,
79  const GlobalValue *GV, const Value *V) {
80  for (auto *U : V->users()) {
82  Worklist.push_back(U);
83  while (!Worklist.empty()) {
84  const User *UU = Worklist.pop_back_val();
85  // For each constant that is not a GV (a pure const) recurse.
86  if (isa<Constant>(UU) && !isa<GlobalValue>(UU)) {
87  Worklist.append(UU->user_begin(), UU->user_end());
88  continue;
89  }
90  addNonConstUser(GVtoClusterMap, GV, UU);
91  }
92  }
93 }
94 
95 // Find partitions for module in the way that no locals need to be
96 // globalized.
97 // Try to balance pack those partitions into N files since this roughly equals
98 // thread balancing for the backend codegen step.
99 static void findPartitions(Module *M, ClusterIDMapType &ClusterIDMap,
100  unsigned N) {
101  // At this point module should have the proper mix of globals and locals.
102  // As we attempt to partition this module, we must not change any
103  // locals to globals.
104  DEBUG(dbgs() << "Partition module with (" << M->size() << ")functions\n");
105  ClusterMapType GVtoClusterMap;
106  ComdatMembersType ComdatMembers;
107 
108  auto recordGVSet = [&GVtoClusterMap, &ComdatMembers](GlobalValue &GV) {
109  if (GV.isDeclaration())
110  return;
111 
112  if (!GV.hasName())
113  GV.setName("__llvmsplit_unnamed");
114 
115  // Comdat groups must not be partitioned. For comdat groups that contain
116  // locals, record all their members here so we can keep them together.
117  // Comdat groups that only contain external globals are already handled by
118  // the MD5-based partitioning.
119  if (const Comdat *C = GV.getComdat()) {
120  auto &Member = ComdatMembers[C];
121  if (Member)
122  GVtoClusterMap.unionSets(Member, &GV);
123  else
124  Member = &GV;
125  }
126 
127  // For aliases we should not separate them from their aliasees regardless
128  // of linkage.
129  if (auto *GIS = dyn_cast<GlobalIndirectSymbol>(&GV)) {
130  if (const GlobalObject *Base = GIS->getBaseObject())
131  GVtoClusterMap.unionSets(&GV, Base);
132  }
133 
134  if (const Function *F = dyn_cast<Function>(&GV)) {
135  for (const BasicBlock &BB : *F) {
137  if (!BA || !BA->isConstantUsed())
138  continue;
139  addAllGlobalValueUsers(GVtoClusterMap, F, BA);
140  }
141  }
142 
143  if (GV.hasLocalLinkage())
144  addAllGlobalValueUsers(GVtoClusterMap, &GV, &GV);
145  };
146 
147  llvm::for_each(M->functions(), recordGVSet);
148  llvm::for_each(M->globals(), recordGVSet);
149  llvm::for_each(M->aliases(), recordGVSet);
150 
151  // Assigned all GVs to merged clusters while balancing number of objects in
152  // each.
153  auto CompareClusters = [](const std::pair<unsigned, unsigned> &a,
154  const std::pair<unsigned, unsigned> &b) {
155  if (a.second || b.second)
156  return a.second > b.second;
157  else
158  return a.first > b.first;
159  };
160 
161  std::priority_queue<std::pair<unsigned, unsigned>,
162  std::vector<std::pair<unsigned, unsigned>>,
163  decltype(CompareClusters)>
164  BalancinQueue(CompareClusters);
165  // Pre-populate priority queue with N slot blanks.
166  for (unsigned i = 0; i < N; ++i)
167  BalancinQueue.push(std::make_pair(i, 0));
168 
169  using SortType = std::pair<unsigned, ClusterMapType::iterator>;
170 
173 
174  // To guarantee determinism, we have to sort SCC according to size.
175  // When size is the same, use leader's name.
176  for (ClusterMapType::iterator I = GVtoClusterMap.begin(),
177  E = GVtoClusterMap.end(); I != E; ++I)
178  if (I->isLeader())
179  Sets.push_back(
180  std::make_pair(std::distance(GVtoClusterMap.member_begin(I),
181  GVtoClusterMap.member_end()), I));
182 
183  std::sort(Sets.begin(), Sets.end(), [](const SortType &a, const SortType &b) {
184  if (a.first == b.first)
185  return a.second->getData()->getName() > b.second->getData()->getName();
186  else
187  return a.first > b.first;
188  });
189 
190  for (auto &I : Sets) {
191  unsigned CurrentClusterID = BalancinQueue.top().first;
192  unsigned CurrentClusterSize = BalancinQueue.top().second;
193  BalancinQueue.pop();
194 
195  DEBUG(dbgs() << "Root[" << CurrentClusterID << "] cluster_size(" << I.first
196  << ") ----> " << I.second->getData()->getName() << "\n");
197 
198  for (ClusterMapType::member_iterator MI =
199  GVtoClusterMap.findLeader(I.second);
200  MI != GVtoClusterMap.member_end(); ++MI) {
201  if (!Visited.insert(*MI).second)
202  continue;
203  DEBUG(dbgs() << "----> " << (*MI)->getName()
204  << ((*MI)->hasLocalLinkage() ? " l " : " e ") << "\n");
205  Visited.insert(*MI);
206  ClusterIDMap[*MI] = CurrentClusterID;
207  CurrentClusterSize++;
208  }
209  // Add this set size to the number of entries in this cluster.
210  BalancinQueue.push(std::make_pair(CurrentClusterID, CurrentClusterSize));
211  }
212 }
213 
214 static void externalize(GlobalValue *GV) {
215  if (GV->hasLocalLinkage()) {
218  }
219 
220  // Unnamed entities must be named consistently between modules. setName will
221  // give a distinct name to each such entity.
222  if (!GV->hasName())
223  GV->setName("__llvmsplit_unnamed");
224 }
225 
226 // Returns whether GV should be in partition (0-based) I of N.
227 static bool isInPartition(const GlobalValue *GV, unsigned I, unsigned N) {
228  if (auto *GIS = dyn_cast<GlobalIndirectSymbol>(GV))
229  if (const GlobalObject *Base = GIS->getBaseObject())
230  GV = Base;
231 
232  StringRef Name;
233  if (const Comdat *C = GV->getComdat())
234  Name = C->getName();
235  else
236  Name = GV->getName();
237 
238  // Partition by MD5 hash. We only need a few bits for evenness as the number
239  // of partitions will generally be in the 1-2 figure range; the low 16 bits
240  // are enough.
241  MD5 H;
242  MD5::MD5Result R;
243  H.update(Name);
244  H.final(R);
245  return (R[0] | (R[1] << 8)) % N == I;
246 }
247 
249  std::unique_ptr<Module> M, unsigned N,
250  function_ref<void(std::unique_ptr<Module> MPart)> ModuleCallback,
251  bool PreserveLocals) {
252  if (!PreserveLocals) {
253  for (Function &F : *M)
254  externalize(&F);
255  for (GlobalVariable &GV : M->globals())
256  externalize(&GV);
257  for (GlobalAlias &GA : M->aliases())
258  externalize(&GA);
259  for (GlobalIFunc &GIF : M->ifuncs())
260  externalize(&GIF);
261  }
262 
263  // This performs splitting without a need for externalization, which might not
264  // always be possible.
265  ClusterIDMapType ClusterIDMap;
266  findPartitions(M.get(), ClusterIDMap, N);
267 
268  // FIXME: We should be able to reuse M as the last partition instead of
269  // cloning it.
270  for (unsigned I = 0; I < N; ++I) {
271  ValueToValueMapTy VMap;
272  std::unique_ptr<Module> MPart(
273  CloneModule(M.get(), VMap, [&](const GlobalValue *GV) {
274  if (ClusterIDMap.count(GV))
275  return (ClusterIDMap[GV] == I);
276  else
277  return isInPartition(GV, I, N);
278  }));
279  if (I != 0)
280  MPart->setModuleInlineAsm("");
281  ModuleCallback(std::move(MPart));
282  }
283 }
void setVisibility(VisibilityTypes V)
Definition: GlobalValue.h:231
uint64_t CallInst * C
bool hasLocalLinkage() const
Definition: GlobalValue.h:427
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:63
bool isConstantUsed() const
Return true if the constant has users other than constant expressions and other dangling things...
Definition: Constants.cpp:415
void SplitModule(std::unique_ptr< Module > M, unsigned N, function_ref< void(std::unique_ptr< Module > MPart)> ModuleCallback, bool PreserveLocals=false)
Splits the module M into N linkable partitions.
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:89
Externally visible function.
Definition: GlobalValue.h:49
static void addAllGlobalValueUsers(ClusterMapType &GVtoClusterMap, const GlobalValue *GV, const Value *V)
Definition: SplitModule.cpp:78
F(f)
static void externalize(GlobalValue *GV)
The address of a basic block.
Definition: Constants.h:813
void update(ArrayRef< uint8_t > Data)
Updates the hash for the byte stream provided.
Definition: MD5.cpp:189
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:286
This file contains the declaration of the Comdat class, which represents a single COMDAT in LLVM...
static void findPartitions(Module *M, ClusterIDMapType &ClusterIDMap, unsigned N)
Definition: SplitModule.cpp:99
iterator_range< iterator > functions()
Definition: Module.h:583
bool hasName() const
Definition: Value.h:251
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static void addNonConstUser(ClusterMapType &GVtoClusterMap, const GlobalValue *GV, const User *U)
Definition: SplitModule.cpp:62
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:116
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define H(x, y, z)
Definition: MD5.cpp:57
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
std::unique_ptr< Module > CloneModule(const Module *M)
Return an exact copy of the specified module.
Definition: CloneModule.cpp:36
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:864
Module.h This file contains the declarations for the Module class.
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:385
void setLinkage(LinkageTypes LT)
Definition: GlobalValue.h:436
static BlockAddress * lookup(const BasicBlock *BB)
Lookup an existing BlockAddress constant for the given BasicBlock.
Definition: Constants.cpp:1357
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
const Comdat * getComdat() const
Definition: Globals.cpp:166
iterator_range< user_iterator > users()
Definition: Value.h:401
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:398
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:120
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:61
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:220
size_t size() const
Definition: Module.h:580
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
Definition: MD5.h:41
static bool isInPartition(const GlobalValue *GV, unsigned I, unsigned N)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
user_iterator user_begin()
Definition: Value.h:377
LLVM Value Representation.
Definition: Value.h:73
#define DEBUG(X)
Definition: Debug.h:118
void final(MD5Result &Result)
Finishes off the hash and puts the result in result.
Definition: MD5.cpp:234
iterator_range< global_iterator > globals()
Definition: Module.h:561
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
void sort(Policy policy, RandomAccessIterator Start, RandomAccessIterator End, const Comparator &Comp=Comparator())
Definition: Parallel.h:199
UnaryPredicate for_each(R &&Range, UnaryPredicate P)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:806
iterator_range< alias_iterator > aliases()
Definition: Module.h:601
user_iterator user_end()
Definition: Value.h:385