LLVM  17.0.0git
LegacyDivergenceAnalysis.cpp
Go to the documentation of this file.
1 //===- LegacyDivergenceAnalysis.cpp --------- Legacy Divergence Analysis
2 //Implementation -==//
3 //
4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 // See https://llvm.org/LICENSE.txt for license information.
6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements divergence analysis which determines whether a branch
11 // in a GPU program is divergent. It can help branch optimizations such as jump
12 // threading and loop unswitching to make better decisions.
13 //
14 // GPU programs typically use the SIMD execution model, where multiple threads
15 // in the same execution group have to execute in lock-step. Therefore, if the
16 // code contains divergent branches (i.e., threads in a group do not agree on
17 // which path of the branch to take), the group of threads has to execute all
18 // the paths from that branch with different subsets of threads enabled until
19 // they converge at the immediately post-dominating BB of the paths.
20 //
21 // Due to this execution model, some optimizations such as jump
22 // threading and loop unswitching can be unfortunately harmful when performed on
23 // divergent branches. Therefore, an analysis that computes which branches in a
24 // GPU program are divergent can help the compiler to selectively run these
25 // optimizations.
26 //
27 // This file defines divergence analysis which computes a conservative but
28 // non-trivial approximation of all divergent branches in a GPU program. It
29 // partially implements the approach described in
30 //
31 // Divergence Analysis
32 // Sampaio, Souza, Collange, Pereira
33 // TOPLAS '13
34 //
35 // The divergence analysis identifies the sources of divergence (e.g., special
36 // variables that hold the thread ID), and recursively marks variables that are
37 // data or sync dependent on a source of divergence as divergent.
38 //
39 // While data dependency is a well-known concept, the notion of sync dependency
40 // is worth more explanation. Sync dependence characterizes the control flow
41 // aspect of the propagation of branch divergence. For example,
42 //
43 // %cond = icmp slt i32 %tid, 10
44 // br i1 %cond, label %then, label %else
45 // then:
46 // br label %merge
47 // else:
48 // br label %merge
49 // merge:
50 // %a = phi i32 [ 0, %then ], [ 1, %else ]
51 //
52 // Suppose %tid holds the thread ID. Although %a is not data dependent on %tid
53 // because %tid is not on its use-def chains, %a is sync dependent on %tid
54 // because the branch "br i1 %cond" depends on %tid and affects which value %a
55 // is assigned to.
56 //
57 // The current implementation has the following limitations:
58 // 1. intra-procedural. It conservatively considers the arguments of a
59 // non-kernel-entry function and the return value of a function call as
60 // divergent.
61 // 2. memory as black box. It conservatively considers values loaded from
62 // generic or local address as divergent. This can be improved by leveraging
63 // pointer analysis.
64 //
65 //===----------------------------------------------------------------------===//
66 
69 #include "llvm/Analysis/CFG.h"
71 #include "llvm/Analysis/LoopInfo.h"
72 #include "llvm/Analysis/Passes.h"
75 #include "llvm/IR/Dominators.h"
76 #include "llvm/IR/InstIterator.h"
77 #include "llvm/IR/Instructions.h"
78 #include "llvm/IR/Value.h"
79 #include "llvm/InitializePasses.h"
81 #include "llvm/Support/Debug.h"
83 #include <vector>
84 using namespace llvm;
85 
86 #define DEBUG_TYPE "divergence"
87 
88 // Hidden flag (default: false): transparently use the GPUDivergenceAnalysis.
89 static cl::opt<bool> UseGPUDA("use-gpu-divergence-analysis", cl::init(false),
90  cl::Hidden,
91  cl::desc("turn the LegacyDivergenceAnalysis into "
92  "a wrapper for GPUDivergenceAnalysis"));
93 
94 namespace {
95 
97 public:
101  : F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV), DU(DU) {}
102  void populateWithSourcesOfDivergence();
103  void propagate();
104 
105 private:
106  // A helper function that explores data dependents of V.
107  void exploreDataDependency(Value *V);
108  // A helper function that explores sync dependents of TI.
109  void exploreSyncDependency(Instruction *TI);
110  // Computes the influence region from Start to End. This region includes all
111  // basic blocks on any simple path from Start to End.
112  void computeInfluenceRegion(BasicBlock *Start, BasicBlock *End,
113  DenseSet<BasicBlock *> &InfluenceRegion);
114  // Finds all users of I that are outside the influence region, and add these
115  // users to Worklist.
116  void findUsersOutsideInfluenceRegion(
117  Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion);
118 
119  Function &F;
121  DominatorTree &DT;
122  PostDominatorTree &PDT;
123  std::vector<Value *> Worklist; // Stack for DFS.
124  DenseSet<const Value *> &DV; // Stores all divergent values.
125  DenseSet<const Use *> &DU; // Stores divergent uses of possibly uniform
126  // values.
127 };
128 
129 void DivergencePropagator::populateWithSourcesOfDivergence() {
130  Worklist.clear();
131  DV.clear();
132  DU.clear();
133  for (auto &I : instructions(F)) {
134  if (TTI.isSourceOfDivergence(&I)) {
135  Worklist.push_back(&I);
136  DV.insert(&I);
137  }
138  }
139  for (auto &Arg : F.args()) {
140  if (TTI.isSourceOfDivergence(&Arg)) {
141  Worklist.push_back(&Arg);
142  DV.insert(&Arg);
143  }
144  }
145 }
146 
147 void DivergencePropagator::exploreSyncDependency(Instruction *TI) {
148  // Propagation rule 1: if branch TI is divergent, all PHINodes in TI's
149  // immediate post dominator are divergent. This rule handles if-then-else
150  // patterns. For example,
151  //
152  // if (tid < 5)
153  // a1 = 1;
154  // else
155  // a2 = 2;
156  // a = phi(a1, a2); // sync dependent on (tid < 5)
157  BasicBlock *ThisBB = TI->getParent();
158 
159  // Unreachable blocks may not be in the dominator tree.
160  if (!DT.isReachableFromEntry(ThisBB))
161  return;
162 
163  // If the function has no exit blocks or doesn't reach any exit blocks, the
164  // post dominator may be null.
165  DomTreeNode *ThisNode = PDT.getNode(ThisBB);
166  if (!ThisNode)
167  return;
168 
169  BasicBlock *IPostDom = ThisNode->getIDom()->getBlock();
170  if (IPostDom == nullptr)
171  return;
172 
173  for (auto I = IPostDom->begin(); isa<PHINode>(I); ++I) {
174  // A PHINode is uniform if it returns the same value no matter which path is
175  // taken.
176  if (!cast<PHINode>(I)->hasConstantOrUndefValue() && DV.insert(&*I).second)
177  Worklist.push_back(&*I);
178  }
179 
180  // Propagation rule 2: if a value defined in a loop is used outside, the user
181  // is sync dependent on the condition of the loop exits that dominate the
182  // user. For example,
183  //
184  // int i = 0;
185  // do {
186  // i++;
187  // if (foo(i)) ... // uniform
188  // } while (i < tid);
189  // if (bar(i)) ... // divergent
190  //
191  // A program may contain unstructured loops. Therefore, we cannot leverage
192  // LoopInfo, which only recognizes natural loops.
193  //
194  // The algorithm used here handles both natural and unstructured loops. Given
195  // a branch TI, we first compute its influence region, the union of all simple
196  // paths from TI to its immediate post dominator (IPostDom). Then, we search
197  // for all the values defined in the influence region but used outside. All
198  // these users are sync dependent on TI.
199  DenseSet<BasicBlock *> InfluenceRegion;
200  computeInfluenceRegion(ThisBB, IPostDom, InfluenceRegion);
201  // An insight that can speed up the search process is that all the in-region
202  // values that are used outside must dominate TI. Therefore, instead of
203  // searching every basic blocks in the influence region, we search all the
204  // dominators of TI until it is outside the influence region.
205  BasicBlock *InfluencedBB = ThisBB;
206  while (InfluenceRegion.count(InfluencedBB)) {
207  for (auto &I : *InfluencedBB) {
208  if (!DV.count(&I))
209  findUsersOutsideInfluenceRegion(I, InfluenceRegion);
210  }
211  DomTreeNode *IDomNode = DT.getNode(InfluencedBB)->getIDom();
212  if (IDomNode == nullptr)
213  break;
214  InfluencedBB = IDomNode->getBlock();
215  }
216 }
217 
218 void DivergencePropagator::findUsersOutsideInfluenceRegion(
219  Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion) {
220  for (Use &Use : I.uses()) {
221  Instruction *UserInst = cast<Instruction>(Use.getUser());
222  if (!InfluenceRegion.count(UserInst->getParent())) {
223  DU.insert(&Use);
224  if (DV.insert(UserInst).second)
225  Worklist.push_back(UserInst);
226  }
227  }
228 }
229 
230 // A helper function for computeInfluenceRegion that adds successors of "ThisBB"
231 // to the influence region.
232 static void
233 addSuccessorsToInfluenceRegion(BasicBlock *ThisBB, BasicBlock *End,
234  DenseSet<BasicBlock *> &InfluenceRegion,
235  std::vector<BasicBlock *> &InfluenceStack) {
236  for (BasicBlock *Succ : successors(ThisBB)) {
237  if (Succ != End && InfluenceRegion.insert(Succ).second)
238  InfluenceStack.push_back(Succ);
239  }
240 }
241 
242 void DivergencePropagator::computeInfluenceRegion(
243  BasicBlock *Start, BasicBlock *End,
244  DenseSet<BasicBlock *> &InfluenceRegion) {
245  assert(PDT.properlyDominates(End, Start) &&
246  "End does not properly dominate Start");
247 
248  // The influence region starts from the end of "Start" to the beginning of
249  // "End". Therefore, "Start" should not be in the region unless "Start" is in
250  // a loop that doesn't contain "End".
251  std::vector<BasicBlock *> InfluenceStack;
252  addSuccessorsToInfluenceRegion(Start, End, InfluenceRegion, InfluenceStack);
253  while (!InfluenceStack.empty()) {
254  BasicBlock *BB = InfluenceStack.back();
255  InfluenceStack.pop_back();
256  addSuccessorsToInfluenceRegion(BB, End, InfluenceRegion, InfluenceStack);
257  }
258 }
259 
260 void DivergencePropagator::exploreDataDependency(Value *V) {
261  // Follow def-use chains of V.
262  for (User *U : V->users()) {
263  if (!TTI.isAlwaysUniform(U) && DV.insert(U).second)
264  Worklist.push_back(U);
265  }
266 }
267 
268 void DivergencePropagator::propagate() {
269  // Traverse the dependency graph using DFS.
270  while (!Worklist.empty()) {
271  Value *V = Worklist.back();
272  Worklist.pop_back();
273  if (Instruction *I = dyn_cast<Instruction>(V)) {
274  // Terminators with less than two successors won't introduce sync
275  // dependency. Ignore them.
276  if (I->isTerminator() && I->getNumSuccessors() > 1)
277  exploreSyncDependency(I);
278  }
279  exploreDataDependency(V);
280  }
281 }
282 
283 } // namespace
284 
285 // Register this pass.
289 }
291  "Legacy Divergence Analysis", false, true)
296  "Legacy Divergence Analysis", false, true)
297 
299  return new LegacyDivergenceAnalysis();
300 }
301 
303  const Function &F, const TargetTransformInfo &TTI, const LoopInfo &LI) {
305  return false;
306 
307  // GPUDivergenceAnalysis requires a reducible CFG.
308  using RPOTraversal = ReversePostOrderTraversal<const Function *>;
309  RPOTraversal FuncRPOT(&F);
310  return !containsIrreducibleCFG<const BasicBlock *, const RPOTraversal,
311  const LoopInfo>(FuncRPOT, LI);
312 }
313 
318  const llvm::LoopInfo &LI) {
320  // run the new GPU divergence analysis
321  gpuDA = std::make_unique<DivergenceInfo>(F, DT, PDT, LI, TTI,
322  /* KnownReducible = */ true);
323 
324  } else {
325  // run LLVM's existing DivergenceAnalysis
327  DP.populateWithSourcesOfDivergence();
328  DP.propagate();
329  }
330 }
331 
333  if (gpuDA) {
334  return gpuDA->isDivergent(*V);
335  }
336  return DivergentValues.count(V);
337 }
338 
340  if (gpuDA) {
341  return gpuDA->isDivergentUse(*U);
342  }
343  return DivergentValues.count(U->get()) || DivergentUses.count(U);
344 }
345 
347  const Module *) const {
348  if ((!gpuDA || !gpuDA->hasDivergence()) && DivergentValues.empty())
349  return;
350 
351  const Function *F = nullptr;
352  if (!DivergentValues.empty()) {
353  const Value *FirstDivergentValue = *DivergentValues.begin();
354  if (const Argument *Arg = dyn_cast<Argument>(FirstDivergentValue)) {
355  F = Arg->getParent();
356  } else if (const Instruction *I =
357  dyn_cast<Instruction>(FirstDivergentValue)) {
358  F = I->getParent()->getParent();
359  } else {
360  llvm_unreachable("Only arguments and instructions can be divergent");
361  }
362  } else if (gpuDA) {
363  F = &gpuDA->getFunction();
364  }
365  if (!F)
366  return;
367 
368  // Dumps all divergent values in F, arguments and then instructions.
369  for (const auto &Arg : F->args()) {
370  OS << (isDivergent(&Arg) ? "DIVERGENT: " : " ");
371  OS << Arg << "\n";
372  }
373 // Iterate block by block (not over the DenseSet) to ensure a deterministic order.
374  for (const BasicBlock &BB : *F) {
375  OS << "\n " << BB.getName() << ":\n";
376  for (const auto &I : BB.instructionsWithoutDebug()) {
377  OS << (isDivergent(&I) ? "DIVERGENT: " : " ");
378  OS << I << "\n";
379  }
380  }
381  OS << "\n";
382 }
383 
388  AU.setPreservesAll();
389 }
390 
392  auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
393  if (TTIWP == nullptr)
394  return false;
395 
396  TargetTransformInfo &TTI = TTIWP->getTTI(F);
397  // Fast path: if the target does not have branch divergence, we do not mark
398  // any branch as divergent.
399  if (!TTI.hasBranchDivergence())
400  return false;
401 
403  DivergentUses.clear();
404  gpuDA = nullptr;
405 
406  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
407  auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
408  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
409  LegacyDivergenceAnalysisImpl::run(F, TTI, DT, PDT, LI);
410  LLVM_DEBUG(dbgs() << "\nAfter divergence analysis on " << F.getName()
411  << ":\n";
412  LegacyDivergenceAnalysisImpl::print(dbgs(), F.getParent()));
413 
414  return false;
415 }
416 
419  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
420  if (!TTI.hasBranchDivergence())
421  return PreservedAnalyses::all();
422 
423  DivergentValues.clear();
424  DivergentUses.clear();
425  gpuDA = nullptr;
426 
427  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
428  auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
429  auto &LI = AM.getResult<LoopAnalysis>(F);
430  LegacyDivergenceAnalysisImpl::run(F, TTI, DT, PDT, LI);
431  LLVM_DEBUG(dbgs() << "\nAfter divergence analysis on " << F.getName()
432  << ":\n";
433  LegacyDivergenceAnalysisImpl::print(dbgs(), F.getParent()));
434  return PreservedAnalyses::all();
435 }
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
llvm::Argument
This class represents an incoming formal argument to a Function.
Definition: Argument.h:28
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:2607
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::LegacyDivergenceAnalysisImpl::shouldUseGPUDivergenceAnalysis
bool shouldUseGPUDivergenceAnalysis(const Function &F, const TargetTransformInfo &TTI, const LoopInfo &LI)
Definition: LegacyDivergenceAnalysis.cpp:302
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:774
InstIterator.h
llvm::Function
Definition: Function.h:59
llvm::successors
auto successors(const MachineBasicBlock *BB)
Definition: MachineSSAContext.h:29
llvm::LegacyDivergenceAnalysisImpl::run
void run(Function &F, TargetTransformInfo &TTI, DominatorTree &DT, PostDominatorTree &PDT, const LoopInfo &LI)
Definition: LegacyDivergenceAnalysis.cpp:314
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:172
llvm::LegacyDivergenceAnalysisImpl::isDivergentUse
bool isDivergentUse(const Use *U) const
Definition: LegacyDivergenceAnalysis.cpp:339
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:138
llvm::LegacyDivergenceAnalysis::LegacyDivergenceAnalysis
LegacyDivergenceAnalysis()
Definition: LegacyDivergenceAnalysis.cpp:287
llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1293
llvm::initializeLegacyDivergenceAnalysisPass
void initializeLegacyDivergenceAnalysisPass(PassRegistry &)
DivergenceAnalysis.h
llvm::LegacyDivergenceAnalysisImpl::DivergentValues
DenseSet< const Value * > DivergentValues
Definition: LegacyDivergenceAnalysis.h:68
llvm::DomTreeNodeBase::getIDom
DomTreeNodeBase * getIDom() const
Definition: GenericDomTree.h:90
llvm::detail::DenseSetImpl::insert
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
llvm::detail::DenseSetImpl::count
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:97
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:187
CommandLine.h
llvm::LegacyDivergenceAnalysis::ID
static char ID
Definition: LegacyDivergenceAnalysis.h:77
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:24
PostDominators.h
llvm::LegacyDivergenceAnalysisImpl::DivergentUses
DenseSet< const Use * > DivergentUses
Definition: LegacyDivergenceAnalysis.h:71
llvm::User
Definition: User.h:44
llvm::TargetTransformInfo::useGPUDivergenceAnalysis
bool useGPUDivergenceAnalysis() const
Return true if the target prefers to use GPU divergence analysis to replace the legacy version.
Definition: TargetTransformInfo.cpp:242
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:314
llvm::TargetTransformInfo::hasBranchDivergence
bool hasBranchDivergence() const
Return true if branch divergence exists.
Definition: TargetTransformInfo.cpp:238
llvm::PostDominatorTreeWrapperPass
Definition: PostDominators.h:73
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
false
Definition: StackSlotColoring.cpp:141
divergence
divergence
Definition: LegacyDivergenceAnalysis.cpp:295
llvm::Instruction
Definition: Instruction.h:41
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:314
llvm::TargetTransformInfo::isSourceOfDivergence
bool isSourceOfDivergence(const Value *V) const
Returns whether V is a source of divergence.
Definition: TargetTransformInfo.cpp:246
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
llvm::Use::getUser
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
llvm::LegacyDivergenceAnalysis
Definition: LegacyDivergenceAnalysis.h:74
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
LoopInfo.h
llvm::DivergencePropagator::DT
const DominatorTreeT & DT
Definition: GenericUniformityImpl.h:493
llvm::LegacyDivergenceAnalysis::runOnFunction
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
Definition: LegacyDivergenceAnalysis.cpp:391
llvm::DenseSet< const Value * >
llvm::cl::opt< bool >
llvm::instructions
inst_range instructions(Function *F)
Definition: InstIterator.h:133
llvm::DomTreeNodeBase::getBlock
NodeT * getBlock() const
Definition: GenericDomTree.h:89
llvm::LegacyDivergenceAnalysisImpl::isDivergent
bool isDivergent(const Value *V) const
Definition: LegacyDivergenceAnalysis.cpp:332
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
true
basic Basic Alias true
Definition: BasicAliasAnalysis.cpp:1804
llvm::LegacyDivergenceAnalysis::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: LegacyDivergenceAnalysis.cpp:384
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::detail::DenseSetImpl::empty
bool empty() const
Definition: DenseSet.h:80
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
UseGPUDA
static cl::opt< bool > UseGPUDA("use-gpu-divergence-analysis", cl::init(false), cl::Hidden, cl::desc("turn the LegacyDivergenceAnalysis into " "a wrapper for GPUDivergenceAnalysis"))
llvm::detail::DenseSetImpl::begin
iterator begin()
Definition: DenseSet.h:173
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::TTI
TargetTransformInfo TTI
Definition: TargetTransformInfo.h:167
llvm::DivergencePropagator
Compute divergence starting with a divergent branch.
Definition: GenericUniformityImpl.h:139
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(LegacyDivergenceAnalysis, "divergence", "Legacy Divergence Analysis", false, true) INITIALIZE_PASS_END(LegacyDivergenceAnalysis
llvm::LegacyDivergenceAnalysisPass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition: LegacyDivergenceAnalysis.cpp:418
llvm::detail::DenseSetImpl::clear
void clear()
Definition: DenseSet.h:92
CFG.h
llvm::LoopInfo
Definition: LoopInfo.h:1108
DP
So we should use XX3Form_Rcr to implement intrinsic Convert DP outs ins xscvdpsp No builtin are required Round &Convert QP DP(dword[1] is set to zero) No builtin are required Round to Quad Precision because you need to assign rounding mode in instruction Provide builtin(set f128:$vT,(int_ppc_vsx_xsrqpi f128:$vB))(set f128 yields< n x< ty > >< result > yields< ty >< result > No builtin are required Load Store load store see def memrix16 in PPCInstrInfo td Load Store Vector load store outs ins lxsdx set load store with conversion from to DP
Definition: README_P9.txt:520
llvm::LegacyDivergenceAnalysisImpl::print
void print(raw_ostream &OS, const Module *) const
Definition: LegacyDivergenceAnalysis.cpp:346
llvm::PostDominatorTree
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
Definition: PostDominators.h:28
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::DomTreeNodeBase< BasicBlock >
llvm::LegacyDivergenceAnalysisImpl::gpuDA
std::unique_ptr< DivergenceInfo > gpuDA
Definition: LegacyDivergenceAnalysis.h:65
llvm::AnalysisUsage::setPreservesAll
void setPreservesAll()
Set by analyses that do not transform their input at all.
Definition: PassAnalysisSupport.h:130
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
llvm::ReversePostOrderTraversal
Definition: PostOrderIterator.h:293
llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
Instructions.h
PostOrderIterator.h
LegacyDivergenceAnalysis.h
Dominators.h
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:90
TargetTransformInfo.h
llvm::createLegacyDivergenceAnalysisPass
FunctionPass * createLegacyDivergenceAnalysisPass()
Definition: LegacyDivergenceAnalysis.cpp:298
llvm::AnalysisUsage::addRequiredTransitive
AnalysisUsage & addRequiredTransitive()
Definition: PassAnalysisSupport.h:81
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::PostDominatorTreeAnalysis
Analysis pass which computes a PostDominatorTree.
Definition: PostDominators.h:47
llvm::cl::desc
Definition: CommandLine.h:411
raw_ostream.h
Value.h
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:421
llvm::DivergencePropagator::DivergencePropagator
DivergencePropagator(const ModifiedPO &CyclePOT, const DominatorTreeT &DT, const CycleInfoT &CI, const BlockT &DivTermBlock)
Definition: GenericUniformityImpl.h:508
llvm::TargetTransformInfo::isAlwaysUniform
bool isAlwaysUniform(const Value *V) const
Definition: TargetTransformInfo.cpp:250
llvm::LoopAnalysis
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:1268
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
llvm::containsIrreducibleCFG
bool containsIrreducibleCFG(RPOTraversalT &RPOTraversal, const LoopInfoT &LI)
Return true if the control flow in RPOTraversal is irreducible.
Definition: CFG.h:136
Passes.h