//===- AMDGPUUnifyDivergentExitNodes.cpp ----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This is a variant of the UnifyFunctionExitNodes pass. Rather than ensuring
// there is at most one ret and one unreachable instruction, it ensures there
// is at most one divergent exiting block.
//
// StructurizeCFG can't deal with multi-exit regions formed by branches to
// multiple return nodes. It is not desirable to structurize regions with
// uniform branches, so unifying them into the same return block as the
// divergent branches would inhibit the use of scalar branching. StructurizeCFG
// also can't deal with the case where one branch goes to a return and another
// to an unreachable; in that case, the unreachable is replaced with a return.
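//
// An illustrative sketch (block and value names are made up): a function with
// two divergent exits,
//
//   entry:
//     br i1 %divergent.cond, label %a, label %b
//   a:
//     ret void
//   b:
//     ret void
//
// is rewritten so that %a and %b both branch to a single block holding the
// only ret (named UnifiedReturnBlock below). For non-void functions, a phi
// in that block merges the incoming return values.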
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "SIDefines.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/Type.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/Local.h"

using namespace llvm;

#define DEBUG_TYPE "amdgpu-unify-divergent-exit-nodes"

namespace {

class AMDGPUUnifyDivergentExitNodes : public FunctionPass {
public:
  static char ID; // Pass identification, replacement for typeid

  AMDGPUUnifyDivergentExitNodes() : FunctionPass(ID) {
    initializeAMDGPUUnifyDivergentExitNodesPass(*PassRegistry::getPassRegistry());
  }

  // We can preserve non-critical-edgeness when we unify function exit nodes.
  void getAnalysisUsage(AnalysisUsage &AU) const override;
  bool runOnFunction(Function &F) override;
};

} // end anonymous namespace

char AMDGPUUnifyDivergentExitNodes::ID = 0;

char &llvm::AMDGPUUnifyDivergentExitNodesID = AMDGPUUnifyDivergentExitNodes::ID;

INITIALIZE_PASS_BEGIN(AMDGPUUnifyDivergentExitNodes, DEBUG_TYPE,
                      "Unify divergent function exit nodes", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
INITIALIZE_PASS_END(AMDGPUUnifyDivergentExitNodes, DEBUG_TYPE,
                    "Unify divergent function exit nodes", false, false)

void AMDGPUUnifyDivergentExitNodes::getAnalysisUsage(AnalysisUsage &AU) const {
  if (RequireAndPreserveDomTree)
    AU.addRequired<DominatorTreeWrapperPass>();

  AU.addRequired<PostDominatorTreeWrapperPass>();

  AU.addRequired<LegacyDivergenceAnalysis>();

  if (RequireAndPreserveDomTree) {
    AU.addPreserved<DominatorTreeWrapperPass>();
    // FIXME: preserve PostDominatorTreeWrapperPass
  }

  // No divergent values are changed, only blocks and branch edges.
  AU.addPreserved<LegacyDivergenceAnalysis>();

  // We preserve the non-critical-edgeness property.
  AU.addPreservedID(BreakCriticalEdgesID);

  // This is a cluster of orthogonal Transforms.
  AU.addPreservedID(LowerSwitchID);
  FunctionPass::getAnalysisUsage(AU);

  AU.addRequired<TargetTransformInfoWrapperPass>();
}

/// \returns true if \p BB is reachable through only uniform branches.
/// XXX - Is there a more efficient way to find this?
static bool isUniformlyReached(const LegacyDivergenceAnalysis &DA,
                               BasicBlock &BB) {
  SmallVector<BasicBlock *, 8> Stack(predecessors(&BB));
  SmallPtrSet<BasicBlock *, 8> Visited;

  while (!Stack.empty()) {
    BasicBlock *Top = Stack.pop_back_val();
    if (!DA.isUniform(Top->getTerminator()))
      return false;

    for (BasicBlock *Pred : predecessors(Top)) {
      if (Visited.insert(Pred).second)
        Stack.push_back(Pred);
    }
  }

  return true;
}

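// Clear the "done" bit on every export intrinsic in the function, so that the
// single "done" export inserted into the unified return block remains the
// only one. For reference, the operand layouts are:
//   llvm.amdgcn.exp(i32 tgt, i32 en, T s0, T s1, T s2, T s3, i1 done, i1 vm)
//     -> "done" is operand 6
//   llvm.amdgcn.exp.compr(i32 tgt, i32 en, V s0, V s1, i1 done, i1 vm)
//     -> "done" is operand 4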
static void removeDoneExport(Function &F) {
  ConstantInt *BoolFalse = ConstantInt::getFalse(F.getContext());
  for (BasicBlock &BB : F) {
    for (Instruction &I : BB) {
      if (IntrinsicInst *Intrin = llvm::dyn_cast<IntrinsicInst>(&I)) {
        if (Intrin->getIntrinsicID() == Intrinsic::amdgcn_exp) {
          Intrin->setArgOperand(6, BoolFalse); // done
        } else if (Intrin->getIntrinsicID() == Intrinsic::amdgcn_exp_compr) {
          Intrin->setArgOperand(4, BoolFalse); // done
        }
      }
    }
  }
}

static BasicBlock *unifyReturnBlockSet(Function &F, DomTreeUpdater &DTU,
                                       ArrayRef<BasicBlock *> ReturningBlocks,
                                       bool InsertExport,
                                       const TargetTransformInfo &TTI,
                                       StringRef Name) {
  // We need to insert a new basic block into the function, add PHI nodes (if
  // the function returns values), and convert all of the return instructions
  // into unconditional branches.
  BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(), Name, &F);
  IRBuilder<> B(NewRetBlock);

  if (InsertExport) {
    // Ensure that there's only one "done" export in the shader by removing
    // the "done" bit set on the original final export. More than one "done"
    // export can lead to undefined behavior.
    removeDoneExport(F);

    Value *Undef = UndefValue::get(B.getFloatTy());
    B.CreateIntrinsic(Intrinsic::amdgcn_exp, { B.getFloatTy() },
                      {
                        B.getInt32(AMDGPU::Exp::ET_NULL),
                        B.getInt32(0), // enabled channels
                        Undef, Undef, Undef, Undef, // values
                        B.getTrue(), // done
                        B.getTrue(), // valid mask
                      });
  }

  PHINode *PN = nullptr;
  if (F.getReturnType()->isVoidTy()) {
    B.CreateRetVoid();
  } else {
    // If the function doesn't return void, add a PHI node to merge the
    // incoming return values.
    PN = B.CreatePHI(F.getReturnType(), ReturningBlocks.size(),
                     "UnifiedRetVal");
    assert(!InsertExport);
    B.CreateRet(PN);
  }

  // Loop over all of the blocks, replacing the return instruction with an
  // unconditional branch.
  std::vector<DominatorTree::UpdateType> Updates;
  Updates.reserve(ReturningBlocks.size());
  for (BasicBlock *BB : ReturningBlocks) {
    // Add an incoming element to the PHI node for every return instruction
    // that is merging into this new block.
    if (PN)
      PN->addIncoming(BB->getTerminator()->getOperand(0), BB);

    // Remove and delete the return inst.
    BB->getTerminator()->eraseFromParent();
    BranchInst::Create(NewRetBlock, BB);
    Updates.push_back({DominatorTree::Insert, BB, NewRetBlock});
  }

  if (RequireAndPreserveDomTree)
    DTU.applyUpdates(Updates);
  Updates.clear();

  for (BasicBlock *BB : ReturningBlocks) {
    // Cleanup possible branch to unconditional branch to the return.
    simplifyCFG(BB, TTI, RequireAndPreserveDomTree ? &DTU : nullptr,
                SimplifyCFGOptions().bonusInstThreshold(2));
  }

  return NewRetBlock;
}

bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
  DominatorTree *DT = nullptr;
  if (RequireAndPreserveDomTree)
    DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();

  auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();

  // If there's only one exit, we don't need to do anything, unless this is a
  // pixel shader and that exit is an infinite loop, since we still have to
  // insert an export in that case.
  if (PDT.root_size() <= 1 && F.getCallingConv() != CallingConv::AMDGPU_PS)
    return false;

  LegacyDivergenceAnalysis &DA = getAnalysis<LegacyDivergenceAnalysis>();

  // Loop over all of the blocks in a function, tracking all of the blocks
  // that return.
  SmallVector<BasicBlock *, 4> ReturningBlocks;
  SmallVector<BasicBlock *, 4> UniformlyReachedRetBlocks;
  SmallVector<BasicBlock *, 4> UnreachableBlocks;

  // Dummy return block for infinite loop.
  BasicBlock *DummyReturnBB = nullptr;

  bool InsertExport = false;

  bool Changed = false;
  std::vector<DominatorTree::UpdateType> Updates;

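  // The roots of the post-dominator tree are the blocks this loop must
  // examine: the ordinary exits (terminated by ret or unreachable) and, for
  // each infinite loop, a representative block whose terminator is a branch.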
  for (BasicBlock *BB : PDT.roots()) {
    if (isa<ReturnInst>(BB->getTerminator())) {
      if (!isUniformlyReached(DA, *BB))
        ReturningBlocks.push_back(BB);
      else
        UniformlyReachedRetBlocks.push_back(BB);
    } else if (isa<UnreachableInst>(BB->getTerminator())) {
      if (!isUniformlyReached(DA, *BB))
        UnreachableBlocks.push_back(BB);
    } else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {

      ConstantInt *BoolTrue = ConstantInt::getTrue(F.getContext());
      if (DummyReturnBB == nullptr) {
        DummyReturnBB = BasicBlock::Create(F.getContext(),
                                           "DummyReturnBlock", &F);
        Type *RetTy = F.getReturnType();
        Value *RetVal = RetTy->isVoidTy() ? nullptr : UndefValue::get(RetTy);

        // For pixel shaders, the producer guarantees that an export is
        // executed before each return instruction. However, if there is an
        // infinite loop and we insert a return ourselves, we need to uphold
        // that guarantee by inserting a null export. This can happen e.g. in
        // an infinite loop with kill instructions, which is supposed to
        // terminate. However, we don't need to do this if there is a non-void
        // return value, since then there is an epilog afterwards which will
        // still export.
        //
        // Note: In the case where only some threads enter the infinite loop,
        // this can result in the null export happening redundantly after the
        // original exports. However, the last "real" export happens after all
        // the threads that didn't enter an infinite loop converged, which
        // means that the only extra threads to execute the null export are
        // threads that entered the infinite loop, and they could only have
        // exited through being killed, which sets their exec bit to 0.
        // Therefore, unless there's an actual infinite loop, which can have
        // invalid results, or there's a kill after the last export, which we
        // assume the frontend won't do, this export will have the same exec
        // mask as the last "real" export, and therefore the valid mask will
        // be overwritten with the same value and will still be correct. Also,
        // even though this forces an extra unnecessary export wait, we assume
        // that this happens rarely enough in practice that we don't have to
        // worry about performance.
        if (F.getCallingConv() == CallingConv::AMDGPU_PS &&
            RetTy->isVoidTy()) {
          InsertExport = true;
        }

        ReturnInst::Create(F.getContext(), RetVal, DummyReturnBB);
        ReturningBlocks.push_back(DummyReturnBB);
      }

      if (BI->isUnconditional()) {
        BasicBlock *LoopHeaderBB = BI->getSuccessor(0);
        BI->eraseFromParent(); // Delete the unconditional branch.
        // Add a new conditional branch with a dummy edge to the return block.
        BranchInst::Create(LoopHeaderBB, DummyReturnBB, BoolTrue, BB);
        Updates.push_back({DominatorTree::Insert, BB, DummyReturnBB});
      } else { // Conditional branch.
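        // Sketch of this rewrite (names illustrative): a root block ending in
        //   br i1 %cond, label %a, label %b
        // becomes
        //   br i1 true, label %TransitionBlock, label %DummyReturnBlock
        // with a new TransitionBlock holding the original conditional branch.
        // The never-taken edge to DummyReturnBlock gives the infinite loop an
        // explicit path to the unified return.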
        SmallVector<BasicBlock *, 2> Successors(successors(BB));

        // Create a new transition block to hold the conditional branch.
        BasicBlock *TransitionBB = BB->splitBasicBlock(BI, "TransitionBlock");

        Updates.reserve(Updates.size() + 2 * Successors.size() + 2);

        // 'Successors' become successors of TransitionBB instead of BB,
        // and TransitionBB becomes a single successor of BB.
        Updates.push_back({DominatorTree::Insert, BB, TransitionBB});
        for (BasicBlock *Successor : Successors) {
          Updates.push_back({DominatorTree::Insert, TransitionBB, Successor});
          Updates.push_back({DominatorTree::Delete, BB, Successor});
        }

        // Create a branch that will always branch to the transition block
        // and references DummyReturnBB.
        BB->getTerminator()->eraseFromParent();
        BranchInst::Create(TransitionBB, DummyReturnBB, BoolTrue, BB);
        Updates.push_back({DominatorTree::Insert, BB, DummyReturnBB});
      }
      Changed = true;
    }
  }

  if (!UnreachableBlocks.empty()) {
    BasicBlock *UnreachableBlock = nullptr;

    if (UnreachableBlocks.size() == 1) {
      UnreachableBlock = UnreachableBlocks.front();
    } else {
      UnreachableBlock = BasicBlock::Create(F.getContext(),
                                            "UnifiedUnreachableBlock", &F);
      new UnreachableInst(F.getContext(), UnreachableBlock);

      Updates.reserve(Updates.size() + UnreachableBlocks.size());
      for (BasicBlock *BB : UnreachableBlocks) {
        // Remove and delete the unreachable inst.
        BB->getTerminator()->eraseFromParent();
        BranchInst::Create(UnreachableBlock, BB);
        Updates.push_back({DominatorTree::Insert, BB, UnreachableBlock});
      }
      Changed = true;
    }

    if (!ReturningBlocks.empty()) {
      // Don't create a new unreachable inst if we have a return. The
      // structurizer/annotator can't handle the multiple exits.

      Type *RetTy = F.getReturnType();
      Value *RetVal = RetTy->isVoidTy() ? nullptr : UndefValue::get(RetTy);
      // Remove and delete the unreachable inst.
      UnreachableBlock->getTerminator()->eraseFromParent();

      Function *UnreachableIntrin =
        Intrinsic::getDeclaration(F.getParent(), Intrinsic::amdgcn_unreachable);

      // Insert a call to an intrinsic tracking that this is an unreachable
      // point, in case we want to kill the active lanes or something later.
      CallInst::Create(UnreachableIntrin, {}, "", UnreachableBlock);

      // Don't create a scalar trap. We would only want to trap if this code
      // was really reached, but a scalar trap would happen even if no lanes
      // actually reached here.
      ReturnInst::Create(F.getContext(), RetVal, UnreachableBlock);
      ReturningBlocks.push_back(UnreachableBlock);
      Changed = true;
    }
  }

  // FIXME: add PDT here once simplifycfg is ready.
  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
  if (RequireAndPreserveDomTree)
    DTU.applyUpdates(Updates);
  Updates.clear();

  // Now handle return blocks.
  if (ReturningBlocks.empty())
    return Changed; // No blocks return

  if (ReturningBlocks.size() == 1 && !InsertExport)
    return Changed; // Already has a single return block

  const TargetTransformInfo &TTI =
      getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);

  // Unify returning blocks. If we are going to insert the export it is also
  // necessary to include blocks that are uniformly reached, because in
  // addition to inserting the export the "done" bits on existing exports
  // will be cleared and we do not want to end up with the normal export in a
  // non-unified, uniformly reached block with the "done" bit cleared.
  auto BlocksToUnify = std::move(ReturningBlocks);
  if (InsertExport) {
    llvm::append_range(BlocksToUnify, UniformlyReachedRetBlocks);
  }

  unifyReturnBlockSet(F, DTU, BlocksToUnify, InsertExport, TTI,
                      "UnifiedReturnBlock");
  return true;
}