LLVM  14.0.0git
AMDGPUUnifyDivergentExitNodes.cpp
Go to the documentation of this file.
1 //===- AMDGPUUnifyDivergentExitNodes.cpp ----------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This is a variant of the UnifyFunctionExitNodes pass. Rather than ensuring
10 // there is at most one ret and one unreachable instruction, it ensures there is
11 // at most one divergent exiting block.
12 //
13 // StructurizeCFG can't deal with multi-exit regions formed by branches to
14 // multiple return nodes. It is not desirable to structurize regions with
15 // uniform branches, so unifying those to the same return block as divergent
16 // branches inhibits use of scalar branching. It still can't deal with the case
17 // where one branch goes to return, and one unreachable. Replace unreachable in
18 // this case with a return.
19 //
20 //===----------------------------------------------------------------------===//
21 
22 #include "AMDGPU.h"
23 #include "SIDefines.h"
24 #include "llvm/ADT/ArrayRef.h"
25 #include "llvm/ADT/SmallPtrSet.h"
26 #include "llvm/ADT/SmallVector.h"
27 #include "llvm/ADT/StringRef.h"
32 #include "llvm/IR/BasicBlock.h"
33 #include "llvm/IR/CFG.h"
34 #include "llvm/IR/Constants.h"
35 #include "llvm/IR/Dominators.h"
36 #include "llvm/IR/Function.h"
37 #include "llvm/IR/IRBuilder.h"
38 #include "llvm/IR/InstrTypes.h"
39 #include "llvm/IR/Instructions.h"
40 #include "llvm/IR/Intrinsics.h"
41 #include "llvm/IR/IntrinsicsAMDGPU.h"
42 #include "llvm/IR/Type.h"
43 #include "llvm/InitializePasses.h"
44 #include "llvm/Pass.h"
45 #include "llvm/Support/Casting.h"
46 #include "llvm/Transforms/Scalar.h"
47 #include "llvm/Transforms/Utils.h"
49 
50 using namespace llvm;
51 
52 #define DEBUG_TYPE "amdgpu-unify-divergent-exit-nodes"
53 
54 namespace {
55 
56 class AMDGPUUnifyDivergentExitNodes : public FunctionPass {
57 private:
58  const TargetTransformInfo *TTI = nullptr;
59 
60 public:
61  static char ID; // Pass identification, replacement for typeid
62 
63  AMDGPUUnifyDivergentExitNodes() : FunctionPass(ID) {
65  }
66 
67  // We can preserve non-critical-edgeness when we unify function exit nodes
68  void getAnalysisUsage(AnalysisUsage &AU) const override;
69  BasicBlock *unifyReturnBlockSet(Function &F, DomTreeUpdater &DTU,
70  ArrayRef<BasicBlock *> ReturningBlocks,
71  StringRef Name);
72  bool runOnFunction(Function &F) override;
73 };
74 
75 } // end anonymous namespace
76 
78 
80 
81 INITIALIZE_PASS_BEGIN(AMDGPUUnifyDivergentExitNodes, DEBUG_TYPE,
82  "Unify divergent function exit nodes", false, false)
86 INITIALIZE_PASS_END(AMDGPUUnifyDivergentExitNodes, DEBUG_TYPE,
87  "Unify divergent function exit nodes", false, false)
88 
89 void AMDGPUUnifyDivergentExitNodes::getAnalysisUsage(AnalysisUsage &AU) const{
91  AU.addRequired<DominatorTreeWrapperPass>();
92 
93  AU.addRequired<PostDominatorTreeWrapperPass>();
94 
95  AU.addRequired<LegacyDivergenceAnalysis>();
96 
98  AU.addPreserved<DominatorTreeWrapperPass>();
99  // FIXME: preserve PostDominatorTreeWrapperPass
100  }
101 
102  // No divergent values are changed, only blocks and branch edges.
103  AU.addPreserved<LegacyDivergenceAnalysis>();
104 
105  // We preserve the non-critical-edgeness property
106  AU.addPreservedID(BreakCriticalEdgesID);
107 
108  // This is a cluster of orthogonal Transforms
109  AU.addPreservedID(LowerSwitchID);
111 
112  AU.addRequired<TargetTransformInfoWrapperPass>();
113 }
114 
115 /// \returns true if \p BB is reachable through only uniform branches.
116 /// XXX - Is there a more efficient way to find this?
118  BasicBlock &BB) {
121 
122  while (!Stack.empty()) {
123  BasicBlock *Top = Stack.pop_back_val();
124  if (!DA.isUniform(Top->getTerminator()))
125  return false;
126 
127  for (BasicBlock *Pred : predecessors(Top)) {
128  if (Visited.insert(Pred).second)
129  Stack.push_back(Pred);
130  }
131  }
132 
133  return true;
134 }
135 
136 BasicBlock *AMDGPUUnifyDivergentExitNodes::unifyReturnBlockSet(
137  Function &F, DomTreeUpdater &DTU, ArrayRef<BasicBlock *> ReturningBlocks,
138  StringRef Name) {
139  // Otherwise, we need to insert a new basic block into the function, add a PHI
140  // nodes (if the function returns values), and convert all of the return
141  // instructions into unconditional branches.
142  BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(), Name, &F);
143  IRBuilder<> B(NewRetBlock);
144 
145  PHINode *PN = nullptr;
146  if (F.getReturnType()->isVoidTy()) {
147  B.CreateRetVoid();
148  } else {
149  // If the function doesn't return void... add a PHI node to the block...
150  PN = B.CreatePHI(F.getReturnType(), ReturningBlocks.size(),
151  "UnifiedRetVal");
152  B.CreateRet(PN);
153  }
154 
155  // Loop over all of the blocks, replacing the return instruction with an
156  // unconditional branch.
157  std::vector<DominatorTree::UpdateType> Updates;
158  Updates.reserve(ReturningBlocks.size());
159  for (BasicBlock *BB : ReturningBlocks) {
160  // Add an incoming element to the PHI node for every return instruction that
161  // is merging into this new block...
162  if (PN)
163  PN->addIncoming(BB->getTerminator()->getOperand(0), BB);
164 
165  // Remove and delete the return inst.
166  BB->getTerminator()->eraseFromParent();
167  BranchInst::Create(NewRetBlock, BB);
168  Updates.push_back({DominatorTree::Insert, BB, NewRetBlock});
169  }
170 
172  DTU.applyUpdates(Updates);
173  Updates.clear();
174 
175  for (BasicBlock *BB : ReturningBlocks) {
176  // Cleanup possible branch to unconditional branch to the return.
177  simplifyCFG(BB, *TTI, RequireAndPreserveDomTree ? &DTU : nullptr,
178  SimplifyCFGOptions().bonusInstThreshold(2));
179  }
180 
181  return NewRetBlock;
182 }
183 
185  DominatorTree *DT = nullptr;
187  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
188 
189  auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
190 
191  // If there's only one exit, we don't need to do anything.
192  if (PDT.root_size() <= 1)
193  return false;
194 
195  LegacyDivergenceAnalysis &DA = getAnalysis<LegacyDivergenceAnalysis>();
196  TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
197 
198  // Loop over all of the blocks in a function, tracking all of the blocks that
199  // return.
200  SmallVector<BasicBlock *, 4> ReturningBlocks;
201  SmallVector<BasicBlock *, 4> UnreachableBlocks;
202 
203  // Dummy return block for infinite loop.
204  BasicBlock *DummyReturnBB = nullptr;
205 
206  bool Changed = false;
207  std::vector<DominatorTree::UpdateType> Updates;
208 
209  for (BasicBlock *BB : PDT.roots()) {
210  if (isa<ReturnInst>(BB->getTerminator())) {
211  if (!isUniformlyReached(DA, *BB))
212  ReturningBlocks.push_back(BB);
213  } else if (isa<UnreachableInst>(BB->getTerminator())) {
214  if (!isUniformlyReached(DA, *BB))
215  UnreachableBlocks.push_back(BB);
216  } else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
217 
218  ConstantInt *BoolTrue = ConstantInt::getTrue(F.getContext());
219  if (DummyReturnBB == nullptr) {
220  DummyReturnBB = BasicBlock::Create(F.getContext(),
221  "DummyReturnBlock", &F);
222  Type *RetTy = F.getReturnType();
223  Value *RetVal = RetTy->isVoidTy() ? nullptr : UndefValue::get(RetTy);
224  ReturnInst::Create(F.getContext(), RetVal, DummyReturnBB);
225  ReturningBlocks.push_back(DummyReturnBB);
226  }
227 
228  if (BI->isUnconditional()) {
229  BasicBlock *LoopHeaderBB = BI->getSuccessor(0);
230  BI->eraseFromParent(); // Delete the unconditional branch.
231  // Add a new conditional branch with a dummy edge to the return block.
232  BranchInst::Create(LoopHeaderBB, DummyReturnBB, BoolTrue, BB);
233  Updates.push_back({DominatorTree::Insert, BB, DummyReturnBB});
234  } else { // Conditional branch.
236 
237  // Create a new transition block to hold the conditional branch.
238  BasicBlock *TransitionBB = BB->splitBasicBlock(BI, "TransitionBlock");
239 
240  Updates.reserve(Updates.size() + 2 * Successors.size() + 2);
241 
242  // 'Successors' become successors of TransitionBB instead of BB,
243  // and TransitionBB becomes a single successor of BB.
244  Updates.push_back({DominatorTree::Insert, BB, TransitionBB});
245  for (BasicBlock *Successor : Successors) {
246  Updates.push_back({DominatorTree::Insert, TransitionBB, Successor});
247  Updates.push_back({DominatorTree::Delete, BB, Successor});
248  }
249 
250  // Create a branch that will always branch to the transition block and
251  // references DummyReturnBB.
252  BB->getTerminator()->eraseFromParent();
253  BranchInst::Create(TransitionBB, DummyReturnBB, BoolTrue, BB);
254  Updates.push_back({DominatorTree::Insert, BB, DummyReturnBB});
255  }
256  Changed = true;
257  }
258  }
259 
260  if (!UnreachableBlocks.empty()) {
261  BasicBlock *UnreachableBlock = nullptr;
262 
263  if (UnreachableBlocks.size() == 1) {
264  UnreachableBlock = UnreachableBlocks.front();
265  } else {
266  UnreachableBlock = BasicBlock::Create(F.getContext(),
267  "UnifiedUnreachableBlock", &F);
268  new UnreachableInst(F.getContext(), UnreachableBlock);
269 
270  Updates.reserve(Updates.size() + UnreachableBlocks.size());
271  for (BasicBlock *BB : UnreachableBlocks) {
272  // Remove and delete the unreachable inst.
273  BB->getTerminator()->eraseFromParent();
274  BranchInst::Create(UnreachableBlock, BB);
275  Updates.push_back({DominatorTree::Insert, BB, UnreachableBlock});
276  }
277  Changed = true;
278  }
279 
280  if (!ReturningBlocks.empty()) {
281  // Don't create a new unreachable inst if we have a return. The
282  // structurizer/annotator can't handle the multiple exits
283 
284  Type *RetTy = F.getReturnType();
285  Value *RetVal = RetTy->isVoidTy() ? nullptr : UndefValue::get(RetTy);
286  // Remove and delete the unreachable inst.
287  UnreachableBlock->getTerminator()->eraseFromParent();
288 
289  Function *UnreachableIntrin =
290  Intrinsic::getDeclaration(F.getParent(), Intrinsic::amdgcn_unreachable);
291 
292  // Insert a call to an intrinsic tracking that this is an unreachable
293  // point, in case we want to kill the active lanes or something later.
294  CallInst::Create(UnreachableIntrin, {}, "", UnreachableBlock);
295 
296  // Don't create a scalar trap. We would only want to trap if this code was
297  // really reached, but a scalar trap would happen even if no lanes
298  // actually reached here.
299  ReturnInst::Create(F.getContext(), RetVal, UnreachableBlock);
300  ReturningBlocks.push_back(UnreachableBlock);
301  Changed = true;
302  }
303  }
304 
305  // FIXME: add PDT here once simplifycfg is ready.
308  DTU.applyUpdates(Updates);
309  Updates.clear();
310 
311  // Now handle return blocks.
312  if (ReturningBlocks.empty())
313  return Changed; // No blocks return
314 
315  if (ReturningBlocks.size() == 1)
316  return Changed; // Already has a single return block
317 
318  unifyReturnBlockSet(F, DTU, ReturningBlocks, "UnifiedReturnBlock");
319  return true;
320 }
llvm::predecessors
pred_range predecessors(BasicBlock *BB)
Definition: CFG.h:127
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
llvm::Intrinsic::getDeclaration
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1379
Scalar.h
llvm::DomTreeUpdater::applyUpdates
void applyUpdates(ArrayRef< DominatorTree::UpdateType > Updates)
Submit updates to all available trees.
Definition: DomTreeUpdater.cpp:230
llvm::Function
Definition: Function.h:61
StringRef.h
Pass.h
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::IRBuilder<>
DomTreeUpdater.h
Local.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::BasicBlock::eraseFromParent
SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
Definition: BasicBlock.cpp:129
llvm::DominatorTreeBase< BasicBlock, false >::Insert
static constexpr UpdateKind Insert
Definition: GenericDomTree.h:242
isUniformlyReached
static bool isUniformlyReached(const LegacyDivergenceAnalysis &DA, BasicBlock &BB)
Definition: AMDGPUUnifyDivergentExitNodes.cpp:117
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
llvm::successors
succ_range successors(Instruction *I)
Definition: CFG.h:262
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
Constants.h
PostDominators.h
Intrinsics.h
InstrTypes.h
llvm::PostDominatorTreeWrapperPass
Definition: PostDominators.h:73
llvm::CallInst::Create
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Definition: Instructions.h:1518
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
false
Definition: StackSlotColoring.cpp:142
llvm::initializeAMDGPUUnifyDivergentExitNodesPass
void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry &)
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:287
llvm::M68kBeads::DA
@ DA
Definition: M68kBaseInfo.h:59
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1771
llvm::DomTreeUpdater
Definition: DomTreeUpdater.h:28
llvm::LegacyDivergenceAnalysis
Definition: LegacyDivergenceAnalysis.h:31
SmallPtrSet.h
Utils.h
Type.h
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
CFG.h
BasicBlock.h
llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:78
llvm::AMDGPUUnifyDivergentExitNodesID
char & AMDGPUUnifyDivergentExitNodesID
Definition: AMDGPUUnifyDivergentExitNodes.cpp:79
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2387
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
DEBUG_TYPE
#define DEBUG_TYPE
Definition: AMDGPUUnifyDivergentExitNodes.cpp:52
llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition: Instructions.h:2783
llvm::BranchInst::Create
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
Definition: Instructions.h:3124
llvm::nodes
iterator_range< typename GraphTraits< GraphType >::nodes_iterator > nodes(const GraphType &G)
Definition: GraphTraits.h:108
ArrayRef.h
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(AMDGPUUnifyDivergentExitNodes, DEBUG_TYPE, "Unify divergent function exit nodes", false, false) INITIALIZE_PASS_END(AMDGPUUnifyDivergentExitNodes
IRBuilder.h
function
print Print MemDeps of function
Definition: MemDepPrinter.cpp:83
llvm::Type::isVoidTy
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:138
llvm::simplifyCFG
bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, DomTreeUpdater *DTU=nullptr, const SimplifyCFGOptions &Options={}, ArrayRef< WeakVH > LoopHeaders={})
Definition: SimplifyCFG.cpp:6765
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
AMDGPU.h
llvm::BasicBlock::Create
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:100
llvm::DomTreeUpdater::UpdateStrategy::Eager
@ Eager
llvm::RequireAndPreserveDomTree
cl::opt< bool > RequireAndPreserveDomTree
This function is used to do simplification of a CFG.
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:148
llvm::LowerSwitchID
char & LowerSwitchID
Definition: LowerSwitch.cpp:570
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
llvm::ConstantInt::getTrue
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:848
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:52
llvm::Successor
@ Successor
Definition: SIMachineScheduler.h:35
SIDefines.h
Casting.h
Function.h
llvm::ReturnInst::Create
static ReturnInst * Create(LLVMContext &C, Value *retVal=nullptr, Instruction *InsertBefore=nullptr)
Definition: Instructions.h:3013
llvm::BreakCriticalEdgesID
char & BreakCriticalEdgesID
llvm::SimplifyCFGOptions
Definition: SimplifyCFGOptions.h:23
Instructions.h
LegacyDivergenceAnalysis.h
SmallVector.h
exit
declare void exit(i32) noreturn nounwind This compiles into
Definition: README.txt:1072
Dominators.h
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
TargetTransformInfo.h
llvm::PHINode
Definition: Instructions.h:2633
llvm::Pass::getAnalysisUsage
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:93
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::UnreachableInst
This function has undefined behavior.
Definition: Instructions.h:4715
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3068
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::DominatorTreeBase< BasicBlock, false >::Delete
static constexpr UpdateKind Delete
Definition: GenericDomTree.h:243
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37