41#include "llvm/IR/IntrinsicsAMDGPU.h"
52#define DEBUG_TYPE "amdgpu-unify-divergent-exit-nodes"
56class AMDGPUUnifyDivergentExitNodes :
public FunctionPass {
77char AMDGPUUnifyDivergentExitNodes::ID = 0;
82 "Unify divergent function exit nodes",
false,
false)
89void AMDGPUUnifyDivergentExitNodes::getAnalysisUsage(
AnalysisUsage &AU)
const{
110 FunctionPass::getAnalysisUsage(AU);
122 while (!Stack.empty()) {
128 if (Visited.
insert(Pred).second)
129 Stack.push_back(Pred);
136BasicBlock *AMDGPUUnifyDivergentExitNodes::unifyReturnBlockSet(
146 if (
F.getReturnType()->isVoidTy()) {
150 PN =
B.CreatePHI(
F.getReturnType(), ReturningBlocks.
size(),
157 std::vector<DominatorTree::UpdateType> Updates;
158 Updates.reserve(ReturningBlocks.
size());
163 PN->
addIncoming(BB->getTerminator()->getOperand(0), BB);
166 BB->getTerminator()->eraseFromParent();
168 Updates.push_back({DominatorTree::Insert, BB, NewRetBlock});
184bool AMDGPUUnifyDivergentExitNodes::runOnFunction(
Function &
F) {
187 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
189 auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
190 if (PDT.root_size() == 0 ||
191 (PDT.root_size() == 1 &&
192 !isa<BranchInst>(PDT.getRoot()->getTerminator())))
196 TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
F);
206 bool Changed =
false;
207 std::vector<DominatorTree::UpdateType> Updates;
216 PDT.roots(), [&](
auto BB) { return !isUniformlyReached(DA, *BB); });
219 if (isa<ReturnInst>(BB->getTerminator())) {
220 if (HasDivergentExitBlock)
222 }
else if (isa<UnreachableInst>(BB->getTerminator())) {
223 if (HasDivergentExitBlock)
225 }
else if (
BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
228 if (DummyReturnBB ==
nullptr) {
230 "DummyReturnBlock", &
F);
234 ReturningBlocks.
push_back(DummyReturnBB);
237 if (BI->isUnconditional()) {
238 BasicBlock *LoopHeaderBB = BI->getSuccessor(0);
242 Updates.push_back({DominatorTree::Insert, BB, DummyReturnBB});
249 Updates.reserve(Updates.size() + 2 * Successors.size() + 2);
253 Updates.push_back({DominatorTree::Insert, BB, TransitionBB});
255 Updates.push_back({DominatorTree::Insert, TransitionBB,
Successor});
256 Updates.push_back({DominatorTree::Delete, BB,
Successor});
263 Updates.push_back({DominatorTree::Insert, BB, DummyReturnBB});
269 if (!UnreachableBlocks.
empty()) {
272 if (UnreachableBlocks.
size() == 1) {
273 UnreachableBlock = UnreachableBlocks.
front();
276 "UnifiedUnreachableBlock", &
F);
279 Updates.reserve(Updates.size() + UnreachableBlocks.
size());
282 BB->getTerminator()->eraseFromParent();
284 Updates.push_back({DominatorTree::Insert, BB, UnreachableBlock});
289 if (!ReturningBlocks.
empty()) {
309 ReturningBlocks.
push_back(UnreachableBlock);
321 if (ReturningBlocks.
empty())
324 if (ReturningBlocks.
size() == 1)
327 unifyReturnBlockSet(
F, DTU, ReturningBlocks,
"UnifiedReturnBlock");
Unify divergent function exit nodes
static bool isUniformlyReached(const LegacyDivergenceAnalysis &DA, BasicBlock &BB)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
print Print MemDeps of function
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
Represent the analysis usage information of a pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
LLVM Basic Block Representation.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well formed.
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
This is the shared class of boolean and integer constants.
static ConstantInt * getTrue(LLVMContext &Context)
void applyUpdates(ArrayRef< DominatorTree::UpdateType > Updates)
Submit updates to all available trees.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
static ReturnInst * Create(LLVMContext &C, Value *retVal=nullptr, Instruction *InsertBefore=nullptr)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
The instances of the Type class are immutable: once they are created, they are never changed.
This function has undefined behavior.
LLVM Value Representation.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
This is an optimization pass for GlobalISel generic memory operations.
auto successors(const MachineBasicBlock *BB)
char & AMDGPUUnifyDivergentExitNodesID
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
cl::opt< bool > RequireAndPreserveDomTree
This function is used to do simplification of a CFG.
bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, DomTreeUpdater *DTU=nullptr, const SimplifyCFGOptions &Options={}, ArrayRef< WeakVH > LoopHeaders={})
char & BreakCriticalEdgesID
auto predecessors(const MachineBasicBlock *BB)
void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry &)