41#include "llvm/IR/IntrinsicsAMDGPU.h"
53#define DEBUG_TYPE "amdgpu-unify-divergent-exit-nodes"
57class AMDGPUUnifyDivergentExitNodesImpl {
62 AMDGPUUnifyDivergentExitNodesImpl() =
delete;
74class AMDGPUUnifyDivergentExitNodes :
public FunctionPass {
86char AMDGPUUnifyDivergentExitNodes::ID = 0;
91 "Unify divergent function exit nodes",
false,
false)
98void AMDGPUUnifyDivergentExitNodes::getAnalysisUsage(
AnalysisUsage &AU)
const {
114 FunctionPass::getAnalysisUsage(AU);
125 while (!Stack.empty()) {
131 if (Visited.
insert(Pred).second)
132 Stack.push_back(Pred);
139BasicBlock *AMDGPUUnifyDivergentExitNodesImpl::unifyReturnBlockSet(
149 if (
F.getReturnType()->isVoidTy()) {
153 PN =
B.CreatePHI(
F.getReturnType(), ReturningBlocks.
size(),
160 std::vector<DominatorTree::UpdateType> Updates;
161 Updates.reserve(ReturningBlocks.
size());
166 PN->
addIncoming(BB->getTerminator()->getOperand(0), BB);
169 BB->getTerminator()->eraseFromParent();
171 Updates.emplace_back(DominatorTree::Insert, BB, NewRetBlock);
194 !isa<BranchInst>(PDT.
getRoot()->getTerminator())))
205 bool Changed =
false;
206 std::vector<DominatorTree::UpdateType> Updates;
215 PDT.
roots(), [&](
auto BB) { return !isUniformlyReached(UA, *BB); });
218 if (isa<ReturnInst>(BB->getTerminator())) {
219 if (HasDivergentExitBlock)
221 }
else if (isa<UnreachableInst>(BB->getTerminator())) {
222 if (HasDivergentExitBlock)
224 }
else if (
BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
227 if (DummyReturnBB ==
nullptr) {
229 "DummyReturnBlock", &
F);
233 ReturningBlocks.
push_back(DummyReturnBB);
236 if (BI->isUnconditional()) {
237 BasicBlock *LoopHeaderBB = BI->getSuccessor(0);
241 Updates.emplace_back(DominatorTree::Insert, BB, DummyReturnBB);
248 Updates.reserve(Updates.size() + 2 * Successors.size() + 2);
252 Updates.emplace_back(DominatorTree::Insert, BB, TransitionBB);
254 Updates.emplace_back(DominatorTree::Insert, TransitionBB,
Successor);
255 Updates.emplace_back(DominatorTree::Delete, BB,
Successor);
262 Updates.emplace_back(DominatorTree::Insert, BB, DummyReturnBB);
268 if (!UnreachableBlocks.
empty()) {
271 if (UnreachableBlocks.
size() == 1) {
272 UnreachableBlock = UnreachableBlocks.
front();
275 "UnifiedUnreachableBlock", &
F);
278 Updates.reserve(Updates.size() + UnreachableBlocks.
size());
281 BB->getTerminator()->eraseFromParent();
283 Updates.emplace_back(DominatorTree::Insert, BB, UnreachableBlock);
288 if (!ReturningBlocks.
empty()) {
298 F.getParent(), Intrinsic::amdgcn_unreachable);
308 ReturningBlocks.
push_back(UnreachableBlock);
320 if (ReturningBlocks.
empty())
323 if (ReturningBlocks.
size() == 1)
326 unifyReturnBlockSet(
F, DTU, ReturningBlocks,
"UnifiedReturnBlock");
330bool AMDGPUUnifyDivergentExitNodes::runOnFunction(
Function &
F) {
333 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
335 getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
336 const auto &UA = getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
337 const auto *TranformInfo =
338 &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
F);
339 return AMDGPUUnifyDivergentExitNodesImpl(TranformInfo).run(
F, DT, PDT, UA);
352 return AMDGPUUnifyDivergentExitNodesImpl(TransformInfo).
run(
F, DT, PDT, UA)
static bool isUniformlyReached(const UniformityInfo &UA, BasicBlock &BB)
Unify divergent function exit nodes
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Performs the initial survey of the specified function
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
LLVM Basic Block Representation.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
This is the shared class of boolean and integer constants.
static ConstantInt * getTrue(LLVMContext &Context)
Analysis pass which computes a DominatorTree.
iterator_range< root_iterator > roots()
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Analysis pass which computes a PostDominatorTree.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
static ReturnInst * Create(LLVMContext &C, Value *retVal=nullptr, InsertPosition InsertBefore=nullptr)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Analysis pass providing the TargetTransformInfo.
Result run(const Function &F, FunctionAnalysisManager &)
The instances of the Type class are immutable: once they are created, they are never changed.
This function has undefined behavior.
LLVM Value Representation.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
This is an optimization pass for GlobalISel generic memory operations.
bool hasOnlySimpleTerminator(const Function &F)
auto successors(const MachineBasicBlock *BB)
char & AMDGPUUnifyDivergentExitNodesID
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
cl::opt< bool > RequireAndPreserveDomTree
This function is used to do simplification of a CFG.
bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, DomTreeUpdater *DTU=nullptr, const SimplifyCFGOptions &Options={}, ArrayRef< WeakVH > LoopHeaders={})
char & BreakCriticalEdgesID
auto predecessors(const MachineBasicBlock *BB)
void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry &)