42#include "llvm/IR/IntrinsicsAMDGPU.h"
54#define DEBUG_TYPE "amdgpu-unify-divergent-exit-nodes"
58class AMDGPUUnifyDivergentExitNodesImpl {
63 AMDGPUUnifyDivergentExitNodesImpl() =
delete;
75class AMDGPUUnifyDivergentExitNodes :
public FunctionPass {
87char AMDGPUUnifyDivergentExitNodes::ID = 0;
92 "Unify divergent function exit nodes",
false,
false)
99void AMDGPUUnifyDivergentExitNodes::getAnalysisUsage(
AnalysisUsage &AU)
const {
115 FunctionPass::getAnalysisUsage(AU);
126 while (!Stack.empty()) {
132 if (Visited.
insert(Pred).second)
133 Stack.push_back(Pred);
140BasicBlock *AMDGPUUnifyDivergentExitNodesImpl::unifyReturnBlockSet(
150 if (
F.getReturnType()->isVoidTy()) {
154 PN =
B.CreatePHI(
F.getReturnType(), ReturningBlocks.
size(),
161 std::vector<DominatorTree::UpdateType> Updates;
162 Updates.reserve(ReturningBlocks.
size());
167 PN->
addIncoming(BB->getTerminator()->getOperand(0), BB);
170 BB->getTerminator()->eraseFromParent();
172 Updates.emplace_back(DominatorTree::Insert, BB, NewRetBlock);
195 !isa<BranchInst>(PDT.
getRoot()->getTerminator())))
206 bool Changed =
false;
207 std::vector<DominatorTree::UpdateType> Updates;
216 PDT.
roots(), [&](
auto BB) { return !isUniformlyReached(UA, *BB); });
219 if (isa<ReturnInst>(BB->getTerminator())) {
220 if (HasDivergentExitBlock)
222 }
else if (isa<UnreachableInst>(BB->getTerminator())) {
223 if (HasDivergentExitBlock)
225 }
else if (
BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
228 if (DummyReturnBB ==
nullptr) {
230 "DummyReturnBlock", &
F);
234 ReturningBlocks.
push_back(DummyReturnBB);
237 if (BI->isUnconditional()) {
238 BasicBlock *LoopHeaderBB = BI->getSuccessor(0);
242 Updates.emplace_back(DominatorTree::Insert, BB, DummyReturnBB);
249 Updates.reserve(Updates.size() + 2 * Successors.size() + 2);
253 Updates.emplace_back(DominatorTree::Insert, BB, TransitionBB);
255 Updates.emplace_back(DominatorTree::Insert, TransitionBB,
Successor);
256 Updates.emplace_back(DominatorTree::Delete, BB,
Successor);
263 Updates.emplace_back(DominatorTree::Insert, BB, DummyReturnBB);
269 if (!UnreachableBlocks.
empty()) {
272 if (UnreachableBlocks.
size() == 1) {
273 UnreachableBlock = UnreachableBlocks.
front();
276 "UnifiedUnreachableBlock", &
F);
279 Updates.reserve(Updates.size() + UnreachableBlocks.
size());
282 BB->getTerminator()->eraseFromParent();
284 Updates.emplace_back(DominatorTree::Insert, BB, UnreachableBlock);
289 if (!ReturningBlocks.
empty()) {
309 ReturningBlocks.
push_back(UnreachableBlock);
321 if (ReturningBlocks.
empty())
324 if (ReturningBlocks.
size() == 1)
327 unifyReturnBlockSet(
F, DTU, ReturningBlocks,
"UnifiedReturnBlock");
331bool AMDGPUUnifyDivergentExitNodes::runOnFunction(
Function &
F) {
334 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
336 getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
337 const auto &UA = getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
338 const auto *TranformInfo =
339 &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
F);
340 return AMDGPUUnifyDivergentExitNodesImpl(TranformInfo).run(
F, DT, PDT, UA);
353 return AMDGPUUnifyDivergentExitNodesImpl(TransformInfo).
run(
F, DT, PDT, UA)
static bool isUniformlyReached(const UniformityInfo &UA, BasicBlock &BB)
Unify divergent function exit nodes
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Performs the initial survey of the specified function
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
LLVM Basic Block Representation.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
This is the shared class of boolean and integer constants.
static ConstantInt * getTrue(LLVMContext &Context)
Analysis pass which computes a DominatorTree.
iterator_range< root_iterator > roots()
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
void applyUpdates(ArrayRef< typename DomTreeT::UpdateType > Updates)
Submit updates to all available trees.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Analysis pass which computes a PostDominatorTree.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
static ReturnInst * Create(LLVMContext &C, Value *retVal=nullptr, InsertPosition InsertBefore=nullptr)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Analysis pass providing the TargetTransformInfo.
Result run(const Function &F, FunctionAnalysisManager &)
The instances of the Type class are immutable: once they are created, they are never changed.
This function has undefined behavior.
LLVM Value Representation.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
This is an optimization pass for GlobalISel generic memory operations.
bool hasOnlySimpleTerminator(const Function &F)
auto successors(const MachineBasicBlock *BB)
char & AMDGPUUnifyDivergentExitNodesID
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
cl::opt< bool > RequireAndPreserveDomTree
This function is used to do simplification of a CFG.
bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, DomTreeUpdater *DTU=nullptr, const SimplifyCFGOptions &Options={}, ArrayRef< WeakVH > LoopHeaders={})
char & BreakCriticalEdgesID
auto predecessors(const MachineBasicBlock *BB)
void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry &)