41 #include "llvm/IR/IntrinsicsAMDGPU.h" 52 #define DEBUG_TYPE "amdgpu-unify-divergent-exit-nodes" 56 class AMDGPUUnifyDivergentExitNodes :
public FunctionPass {
76 "Unify divergent function exit nodes",
false,
false)
83 void AMDGPUUnifyDivergentExitNodes::getAnalysisUsage(
AnalysisUsage &AU)
const{
117 Stack.push_back(Pred);
119 while (!Stack.empty()) {
125 if (Visited.
insert(Pred).second)
126 Stack.push_back(Pred);
138 if (Intrin->getIntrinsicID() == Intrinsic::amdgcn_exp) {
139 Intrin->setArgOperand(6, BoolFalse);
140 }
else if (Intrin->getIntrinsicID() == Intrinsic::amdgcn_exp_compr) {
141 Intrin->setArgOperand(4, BoolFalse);
166 B.CreateIntrinsic(Intrinsic::amdgcn_exp, {
B.getFloatTy() },
177 if (
F.getReturnType()->isVoidTy()) {
181 PN =
B.CreatePHI(
F.getReturnType(), ReturningBlocks.
size(),
189 std::vector<DominatorTree::UpdateType> Updates;
190 Updates.reserve(ReturningBlocks.
size());
195 PN->
addIncoming(BB->getTerminator()->getOperand(0), BB);
198 BB->getTerminator()->eraseFromParent();
219 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
221 auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
240 bool InsertExport =
false;
242 bool Changed =
false;
243 std::vector<DominatorTree::UpdateType> Updates;
246 if (isa<ReturnInst>(BB->getTerminator())) {
251 }
else if (isa<UnreachableInst>(BB->getTerminator())) {
254 }
else if (
BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
257 if (DummyReturnBB ==
nullptr) {
259 "DummyReturnBlock", &
F);
260 Type *RetTy =
F.getReturnType();
293 ReturningBlocks.
push_back(DummyReturnBB);
296 if (BI->isUnconditional()) {
297 BasicBlock *LoopHeaderBB = BI->getSuccessor(0);
308 Updates.reserve(Updates.size() + 2 * Successors.size() + 2);
328 if (!UnreachableBlocks.
empty()) {
331 if (UnreachableBlocks.
size() == 1) {
332 UnreachableBlock = UnreachableBlocks.
front();
335 "UnifiedUnreachableBlock", &
F);
338 Updates.reserve(Updates.size() + UnreachableBlocks.
size());
348 if (!ReturningBlocks.
empty()) {
352 Type *RetTy =
F.getReturnType();
368 ReturningBlocks.
push_back(UnreachableBlock);
376 DTU.applyUpdates(Updates);
380 if (ReturningBlocks.
empty())
383 if (ReturningBlocks.
size() == 1 && !InsertExport)
387 = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
F);
394 auto BlocksToUnify =
std::move(ReturningBlocks);
400 "UnifiedReturnBlock");
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
static ConstantInt * getFalse(LLVMContext &Context)
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at application launch and destroyed by llvm_shutdown.
This class represents lattice values for constants.
LLVM_NODISCARD bool empty() const
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
void push_back(const T &Elt)
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well formed.
static ReturnInst * Create(LLVMContext &C, Value *retVal=nullptr, Instruction *InsertBefore=nullptr)
Value of the register doesn't matter.
static bool isUniformlyReached(const LegacyDivergenceAnalysis &DA, BasicBlock &BB)
static constexpr UpdateKind Delete
cl::opt< bool > RequireAndPreserveDomTree
This function is used to do simplification of a CFG.
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the succ_* functions, and *::succ_iterator.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do their job.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Interval::succ_iterator succ_end(Interval *I)
bool isVoidTy() const
Return true if this is 'void'.
static bool runOnFunction(Function &F, bool PostInlining)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static constexpr UpdateKind Insert
LLVM Basic Block Representation.
The instances of the Type class are immutable: once they are created, they are never changed.
Conditional or Unconditional Branch instruction.
char & BreakCriticalEdgesID
size_t size() const
size - Get the array size.
This function has undefined behavior.
This file contains the declarations for the subclasses of Constant, which represent the different flavors of constant values that live in LLVM.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Represent the analysis usage information of a pass.
FunctionPass class - This class is used to implement most global optimizations.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
static BasicBlock * unifyReturnBlockSet(Function &F, DomTreeUpdater &DTU, ArrayRef< BasicBlock * > ReturningBlocks, bool InsertExport, const TargetTransformInfo &TTI, StringRef Name)
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
INITIALIZE_PASS_BEGIN(AMDGPUUnifyDivergentExitNodes, DEBUG_TYPE, "Unify divergent function exit nodes", false, false) INITIALIZE_PASS_END(AMDGPUUnifyDivergentExitNodes
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This is the shared class of boolean and integer constants.
char & AMDGPUUnifyDivergentExitNodesID
void applyUpdates(ArrayRef< DominatorTree::UpdateType > Updates)
Submit updates to all available trees.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Calling convention used for Mesa/AMDPAL pixel shaders.
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
pred_range predecessors(BasicBlock *BB)
static ConstantInt * getTrue(LLVMContext &Context)
bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, DomTreeUpdater *DTU=nullptr, const SimplifyCFGOptions &Options={}, SmallPtrSetImpl< BasicBlock * > *LoopHeaders=nullptr)
This file provides various utilities for inspecting and working with the control flow graph in LLVM IR.
iterator_range< typename GraphTraits< GraphType >::nodes_iterator > nodes(const GraphType &G)
SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
bool isUniform(const Value *V) const
print Print MemDeps of function
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
static void removeDoneExport(Function &F)
Legacy analysis pass which computes a DominatorTree.
void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry &)
A wrapper class for inspecting calls to intrinsic functions.
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)