Go to the documentation of this file.
41 #include "llvm/IR/IntrinsicsAMDGPU.h"
52 #define DEBUG_TYPE "amdgpu-unify-divergent-exit-nodes"
56 class AMDGPUUnifyDivergentExitNodes :
public FunctionPass {
76 "Unify divergent function exit nodes",
false,
false)
83 void AMDGPUUnifyDivergentExitNodes::getAnalysisUsage(
AnalysisUsage &AU)
const{
116 while (!Stack.empty()) {
122 if (Visited.
insert(Pred).second)
123 Stack.push_back(Pred);
135 if (Intrin->getIntrinsicID() == Intrinsic::amdgcn_exp) {
136 Intrin->setArgOperand(6, BoolFalse);
137 }
else if (Intrin->getIntrinsicID() == Intrinsic::amdgcn_exp_compr) {
138 Intrin->setArgOperand(4, BoolFalse);
163 B.CreateIntrinsic(Intrinsic::amdgcn_exp, {
B.getFloatTy() },
174 if (
F.getReturnType()->isVoidTy()) {
178 PN =
B.CreatePHI(
F.getReturnType(), ReturningBlocks.
size(),
186 std::vector<DominatorTree::UpdateType> Updates;
187 Updates.reserve(ReturningBlocks.
size());
195 BB->getTerminator()->eraseFromParent();
216 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
218 auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
237 bool InsertExport =
false;
239 bool Changed =
false;
240 std::vector<DominatorTree::UpdateType> Updates;
243 if (isa<ReturnInst>(
BB->getTerminator())) {
245 ReturningBlocks.push_back(
BB);
247 UniformlyReachedRetBlocks.push_back(
BB);
248 }
else if (isa<UnreachableInst>(
BB->getTerminator())) {
250 UnreachableBlocks.push_back(
BB);
251 }
else if (
BranchInst *BI = dyn_cast<BranchInst>(
BB->getTerminator())) {
254 if (DummyReturnBB ==
nullptr) {
256 "DummyReturnBlock", &
F);
257 Type *RetTy =
F.getReturnType();
290 ReturningBlocks.push_back(DummyReturnBB);
293 if (BI->isUnconditional()) {
294 BasicBlock *LoopHeaderBB = BI->getSuccessor(0);
303 BasicBlock *TransitionBB =
BB->splitBasicBlock(BI,
"TransitionBlock");
305 Updates.reserve(Updates.size() + 2 * Successors.size() + 2);
317 BB->getTerminator()->eraseFromParent();
325 if (!UnreachableBlocks.empty()) {
328 if (UnreachableBlocks.size() == 1) {
329 UnreachableBlock = UnreachableBlocks.front();
332 "UnifiedUnreachableBlock", &
F);
335 Updates.reserve(Updates.size() + UnreachableBlocks.size());
338 BB->getTerminator()->eraseFromParent();
345 if (!ReturningBlocks.empty()) {
349 Type *RetTy =
F.getReturnType();
365 ReturningBlocks.push_back(UnreachableBlock);
373 DTU.applyUpdates(Updates);
377 if (ReturningBlocks.empty())
380 if (ReturningBlocks.size() == 1 && !InsertExport)
384 = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
F);
391 auto BlocksToUnify =
std::move(ReturningBlocks);
397 "UnifiedReturnBlock");
pred_range predecessors(BasicBlock *BB)
This class represents lattice values for constants.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
void applyUpdates(ArrayRef< DominatorTree::UpdateType > Updates)
Submit updates to all available trees.
Interval::succ_iterator succ_end(Interval *I)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
The instances of the Type class are immutable: once they are created, they are never changed.
SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
static constexpr UpdateKind Insert
static bool isUniformlyReached(const LegacyDivergenceAnalysis &DA, BasicBlock &BB)
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
LLVM Basic Block Representation.
This is the shared class of boolean and integer constants.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at application launch and destroyed by llvm_shutdown.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Represent the analysis usage information of a pass.
void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry &)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Legacy analysis pass which computes a DominatorTree.
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
char & AMDGPUUnifyDivergentExitNodesID
static BasicBlock * unifyReturnBlockSet(Function &F, DomTreeUpdater &DTU, ArrayRef< BasicBlock * > ReturningBlocks, bool InsertExport, const TargetTransformInfo &TTI, StringRef Name)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
iterator_range< typename GraphTraits< GraphType >::nodes_iterator > nodes(const GraphType &G)
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the succ_* functions, and *::succ_iterator.
INITIALIZE_PASS_BEGIN(AMDGPUUnifyDivergentExitNodes, DEBUG_TYPE, "Unify divergent function exit nodes", false, false) INITIALIZE_PASS_END(AMDGPUUnifyDivergentExitNodes
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
print Print MemDeps of function
bool isVoidTy() const
Return true if this is 'void'.
bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, DomTreeUpdater *DTU=nullptr, const SimplifyCFGOptions &Options={}, ArrayRef< WeakVH > LoopHeaders={})
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
@ AMDGPU_PS
Calling convention used for Mesa/AMDPAL pixel shaders.
cl::opt< bool > RequireAndPreserveDomTree
This function is used to do simplification of a CFG.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well formed.
static ConstantInt * getFalse(LLVMContext &Context)
static bool runOnFunction(Function &F, bool PostInlining)
static ConstantInt * getTrue(LLVMContext &Context)
static ReturnInst * Create(LLVMContext &C, Value *retVal=nullptr, Instruction *InsertBefore=nullptr)
bool isUniform(const Value *V) const
char & BreakCriticalEdgesID
@ Undef
Value of the register doesn't matter.
A wrapper class for inspecting calls to intrinsic functions.
declare void exit(i32) noreturn nounwind This compiles into
size_t size() const
size - Get the array size.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do their job.
FunctionPass class - This class is used to implement most global optimizations.
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
This function has undefined behavior.
Conditional or Unconditional Branch instruction.
LLVM Value Representation.
static void removeDoneExport(Function &F)
static constexpr UpdateKind Delete
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.