24#include "llvm/IR/IntrinsicsAMDGPU.h"
31#define DEBUG_TYPE "si-annotate-control-flow"
36using StackEntry = std::pair<BasicBlock *, Value *>;
39class SIAnnotateControlFlow {
79 bool eraseIfUnused(
PHINode *Phi);
102 :
F(&
F), UA(&UA), DT(&DT), LI(&LI) {
112void SIAnnotateControlFlow::initialize(
const GCNSubtarget &ST) {
118 :
Type::getInt64Ty(Context);
124 IntMaskZero = ConstantInt::get(IntMask, 0);
129bool SIAnnotateControlFlow::isUniform(
BranchInst *
T) {
130 return UA->isUniform(
T) ||
T->hasMetadata(
"structurizecfg.uniform");
134bool SIAnnotateControlFlow::isTopOfStack(
BasicBlock *BB) {
135 return !
Stack.empty() &&
Stack.back().first == BB;
139Value *SIAnnotateControlFlow::popSaved() {
140 return Stack.pop_back_val().second;
150bool SIAnnotateControlFlow::isElse(
PHINode *Phi) {
152 for (
unsigned i = 0, e =
Phi->getNumIncomingValues(); i != e; ++i) {
153 if (
Phi->getIncomingBlock(i) == IDom) {
155 if (
Phi->getIncomingValue(i) != BoolTrue)
159 if (
Phi->getIncomingValue(i) != BoolFalse)
167bool SIAnnotateControlFlow::hasKill(
const BasicBlock *BB) {
169 if (
const CallInst *CI = dyn_cast<CallInst>(&
I))
170 if (CI->getIntrinsicID() == Intrinsic::amdgcn_kill)
177bool SIAnnotateControlFlow::eraseIfUnused(
PHINode *Phi) {
185bool SIAnnotateControlFlow::openIf(
BranchInst *Term) {
190 Value *IfCall = IRB.CreateCall(getDecl(If, Intrinsic::amdgcn_if, IntMask),
191 {
Term->getCondition()});
192 Value *
Cond = IRB.CreateExtractValue(IfCall, {0});
193 Value *
Mask = IRB.CreateExtractValue(IfCall, {1});
200bool SIAnnotateControlFlow::insertElse(
BranchInst *Term) {
201 if (isUniform(Term)) {
206 Value *ElseCall = IRB.CreateCall(
207 getDecl(Else, Intrinsic::amdgcn_else, {IntMask, IntMask}), {popSaved()});
208 Value *
Cond = IRB.CreateExtractValue(ElseCall, {0});
209 Value *
Mask = IRB.CreateExtractValue(ElseCall, {1});
216Value *SIAnnotateControlFlow::handleLoopCondition(
221 getDecl(IfBreak, Intrinsic::amdgcn_if_break, IntMask), {
Cond, Broken});
227 if (LI->getLoopFor(Parent) == L) {
232 }
else if (
L->contains(Inst)) {
235 Insert =
L->getHeader()->getFirstNonPHIOrDbgOrLifetime();
238 return CreateBreak(Insert);
242 if (isa<Constant>(
Cond)) {
244 Term :
L->getHeader()->getTerminator();
246 return CreateBreak(Insert);
249 if (isa<Argument>(
Cond)) {
251 return CreateBreak(Insert);
258bool SIAnnotateControlFlow::handleLoop(
BranchInst *Term) {
272 Term->setCondition(BoolTrue);
273 Value *Arg = handleLoopCondition(
Cond, Broken, L, Term);
276 Value *PHIValue = IntMaskZero;
282 else if (
L->contains(Pred) && DT->
dominates(Pred, BB))
288 getDecl(
Loop, Intrinsic::amdgcn_loop, IntMask), {Arg});
289 Term->setCondition(LoopCall);
297bool SIAnnotateControlFlow::closeControlFlow(
BasicBlock *BB) {
302 if (L &&
L->getHeader() == BB) {
306 SmallVector <BasicBlock *, 8> Latches;
307 L->getLoopLatches(Latches);
321 if (!isa<UndefValue>(Exec) && !isa<UnreachableInst>(FirstInsertionPt)) {
328 IRBuilder<> IRB(FirstInsertionPt->getParent(), FirstInsertionPt);
332 IRB.SetCurrentDebugLocation(
DebugLoc());
333 IRB.CreateCall(getDecl(EndCf, Intrinsic::amdgcn_end_cf, IntMask), {
Exec});
341bool SIAnnotateControlFlow::run() {
342 bool Changed =
false;
345 E =
df_end(&
F->getEntryBlock());
350 if (!Term ||
Term->isUnconditional()) {
351 if (isTopOfStack(BB))
352 Changed |= closeControlFlow(BB);
357 if (
I.nodeVisited(
Term->getSuccessor(1))) {
358 if (isTopOfStack(BB))
359 Changed |= closeControlFlow(BB);
362 Changed |= handleLoop(Term);
366 if (isTopOfStack(BB)) {
368 if (Phi &&
Phi->getParent() == BB && isElse(Phi) && !hasKill(BB)) {
369 Changed |= insertElse(Term);
370 Changed |= eraseIfUnused(Phi);
374 Changed |= closeControlFlow(BB);
377 Changed |= openIf(Term);
380 if (!
Stack.empty()) {
396 SIAnnotateControlFlow Impl(
F, ST, DT, LI, UI);
398 bool Changed = Impl.run();
423 FunctionPass::getAnalysisUsage(AU);
427 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
428 LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
430 getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
435 SIAnnotateControlFlow Impl(
F, ST, DT, LI, UI);
441 "Annotate SI Control Flow",
false,
false)
The AMDGPU TargetMachine interface definition for hw codegen targets.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Fixup Statepoint Caller Saved
AMD GCN specific subclass of TargetSubtarget.
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
Target-Independent Code Generator Pass Configuration Options pass.
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
SIAnnotateControlFlowLegacy()
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
LLVM Basic Block Representation.
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
InstListType::iterator iterator
Instruction iterators...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Conditional or Unconditional Branch instruction.
This class represents a function call, abstracting a target machine's calling convention.
This is the shared class of boolean and integer constants.
static ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getFalse(LLVMContext &Context)
This is an important base class in LLVM.
DomTreeNodeBase * getIDom() const
Analysis pass which computes a DominatorTree.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
FunctionPass class - This class is used to implement most global optimizations.
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
This is an important class for using LLVM in a threaded context.
Analysis pass that exposes the LoopInfo for a function.
The legacy pass manager's analysis pass to compute loop information.
Represents a single loop in the control flow graph.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserve()
Mark an analysis as preserved.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Primary interface to the complete machine description for the target machine.
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
Target-Independent Code Generator Pass Configuration Options.
TMC & getTM() const
Get the right type of TargetMachine for this target.
Target - Wrapper for Target specific information.
The instances of the Type class are immutable: once they are created, they are never changed.
static IntegerType * getInt1Ty(LLVMContext &C)
static Type * getVoidTy(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
'undef' values are things that do not have specified contents.
LLVM Value Representation.
const ParentTy * getParent() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
NodeAddr< PhiNode * > Phi
This is an optimization pass for GlobalISel generic memory operations.
FunctionPass * createSIAnnotateControlFlowLegacyPass()
Create the annotation pass.
df_iterator< T > df_begin(const T &G)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
bool RecursivelyDeleteDeadPHINode(PHINode *PN, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
If the specified value is an effectively dead PHI node, due to being a def-use chain of single-use no...
df_iterator< T > df_end(const T &G)
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...