LLVM 20.0.0git
SIAnnotateControlFlow.cpp
Go to the documentation of this file.
1//===- SIAnnotateControlFlow.cpp ------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// Annotates the control flow with hardware specific intrinsics.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPU.h"
15#include "AMDGPUTargetMachine.h"
16#include "GCNSubtarget.h"
20#include "llvm/IR/BasicBlock.h"
21#include "llvm/IR/Constants.h"
22#include "llvm/IR/Dominators.h"
23#include "llvm/IR/IRBuilder.h"
24#include "llvm/IR/IntrinsicsAMDGPU.h"
29
30using namespace llvm;
31
32#define DEBUG_TYPE "si-annotate-control-flow"
33
34namespace {
35
36// Complex types used in this pass
37using StackEntry = std::pair<BasicBlock *, Value *>;
38using StackVector = SmallVector<StackEntry, 16>;
39
40class SIAnnotateControlFlow {
41private:
43
45 Type *Void;
46 Type *IntMask;
47 Type *ReturnStruct;
48
49 ConstantInt *BoolTrue;
50 ConstantInt *BoolFalse;
51 UndefValue *BoolUndef;
52 Constant *IntMaskZero;
53
54 Function *If;
55 Function *Else;
56 Function *IfBreak;
58 Function *EndCf;
59
60 DominatorTree *DT;
61 StackVector Stack;
62
63 LoopInfo *LI;
64
65 void initialize(Module &M, const GCNSubtarget &ST);
66
67 bool isUniform(BranchInst *T);
68
69 bool isTopOfStack(BasicBlock *BB);
70
71 Value *popSaved();
72
73 void push(BasicBlock *BB, Value *Saved);
74
75 bool isElse(PHINode *Phi);
76
77 bool hasKill(const BasicBlock *BB);
78
79 bool eraseIfUnused(PHINode *Phi);
80
81 bool openIf(BranchInst *Term);
82
83 bool insertElse(BranchInst *Term);
84
85 Value *
86 handleLoopCondition(Value *Cond, PHINode *Broken, llvm::Loop *L,
87 BranchInst *Term);
88
89 bool handleLoop(BranchInst *Term);
90
91 bool closeControlFlow(BasicBlock *BB);
92
93public:
94 SIAnnotateControlFlow(Module &M, const GCNSubtarget &ST, DominatorTree &DT,
95 LoopInfo &LI, UniformityInfo &UA)
96 : UA(&UA), DT(&DT), LI(&LI) {
97 initialize(M, ST);
98 }
99
100 bool run(Function &F);
101};
102
103} // end anonymous namespace
104
105/// Initialize all the types and constants used in the pass
106void SIAnnotateControlFlow::initialize(Module &M, const GCNSubtarget &ST) {
107 LLVMContext &Context = M.getContext();
108
109 Void = Type::getVoidTy(Context);
110 Boolean = Type::getInt1Ty(Context);
111 IntMask = ST.isWave32() ? Type::getInt32Ty(Context)
112 : Type::getInt64Ty(Context);
113 ReturnStruct = StructType::get(Boolean, IntMask);
114
115 BoolTrue = ConstantInt::getTrue(Context);
116 BoolFalse = ConstantInt::getFalse(Context);
117 BoolUndef = PoisonValue::get(Boolean);
118 IntMaskZero = ConstantInt::get(IntMask, 0);
119
120 If = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_if, { IntMask });
121 Else = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_else,
122 { IntMask, IntMask });
123 IfBreak = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_if_break,
124 { IntMask });
125 Loop = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_loop, { IntMask });
126 EndCf = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_end_cf, { IntMask });
127}
128
129/// Is the branch condition uniform or did the StructurizeCFG pass
130/// consider it as such?
131bool SIAnnotateControlFlow::isUniform(BranchInst *T) {
132 return UA->isUniform(T) || T->hasMetadata("structurizecfg.uniform");
133}
134
135/// Is BB the last block saved on the stack ?
136bool SIAnnotateControlFlow::isTopOfStack(BasicBlock *BB) {
137 return !Stack.empty() && Stack.back().first == BB;
138}
139
140/// Pop the last saved value from the control flow stack
141Value *SIAnnotateControlFlow::popSaved() {
142 return Stack.pop_back_val().second;
143}
144
145/// Push a BB and saved value to the control flow stack
146void SIAnnotateControlFlow::push(BasicBlock *BB, Value *Saved) {
147 Stack.push_back(std::pair(BB, Saved));
148}
149
150/// Can the condition represented by this PHI node treated like
151/// an "Else" block?
152bool SIAnnotateControlFlow::isElse(PHINode *Phi) {
153 BasicBlock *IDom = DT->getNode(Phi->getParent())->getIDom()->getBlock();
154 for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) {
155 if (Phi->getIncomingBlock(i) == IDom) {
156
157 if (Phi->getIncomingValue(i) != BoolTrue)
158 return false;
159
160 } else {
161 if (Phi->getIncomingValue(i) != BoolFalse)
162 return false;
163
164 }
165 }
166 return true;
167}
168
169bool SIAnnotateControlFlow::hasKill(const BasicBlock *BB) {
170 for (const Instruction &I : *BB) {
171 if (const CallInst *CI = dyn_cast<CallInst>(&I))
172 if (CI->getIntrinsicID() == Intrinsic::amdgcn_kill)
173 return true;
174 }
175 return false;
176}
177
178// Erase "Phi" if it is not used any more. Return true if any change was made.
179bool SIAnnotateControlFlow::eraseIfUnused(PHINode *Phi) {
180 bool Changed = RecursivelyDeleteDeadPHINode(Phi);
181 if (Changed)
182 LLVM_DEBUG(dbgs() << "Erased unused condition phi\n");
183 return Changed;
184}
185
186/// Open a new "If" block
187bool SIAnnotateControlFlow::openIf(BranchInst *Term) {
188 if (isUniform(Term))
189 return false;
190
191 IRBuilder<> IRB(Term);
192 Value *IfCall = IRB.CreateCall(If, {Term->getCondition()});
193 Value *Cond = IRB.CreateExtractValue(IfCall, {0});
194 Value *Mask = IRB.CreateExtractValue(IfCall, {1});
195 Term->setCondition(Cond);
196 push(Term->getSuccessor(1), Mask);
197 return true;
198}
199
200/// Close the last "If" block and open a new "Else" block
201bool SIAnnotateControlFlow::insertElse(BranchInst *Term) {
202 if (isUniform(Term)) {
203 return false;
204 }
205
206 IRBuilder<> IRB(Term);
207 Value *ElseCall = IRB.CreateCall(Else, {popSaved()});
208 Value *Cond = IRB.CreateExtractValue(ElseCall, {0});
209 Value *Mask = IRB.CreateExtractValue(ElseCall, {1});
210 Term->setCondition(Cond);
211 push(Term->getSuccessor(1), Mask);
212 return true;
213}
214
215/// Recursively handle the condition leading to a loop
216Value *SIAnnotateControlFlow::handleLoopCondition(
217 Value *Cond, PHINode *Broken, llvm::Loop *L, BranchInst *Term) {
218
219 auto CreateBreak = [this, Cond, Broken](Instruction *I) -> CallInst * {
220 return IRBuilder<>(I).CreateCall(IfBreak, {Cond, Broken});
221 };
222
223 if (Instruction *Inst = dyn_cast<Instruction>(Cond)) {
224 BasicBlock *Parent = Inst->getParent();
226 if (L->contains(Inst)) {
227 Insert = Parent->getTerminator();
228 } else {
229 Insert = L->getHeader()->getFirstNonPHIOrDbgOrLifetime();
230 }
231
232 return CreateBreak(Insert);
233 }
234
235 // Insert IfBreak in the loop header TERM for constant COND other than true.
236 if (isa<Constant>(Cond)) {
237 Instruction *Insert = Cond == BoolTrue ?
238 Term : L->getHeader()->getTerminator();
239
240 return CreateBreak(Insert);
241 }
242
243 if (isa<Argument>(Cond)) {
244 Instruction *Insert = L->getHeader()->getFirstNonPHIOrDbgOrLifetime();
245 return CreateBreak(Insert);
246 }
247
248 llvm_unreachable("Unhandled loop condition!");
249}
250
251/// Handle a back edge (loop)
252bool SIAnnotateControlFlow::handleLoop(BranchInst *Term) {
253 if (isUniform(Term))
254 return false;
255
256 BasicBlock *BB = Term->getParent();
257 llvm::Loop *L = LI->getLoopFor(BB);
258 if (!L)
259 return false;
260
261 BasicBlock *Target = Term->getSuccessor(1);
262 PHINode *Broken = PHINode::Create(IntMask, 0, "phi.broken");
263 Broken->insertBefore(Target->begin());
264
265 Value *Cond = Term->getCondition();
266 Term->setCondition(BoolTrue);
267 Value *Arg = handleLoopCondition(Cond, Broken, L, Term);
268
269 for (BasicBlock *Pred : predecessors(Target)) {
270 Value *PHIValue = IntMaskZero;
271 if (Pred == BB) // Remember the value of the previous iteration.
272 PHIValue = Arg;
273 // If the backedge from Pred to Target could be executed before the exit
274 // of the loop at BB, it should not reset or change "Broken", which keeps
275 // track of the number of threads exited the loop at BB.
276 else if (L->contains(Pred) && DT->dominates(Pred, BB))
277 PHIValue = Broken;
278 Broken->addIncoming(PHIValue, Pred);
279 }
280
281 CallInst *LoopCall = IRBuilder<>(Term).CreateCall(Loop, {Arg});
282 Term->setCondition(LoopCall);
283
284 push(Term->getSuccessor(0), Arg);
285
286 return true;
287}
288
289/// Close the last opened control flow
290bool SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) {
291 llvm::Loop *L = LI->getLoopFor(BB);
292
293 assert(Stack.back().first == BB);
294
295 if (L && L->getHeader() == BB) {
296 // We can't insert an EndCF call into a loop header, because it will
297 // get executed on every iteration of the loop, when it should be
298 // executed only once before the loop.
299 SmallVector <BasicBlock *, 8> Latches;
300 L->getLoopLatches(Latches);
301
303 for (BasicBlock *Pred : predecessors(BB)) {
304 if (!is_contained(Latches, Pred))
305 Preds.push_back(Pred);
306 }
307
308 BB = SplitBlockPredecessors(BB, Preds, "endcf.split", DT, LI, nullptr,
309 false);
310 }
311
312 Value *Exec = popSaved();
313 BasicBlock::iterator FirstInsertionPt = BB->getFirstInsertionPt();
314 if (!isa<UndefValue>(Exec) && !isa<UnreachableInst>(FirstInsertionPt)) {
315 Instruction *ExecDef = cast<Instruction>(Exec);
316 BasicBlock *DefBB = ExecDef->getParent();
317 if (!DT->dominates(DefBB, BB)) {
318 // Split edge to make Def dominate Use
319 FirstInsertionPt = SplitEdge(DefBB, BB, DT, LI)->getFirstInsertionPt();
320 }
321 IRBuilder<> IRB(FirstInsertionPt->getParent(), FirstInsertionPt);
322 // TODO: StructurizeCFG 'Flow' blocks have debug locations from the
323 // condition, for now just avoid copying these DebugLocs so that stepping
324 // out of the then/else block in a debugger doesn't step to the condition.
325 IRB.SetCurrentDebugLocation(DebugLoc());
326 IRB.CreateCall(EndCf, {Exec});
327 }
328
329 return true;
330}
331
332/// Annotate the control flow with intrinsics so the backend can
333/// recognize if/then/else and loops.
334bool SIAnnotateControlFlow::run(Function &F) {
335 bool Changed = false;
336
337 for (df_iterator<BasicBlock *> I = df_begin(&F.getEntryBlock()),
338 E = df_end(&F.getEntryBlock()); I != E; ++I) {
339 BasicBlock *BB = *I;
340 BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator());
341
342 if (!Term || Term->isUnconditional()) {
343 if (isTopOfStack(BB))
344 Changed |= closeControlFlow(BB);
345
346 continue;
347 }
348
349 if (I.nodeVisited(Term->getSuccessor(1))) {
350 if (isTopOfStack(BB))
351 Changed |= closeControlFlow(BB);
352
353 if (DT->dominates(Term->getSuccessor(1), BB))
354 Changed |= handleLoop(Term);
355 continue;
356 }
357
358 if (isTopOfStack(BB)) {
359 PHINode *Phi = dyn_cast<PHINode>(Term->getCondition());
360 if (Phi && Phi->getParent() == BB && isElse(Phi) && !hasKill(BB)) {
361 Changed |= insertElse(Term);
362 Changed |= eraseIfUnused(Phi);
363 continue;
364 }
365
366 Changed |= closeControlFlow(BB);
367 }
368
369 Changed |= openIf(Term);
370 }
371
372 if (!Stack.empty()) {
373 // CFG was probably not structured.
374 report_fatal_error("failed to annotate CFG");
375 }
376
377 return Changed;
378}
379
382 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
383
387
388 SIAnnotateControlFlow Impl(*F.getParent(), ST, DT, LI, UI);
389
390 // FIXME: We introduce dead declarations of intrinsics even if never used.
391 bool Changed = Impl.run(F);
392 if (!Changed)
394
395 // TODO: Is LoopInfo preserved?
398 return PA;
399}
400
402public:
403 static char ID;
404
406
407 StringRef getPassName() const override { return "SI annotate control flow"; }
408
409 void getAnalysisUsage(AnalysisUsage &AU) const override {
416 FunctionPass::getAnalysisUsage(AU);
417 }
418
419 bool runOnFunction(Function &F) override {
420 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
421 LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
422 UniformityInfo &UI =
423 getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
424 TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
425 const TargetMachine &TM = TPC.getTM<TargetMachine>();
426 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
427
428 SIAnnotateControlFlow Impl(*F.getParent(), ST, DT, LI, UI);
429 return Impl.run(F);
430 }
431};
432
434 "Annotate SI Control Flow", false, false)
439 "Annotate SI Control Flow", false, false)
440
442
443/// Create the annotation pass
445 return new SIAnnotateControlFlowLegacy();
446}
The AMDGPU TargetMachine interface definition for hw codegen targets.
static void push(SmallVectorImpl< uint64_t > &R, StringRef Str)
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define LLVM_DEBUG(X)
Definition: Debug.h:101
Fixup Statepoint Caller Saved
AMD GCN specific subclass of TargetSubtarget.
#define DEBUG_TYPE
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
FunctionAnalysisManager FAM
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
const SmallVectorImpl< MachineOperand > & Cond
Annotate SI Control Flow
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
Target-Independent Code Generator Pass Configuration Options pass.
LLVM IR instance of the generic uniformity analysis.
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:405
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:416
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:219
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:239
Conditional or Unconditional Branch instruction.
This class represents a function call, abstracting a target machine's calling convention.
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:850
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:857
This is an important base class in LLVM.
Definition: Constant.h:42
A debug info location.
Definition: DebugLoc.h:33
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:317
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2432
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2686
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition: Instruction.cpp:97
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:571
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:598
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1852
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:114
void preserve()
Mark an analysis as preserved.
Definition: Analysis.h:131
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
void push_back(const T &Elt)
Definition: SmallVector.h:427
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1210
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:361
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
Target-Independent Code Generator Pass Configuration Options.
TMC & getTM() const
Get the right type of TargetMachine for this target.
Target - Wrapper for Target specific information.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static IntegerType * getInt1Ty(LLVMContext &C)
static Type * getVoidTy(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
'undef' values are things that do not have specified contents.
Definition: Constants.h:1398
Analysis pass which computes UniformityInfo.
Legacy analysis pass which computes a CycleInfo.
LLVM Value Representation.
Definition: Value.h:74
const ParentTy * getParent() const
Definition: ilist_node.h:32
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1539
NodeAddr< PhiNode * > Phi
Definition: RDFGraph.h:390
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
FunctionPass * createSIAnnotateControlFlowLegacyPass()
Create the annotation pass.
df_iterator< T > df_begin(const T &G)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
unsigned char Boolean
Definition: ConvertUTF.h:131
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
bool RecursivelyDeleteDeadPHINode(PHINode *PN, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
If the specified value is an effectively dead PHI node, due to being a def-use chain of single-use no...
Definition: Local.cpp:651
df_iterator< T > df_end(const T &G)
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...