90#define DEBUG_TYPE "divergence"
95 :
F(
F), RegionLoop(RegionLoop), DT(DT), LI(LI), SDA(SDA),
96 IsLCSSAForm(IsLCSSAForm) {}
101 assert(isa<Instruction>(DivVal) || isa<Argument>(DivVal));
103 return DivergentValues.
insert(&DivVal).second;
107 UniformOverrides.
insert(&UniVal);
110bool DivergenceAnalysisImpl::isTemporalDivergent(
112 const auto *Inst = dyn_cast<const Instruction>(&Val);
120 if (DivergentLoops.contains(
Loop))
128 return I.getParent() &&
inRegion(*
I.getParent());
135void DivergenceAnalysisImpl::pushUsers(
const Value &V) {
136 const auto *
I = dyn_cast<const Instruction>(&V);
138 if (
I &&
I->isTerminator()) {
139 analyzeControlDivergence(*
I);
143 for (
const auto *
User : V.users()) {
144 const auto *UserInst = dyn_cast<const Instruction>(
User);
154 Worklist.push_back(UserInst);
159 const Loop &DivLoop) {
160 const auto *
I = dyn_cast<const Instruction>(&U);
168void DivergenceAnalysisImpl::analyzeTemporalDivergence(
175 LLVM_DEBUG(
dbgs() <<
"Analyze temporal divergence: " <<
I.getName() <<
"\n");
176 assert((isa<PHINode>(
I) || !IsLCSSAForm) &&
177 "In LCSSA form all users of loop-exiting defs are Phi nodes.");
178 for (
const Use &Op :
I.operands()) {
190void DivergenceAnalysisImpl::analyzeLoopExitDivergence(
194 for (
const auto &Phi : DivExit.
phis()) {
195 analyzeTemporalDivergence(Phi, OuterDivLoop);
218 "irreducible control flow detected");
221 if (!DT.
dominates(&LoopHeader, UserBlock)) {
223 for (
const auto &Phi : UserBlock->phis()) {
224 analyzeTemporalDivergence(Phi, OuterDivLoop);
230 for (
const auto &
I : *UserBlock) {
231 analyzeTemporalDivergence(
I, OuterDivLoop);
235 for (
const auto *SuccBlock :
successors(UserBlock)) {
236 if (!Visited.
insert(SuccBlock).second) {
241 }
while (!TaintStack.
empty());
244void DivergenceAnalysisImpl::propagateLoopExitDivergence(
249 const Loop *DivLoop = &InnerDivLoop;
250 const Loop *OuterDivLoop = DivLoop;
252 const unsigned LoopExitDepth =
254 while (DivLoop && DivLoop->
getLoopDepth() > LoopExitDepth) {
255 DivergentLoops.insert(DivLoop);
256 OuterDivLoop = DivLoop;
262 analyzeLoopExitDivergence(DivExit, *OuterDivLoop);
267void DivergenceAnalysisImpl::taintAndPushPhiNodes(
const BasicBlock &JoinBlock) {
277 for (
const auto &Phi : JoinBlock.
phis()) {
282 if (Phi.hasConstantOrUndefValue())
285 Worklist.push_back(&Phi);
289void DivergenceAnalysisImpl::analyzeControlDivergence(
const Instruction &Term) {
302 for (
const auto *JoinBlock : DivDesc.JoinDivBlocks) {
303 taintAndPushPhiNodes(*JoinBlock);
306 assert(DivDesc.LoopDivBlocks.empty() || BranchLoop);
307 for (
const auto *DivExitBlock : DivDesc.LoopDivBlocks) {
308 propagateLoopExitDivergence(*DivExitBlock, *BranchLoop);
314 auto DivValuesCopy = DivergentValues;
315 for (
const auto *DivVal : DivValuesCopy) {
322 while (!Worklist.empty()) {
333 return UniformOverrides.
contains(&V);
337 return DivergentValues.
contains(&V);
343 return isDivergent(V) || isTemporalDivergent(*
I.getParent(), V);
351 if (!KnownReducible) {
353 RPOTraversal FuncRPOT(&
F);
356 ContainsIrreducible =
true;
360 SDA = std::make_unique<SyncDependenceAnalysis>(DT, PDT, LI);
361 DA = std::make_unique<DivergenceAnalysisImpl>(
F,
nullptr, DT, LI, *SDA,
365 DA->markDivergent(
I);
367 DA->addUniformOverride(
I);
370 for (
auto &
Arg :
F.args()) {
372 DA->markDivergent(
Arg);
394 OS <<
"'Divergence Analysis' for function '" <<
F.getName() <<
"':\n";
395 if (DI.hasDivergence()) {
396 for (
auto &
Arg :
F.args()) {
397 OS << (DI.isDivergent(
Arg) ?
"DIVERGENT: " :
" ");
401 OS <<
"\n " << BB.getName() <<
":\n";
402 for (
const auto &
I : BB.instructionsWithoutDebug()) {
403 OS << (DI.isDivergent(
I) ?
"DIVERGENT: " :
" ");
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
static const Instruction * getIfCarriedInstruction(const Use &U, const Loop &DivLoop)
print must be executed print the must be executed context for all instructions
FunctionAnalysisManager FAM
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
LLVM Basic Block Representation.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
const Function * getParent() const
Return the enclosing method, or null if none.
Implements a dense probed hash-table based set.
bool inRegion(const BasicBlock &BB) const
Whether BB is part of the region.
bool isAlwaysUniform(const Value &Val) const
Whether Val will always return a uniform value regardless of its operands.
void addUniformOverride(const Value &UniVal)
Mark UniVal as a value that is always uniform.
void compute()
Propagate divergence to all instructions in the region.
bool isDivergentUse(const Use &U) const
Whether U is divergent.
bool markDivergent(const Value &DivVal)
Mark DivVal as a value that is always divergent.
bool isDivergent(const Value &Val) const
Whether Val is divergent at its definition.
DivergenceAnalysisImpl(const Function &F, const Loop *RegionLoop, const DominatorTree &DT, const LoopInfo &LI, SyncDependenceAnalysis &SDA, bool IsLCSSAForm)
This instance will analyze the whole function F or the loop RegionLoop.
Divergence analysis frontend for GPU kernels.
Result run(Function &F, FunctionAnalysisManager &AM)
Runs the divergence analysis on @F, a GPU kernel.
DivergenceInfo(Function &F, const DominatorTree &DT, const PostDominatorTree &PDT, const LoopInfo &LI, const TargetTransformInfo &TTI, bool KnownReducible)
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Analysis pass that exposes the LoopInfo for a function.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getHeader() const
unsigned getLoopDepth() const
Return the nesting level of this loop.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Represents a single loop in the control flow graph.
StringRef getName() const
Analysis pass which computes a PostDominatorTree.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Relates points of divergent control to join points in reducible CFGs.
const ControlDivergenceDesc & getJoinBlocks(const Instruction &Term)
Computes divergent join points and loop exits caused by branch divergence in Term.
Analysis pass providing the TargetTransformInfo.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
This is an optimization pass for GlobalISel generic memory operations.
auto successors(const MachineBasicBlock *BB)
bool containsIrreducibleCFG(RPOTraversalT &RPOTraversal, const LoopInfoT &LI)
Return true if the control flow in RPOTraversal is irreducible.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
A special type used by analysis passes to provide an address that identifies that particular analysis...
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)