Go to the documentation of this file.
86 #define DEBUG_TYPE "divergence"
91 cl::desc(
"turn the LegacyDivergenceAnalysis into "
92 "a wrapper for GPUDivergenceAnalysis"));
102 void populateWithSourcesOfDivergence();
107 void exploreDataDependency(
Value *V);
116 void findUsersOutsideInfluenceRegion(
123 std::vector<Value *> Worklist;
129 void DivergencePropagator::populateWithSourcesOfDivergence() {
135 Worklist.push_back(&
I);
139 for (
auto &
Arg :
F.args()) {
141 Worklist.push_back(&
Arg);
147 void DivergencePropagator::exploreSyncDependency(
Instruction *TI) {
160 if (!
DT.isReachableFromEntry(ThisBB))
170 if (IPostDom ==
nullptr)
173 for (
auto I = IPostDom->
begin(); isa<PHINode>(
I); ++
I) {
176 if (!cast<PHINode>(
I)->hasConstantOrUndefValue() && DV.insert(&*
I).second)
177 Worklist.push_back(&*
I);
200 computeInfluenceRegion(ThisBB, IPostDom, InfluenceRegion);
206 while (InfluenceRegion.
count(InfluencedBB)) {
207 for (
auto &
I : *InfluencedBB) {
209 findUsersOutsideInfluenceRegion(
I, InfluenceRegion);
212 if (IDomNode ==
nullptr)
214 InfluencedBB = IDomNode->
getBlock();
218 void DivergencePropagator::findUsersOutsideInfluenceRegion(
224 if (DV.insert(UserInst).second)
225 Worklist.push_back(UserInst);
235 std::vector<BasicBlock *> &InfluenceStack) {
237 if (Succ != End && InfluenceRegion.
insert(Succ).second)
238 InfluenceStack.push_back(Succ);
242 void DivergencePropagator::computeInfluenceRegion(
245 assert(PDT.properlyDominates(End, Start) &&
246 "End does not properly dominate Start");
251 std::vector<BasicBlock *> InfluenceStack;
252 addSuccessorsToInfluenceRegion(Start, End, InfluenceRegion, InfluenceStack);
253 while (!InfluenceStack.empty()) {
255 InfluenceStack.pop_back();
256 addSuccessorsToInfluenceRegion(
BB, End, InfluenceRegion, InfluenceStack);
260 void DivergencePropagator::exploreDataDependency(
Value *V) {
264 Worklist.push_back(U);
268 void DivergencePropagator::propagate() {
270 while (!Worklist.empty()) {
271 Value *V = Worklist.back();
276 if (
I->isTerminator() &&
I->getNumSuccessors() > 1)
277 exploreSyncDependency(
I);
279 exploreDataDependency(V);
291 "Legacy Divergence Analysis",
false,
true)
309 RPOTraversal FuncRPOT(&
F);
321 gpuDA = std::make_unique<DivergenceInfo>(
F, DT, PDT, LI,
TTI,
327 DP.populateWithSourcesOfDivergence();
334 return gpuDA->isDivergent(*V);
341 return gpuDA->isDivergentUse(*U);
354 if (
const Argument *
Arg = dyn_cast<Argument>(FirstDivergentValue)) {
355 F =
Arg->getParent();
357 dyn_cast<Instruction>(FirstDivergentValue)) {
358 F =
I->getParent()->getParent();
363 F = &
gpuDA->getFunction();
369 for (
const auto &
Arg :
F->args()) {
375 OS <<
"\n " <<
BB.getName() <<
":\n";
376 for (
const auto &
I :
BB.instructionsWithoutDebug()) {
392 auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
393 if (TTIWP ==
nullptr)
406 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
407 auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
408 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
423 DivergentValues.
clear();
424 DivergentUses.clear();
A set of analyses that are preserved following a run of a transformation pass.
This class represents an incoming formal argument to a Function.
Analysis pass providing the TargetTransformInfo.
This is an optimization pass for GlobalISel generic memory operations.
bool shouldUseGPUDivergenceAnalysis(const Function &F, const TargetTransformInfo &TTI, const LoopInfo &LI)
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
auto successors(const MachineBasicBlock *BB)
void run(Function &F, TargetTransformInfo &TTI, DominatorTree &DT, PostDominatorTree &PDT, const LoopInfo &LI)
bool isDivergentUse(const Use *U) const
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
LegacyDivergenceAnalysis()
The legacy pass manager's analysis pass to compute loop information.
void initializeLegacyDivergenceAnalysisPass(PassRegistry &)
DenseSet< const Value * > DivergentValues
DomTreeNodeBase * getIDom() const
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
LLVM Basic Block Representation.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
DenseSet< const Use * > DivergentUses
iterator begin()
Instruction iterator methods.
Represent the analysis usage information of a pass.
Legacy analysis pass which computes a DominatorTree.
This class implements an extremely fast bulk output stream that can only output to a stream.
User * getUser() const
Returns the User that contains this Use.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
const DominatorTreeT & DT
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
inst_range instructions(Function *F)
bool isDivergent(const Value *V) const
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
initializer< Ty > init(const Ty &Val)
static cl::opt< bool > UseGPUDA("use-gpu-divergence-analysis", cl::init(false), cl::Hidden, cl::desc("turn the LegacyDivergenceAnalysis into " "a wrapper for GPUDivergenceAnalysis"))
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Compute divergence starting with a divergent branch.
A Module instance is used to store all the information related to an LLVM module.
INITIALIZE_PASS_BEGIN(LegacyDivergenceAnalysis, "divergence", "Legacy Divergence Analysis", false, true) INITIALIZE_PASS_END(LegacyDivergenceAnalysis
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
So we should use XX3Form_Rcr to implement intrinsic Convert DP outs ins xscvdpsp No builtin are required Round &Convert QP DP(dword[1] is set to zero) No builtin are required Round to Quad Precision because you need to assign rounding mode in instruction Provide builtin(set f128:$vT,(int_ppc_vsx_xsrqpi f128:$vB))(set f128 yields< n x< ty > >< result > yields< ty >< result > No builtin are required Load Store load store see def memrix16 in PPCInstrInfo td Load Store Vector load store outs ins lxsdx set load store with conversion from to DP
void print(raw_ostream &OS, const Module *) const
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
std::unique_ptr< DivergenceInfo > gpuDA
void setPreservesAll()
Set by analyses that do not transform their input at all.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Analysis pass which computes a DominatorTree.
const BasicBlock * getParent() const
FunctionPass * createLegacyDivergenceAnalysisPass()
AnalysisUsage & addRequiredTransitive()
A container for analyses that lazily runs them and caches their results.
FunctionPass class - This class is used to implement most global optimizations.
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Analysis pass which computes a PostDominatorTree.
LLVM Value Representation.
iterator_range< user_iterator > users()
DivergencePropagator(const ModifiedPO &CyclePOT, const DominatorTreeT &DT, const CycleInfoT &CI, const BlockT &DivTermBlock)
Analysis pass that exposes the LoopInfo for a function.
A Use represents the edge between a Value definition and its users.
bool containsIrreducibleCFG(RPOTraversalT &RPOTraversal, const LoopInfoT &LI)
Return true if the control flow in RPOTraversal is irreducible.