LCOV - llvm-toolchain.info - lib/Analysis/DivergenceAnalysis.cpp

LCOV - code coverage report

Current view:	top level - lib/Analysis - DivergenceAnalysis.cpp (source / functions)		Hit	Total	Coverage
Test:	llvm-toolchain.info	Lines:	102	123	82.9 %
Date:	2018-10-20 13:21:21	Functions:	17	19	89.5 %
Legend:	Lines: hit not hit

          Line data    Source code

       1             : //===- DivergenceAnalysis.cpp --------- Divergence Analysis Implementation -==//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // This file implements a general divergence analysis for loop vectorization
      11             : // and GPU programs. It determines which branches and values in a loop or GPU
      12             : // program are divergent. It can help branch optimizations such as jump
      13             : // threading and loop unswitching to make better decisions.
      14             : //
      15             : // GPU programs typically use the SIMD execution model, where multiple threads
      16             : // in the same execution group have to execute in lock-step. Therefore, if the
      17             : // code contains divergent branches (i.e., threads in a group do not agree on
      18             : // which path of the branch to take), the group of threads has to execute all
      19             : // the paths from that branch with different subsets of threads enabled until
      20             : // they re-converge.
      21             : //
      22             : // Due to this execution model, some optimizations such as jump
      23             : // threading and loop unswitching can interfere with thread re-convergence.
      24             : // Therefore, an analysis that computes which branches in a GPU program are
      25             : // divergent can help the compiler to selectively run these optimizations.
      26             : //
      27             : // This implementation is derived from the Vectorization Analysis of the
      28             : // Region Vectorizer (RV). That implementation in turn is based on the approach
      29             : // described in
      30             : //
      31             : //   Improving Performance of OpenCL on CPUs
      32             : //   Ralf Karrenberg and Sebastian Hack
      33             : //   CC '12
      34             : //
      35             : // This DivergenceAnalysis implementation is generic in the sense that it does
      36             : // not itself identify original sources of divergence.
      37             : // Instead specialized adapter classes, (LoopDivergenceAnalysis) for loops and
      38             : // (GPUDivergenceAnalysis) for GPU programs, identify the sources of divergence
      39             : // (e.g., special variables that hold the thread ID or the iteration variable).
      40             : //
      41             : // The generic implementation propagates divergence to variables that are data
      42             : // or sync dependent on a source of divergence.
      43             : //
      44             : // While data dependency is a well-known concept, the notion of sync dependency
      45             : // is worth more explanation. Sync dependence characterizes the control flow
      46             : // aspect of the propagation of branch divergence. For example,
      47             : //
      48             : //   %cond = icmp slt i32 %tid, 10
      49             : //   br i1 %cond, label %then, label %else
      50             : // then:
      51             : //   br label %merge
      52             : // else:
      53             : //   br label %merge
      54             : // merge:
      55             : //   %a = phi i32 [ 0, %then ], [ 1, %else ]
      56             : //
      57             : // Suppose %tid holds the thread ID. Although %a is not data dependent on %tid
      58             : // because %tid is not on its use-def chains, %a is sync dependent on %tid
      59             : // because the branch "br i1 %cond" depends on %tid and affects which value %a
      60             : // is assigned to.
      61             : //
      62             : // The sync dependence detection (which branch induces divergence in which join
      63             : // points) is implemented in the SyncDependenceAnalysis.
      64             : //
      65             : // The current DivergenceAnalysis implementation has the following limitations:
      66             : // 1. intra-procedural. It conservatively considers the arguments of a
      67             : //    non-kernel-entry function and the return value of a function call as
      68             : //    divergent.
      69             : // 2. memory as black box. It conservatively considers values loaded from
      70             : //    generic or local address as divergent. This can be improved by leveraging
      71             : //    pointer analysis and/or by modelling non-escaping memory objects in SSA
      72             : //    as done in RV.
      73             : //
      74             : //===----------------------------------------------------------------------===//
      75             : 
      76             : #include "llvm/Analysis/DivergenceAnalysis.h"
      77             : #include "llvm/Analysis/LoopInfo.h"
      78             : #include "llvm/Analysis/Passes.h"
      79             : #include "llvm/Analysis/PostDominators.h"
      80             : #include "llvm/Analysis/TargetTransformInfo.h"
      81             : #include "llvm/IR/Dominators.h"
      82             : #include "llvm/IR/InstIterator.h"
      83             : #include "llvm/IR/Instructions.h"
      84             : #include "llvm/IR/IntrinsicInst.h"
      85             : #include "llvm/IR/Value.h"
      86             : #include "llvm/Support/Debug.h"
      87             : #include "llvm/Support/raw_ostream.h"
      88             : #include <vector>
      89             : 
      90             : using namespace llvm;
      91             : 
      92             : #define DEBUG_TYPE "divergence-analysis"
      93             : 
      94             : // class DivergenceAnalysis
      95          13 : DivergenceAnalysis::DivergenceAnalysis(
      96             :     const Function &F, const Loop *RegionLoop, const DominatorTree &DT,
      97          13 :     const LoopInfo &LI, SyncDependenceAnalysis &SDA, bool IsLCSSAForm)
      98             :     : F(F), RegionLoop(RegionLoop), DT(DT), LI(LI), SDA(SDA),
      99          13 :       IsLCSSAForm(IsLCSSAForm) {}
     100             : 
     101          39 : void DivergenceAnalysis::markDivergent(const Value &DivVal) {
     102             :   assert(isa<Instruction>(DivVal) || isa<Argument>(DivVal));
     103             :   assert(!isAlwaysUniform(DivVal) && "cannot be a divergent");
     104          39 :   DivergentValues.insert(&DivVal);
     105          39 : }
     106             : 
     107           0 : void DivergenceAnalysis::addUniformOverride(const Value &UniVal) {
     108           0 :   UniformOverrides.insert(&UniVal);
     109           0 : }
     110             : 
     111          11 : bool DivergenceAnalysis::updateTerminator(const TerminatorInst &Term) const {
     112          11 :   if (Term.getNumSuccessors() <= 1)
     113             :     return false;
     114             :   if (auto *BranchTerm = dyn_cast<BranchInst>(&Term)) {
     115             :     assert(BranchTerm->isConditional());
     116           9 :     return isDivergent(*BranchTerm->getCondition());
     117             :   }
     118             :   if (auto *SwitchTerm = dyn_cast<SwitchInst>(&Term)) {
     119           1 :     return isDivergent(*SwitchTerm->getCondition());
     120             :   }
     121           0 :   if (isa<InvokeInst>(Term)) {
     122             :     return false; // ignore abnormal executions through landingpad
     123             :   }
     124             : 
     125           0 :   llvm_unreachable("unexpected terminator");
     126             : }
     127             : 
     128           3 : bool DivergenceAnalysis::updateNormalInstruction(const Instruction &I) const {
     129             :   // TODO function calls with side effects, etc
     130           8 :   for (const auto &Op : I.operands()) {
     131           5 :     if (isDivergent(*Op))
     132             :       return true;
     133             :   }
     134             :   return false;
     135             : }
     136             : 
     137           1 : bool DivergenceAnalysis::isTemporalDivergent(const BasicBlock &ObservingBlock,
     138             :                                              const Value &Val) const {
     139             :   const auto *Inst = dyn_cast<const Instruction>(&Val);
     140             :   if (!Inst)
     141             :     return false;
     142             :   // check whether any divergent loop carrying Val terminates before control
     143             :   // proceeds to ObservingBlock
     144           1 :   for (const auto *Loop = LI.getLoopFor(Inst->getParent());
     145           2 :        Loop != RegionLoop && !Loop->contains(&ObservingBlock);
     146           0 :        Loop = Loop->getParentLoop()) {
     147           1 :     if (DivergentLoops.find(Loop) != DivergentLoops.end())
     148             :       return true;
     149             :   }
     150             : 
     151             :   return false;
     152             : }
     153             : 
     154          12 : bool DivergenceAnalysis::updatePHINode(const PHINode &Phi) const {
     155             :   // joining divergent disjoint path in Phi parent block
     156          12 :   if (!Phi.hasConstantOrUndefValue() && isJoinDivergent(*Phi.getParent())) {
     157             :     return true;
     158             :   }
     159             : 
     160             :   // An incoming value could be divergent by itself.
     161             :   // Otherwise, an incoming value could be uniform within the loop
     162             :   // that carries its definition but it may appear divergent
     163             :   // from outside the loop. This happens when divergent loop exits
     164             :   // drop definitions of that uniform value in different iterations.
     165             :   //
     166             :   // for (int i = 0; i < n; ++i) { // 'i' is uniform inside the loop
     167             :   //   if (i % thread_id == 0) break;    // divergent loop exit
     168             :   // }
     169             :   // int divI = i;                 // divI is divergent
     170           1 :   for (size_t i = 0; i < Phi.getNumIncomingValues(); ++i) {
     171             :     const auto *InVal = Phi.getIncomingValue(i);
     172           2 :     if (isDivergent(*Phi.getIncomingValue(i)) ||
     173           1 :         isTemporalDivergent(*Phi.getParent(), *InVal)) {
     174           1 :       return true;
     175             :     }
     176             :   }
     177             :   return false;
     178             : }
     179             : 
     180          13 : bool DivergenceAnalysis::inRegion(const Instruction &I) const {
     181          13 :   return I.getParent() && inRegion(*I.getParent());
     182             : }
     183             : 
     184          29 : bool DivergenceAnalysis::inRegion(const BasicBlock &BB) const {
     185          29 :   return (!RegionLoop && BB.getParent() == &F) || RegionLoop->contains(&BB);
     186             : }
     187             : 
     188             : // marks all users of loop-carried values of the loop headed by LoopHeader as
     189             : // divergent
     190           1 : void DivergenceAnalysis::taintLoopLiveOuts(const BasicBlock &LoopHeader) {
     191           1 :   auto *DivLoop = LI.getLoopFor(&LoopHeader);
     192             :   assert(DivLoop && "loopHeader is not actually part of a loop");
     193             : 
     194             :   SmallVector<BasicBlock *, 8> TaintStack;
     195           1 :   DivLoop->getExitBlocks(TaintStack);
     196             : 
     197             :   // Otherwise potential users of loop-carried values could be anywhere in the
     198             :   // dominance region of DivLoop (including its fringes for phi nodes)
     199             :   DenseSet<const BasicBlock *> Visited;
     200           2 :   for (auto *Block : TaintStack) {
     201           1 :     Visited.insert(Block);
     202             :   }
     203           1 :   Visited.insert(&LoopHeader);
     204             : 
     205           2 :   while (!TaintStack.empty()) {
     206           1 :     auto *UserBlock = TaintStack.back();
     207             :     TaintStack.pop_back();
     208             : 
     209             :     // don't spread divergence beyond the region
     210           1 :     if (!inRegion(*UserBlock))
     211             :       continue;
     212             : 
     213             :     assert(!DivLoop->contains(UserBlock) &&
     214             :            "irreducible control flow detected");
     215             : 
     216             :     // phi nodes at the fringes of the dominance region
     217           1 :     if (!DT.dominates(&LoopHeader, UserBlock)) {
     218             :       // all PHI nodes of UserBlock become divergent
     219           0 :       for (auto &Phi : UserBlock->phis()) {
     220           0 :         Worklist.push_back(&Phi);
     221             :       }
     222             :       continue;
     223             :     }
     224             : 
     225             :     // taint outside users of values carried by DivLoop
     226           2 :     for (auto &I : *UserBlock) {
     227           1 :       if (isAlwaysUniform(I))
     228             :         continue;
     229           1 :       if (isDivergent(I))
     230             :         continue;
     231             : 
     232           2 :       for (auto &Op : I.operands()) {
     233             :         auto *OpInst = dyn_cast<Instruction>(&Op);
     234             :         if (!OpInst)
     235             :           continue;
     236           1 :         if (DivLoop->contains(OpInst->getParent())) {
     237           1 :           markDivergent(I);
     238           1 :           pushUsers(I);
     239           1 :           break;
     240             :         }
     241             :       }
     242             :     }
     243             : 
     244             :     // visit all blocks in the dominance region
     245           2 :     for (auto *SuccBlock : successors(UserBlock)) {
     246           0 :       if (!Visited.insert(SuccBlock).second) {
     247             :         continue;
     248             :       }
     249           0 :       TaintStack.push_back(SuccBlock);
     250             :     }
     251             :   }
     252           1 : }
     253             : 
     254          13 : void DivergenceAnalysis::pushPHINodes(const BasicBlock &Block) {
     255          25 :   for (const auto &Phi : Block.phis()) {
     256          12 :     if (isDivergent(Phi))
     257             :       continue;
     258          12 :     Worklist.push_back(&Phi);
     259             :   }
     260          13 : }
     261             : 
     262          29 : void DivergenceAnalysis::pushUsers(const Value &V) {
     263          42 :   for (const auto *User : V.users()) {
     264          13 :     const auto *UserInst = dyn_cast<const Instruction>(User);
     265          13 :     if (!UserInst)
     266           0 :       continue;
     267             : 
     268          13 :     if (isDivergent(*UserInst))
     269             :       continue;
     270             : 
     271             :     // only compute divergent inside loop
     272          13 :     if (!inRegion(*UserInst))
     273             :       continue;
     274          13 :     Worklist.push_back(UserInst);
     275             :   }
     276          29 : }
     277             : 
     278          13 : bool DivergenceAnalysis::propagateJoinDivergence(const BasicBlock &JoinBlock,
     279             :                                                  const Loop *BranchLoop) {
     280             :   LLVM_DEBUG(dbgs() << "\tpropJoinDiv " << JoinBlock.getName() << "\n");
     281             : 
     282             :   // ignore divergence outside the region
     283          13 :   if (!inRegion(JoinBlock)) {
     284             :     return false;
     285             :   }
     286             : 
     287             :   // push non-divergent phi nodes in JoinBlock to the worklist
     288          13 :   pushPHINodes(JoinBlock);
     289             : 
     290             :   // JoinBlock is a divergent loop exit
     291          15 :   if (BranchLoop && !BranchLoop->contains(&JoinBlock)) {
     292             :     return true;
     293             :   }
     294             : 
     295             :   // disjoint-paths divergent at JoinBlock
     296             :   markBlockJoinDivergent(JoinBlock);
     297          11 :   return false;
     298             : }
     299             : 
     300          10 : void DivergenceAnalysis::propagateBranchDivergence(const TerminatorInst &Term) {
     301             :   LLVM_DEBUG(dbgs() << "propBranchDiv " << Term.getParent()->getName() << "\n");
     302             : 
     303          10 :   markDivergent(Term);
     304             : 
     305          10 :   const auto *BranchLoop = LI.getLoopFor(Term.getParent());
     306             : 
     307             :   // whether there is a divergent loop exit from BranchLoop (if any)
     308             :   bool IsBranchLoopDivergent = false;
     309             : 
     310             :   // iterate over all blocks reachable by disjoint from Term within the loop
     311             :   // also iterates over loop exits that become divergent due to Term.
     312          23 :   for (const auto *JoinBlock : SDA.join_blocks(Term)) {
     313          13 :     IsBranchLoopDivergent |= propagateJoinDivergence(*JoinBlock, BranchLoop);
     314             :   }
     315             : 
     316             :   // Branch loop is a divergent loop due to the divergent branch in Term
     317          10 :   if (IsBranchLoopDivergent) {
     318             :     assert(BranchLoop);
     319           2 :     if (!DivergentLoops.insert(BranchLoop).second) {
     320           0 :       return;
     321             :     }
     322           2 :     propagateLoopDivergence(*BranchLoop);
     323             :   }
     324             : }
     325             : 
     326           2 : void DivergenceAnalysis::propagateLoopDivergence(const Loop &ExitingLoop) {
     327             :   LLVM_DEBUG(dbgs() << "propLoopDiv " << ExitingLoop.getName() << "\n");
     328             : 
     329             :   // don't propagate beyond region
     330           2 :   if (!inRegion(*ExitingLoop.getHeader()))
     331           0 :     return;
     332             : 
     333           2 :   const auto *BranchLoop = ExitingLoop.getParentLoop();
     334             : 
     335             :   // Uses of loop-carried values could occur anywhere
     336             :   // within the dominance region of the definition. All loop-carried
     337             :   // definitions are dominated by the loop header (reducible control).
     338             :   // Thus all users have to be in the dominance region of the loop header,
     339             :   // except PHI nodes that can also live at the fringes of the dom region
     340             :   // (incoming defining value).
     341           2 :   if (!IsLCSSAForm)
     342           1 :     taintLoopLiveOuts(*ExitingLoop.getHeader());
     343             : 
     344             :   // whether there is a divergent loop exit from BranchLoop (if any)
     345             :   bool IsBranchLoopDivergent = false;
     346             : 
     347             :   // iterate over all blocks reachable by disjoint paths from exits of
     348             :   // ExitingLoop also iterates over loop exits (of BranchLoop) that in turn
     349             :   // become divergent.
     350           2 :   for (const auto *JoinBlock : SDA.join_blocks(ExitingLoop)) {
     351           0 :     IsBranchLoopDivergent |= propagateJoinDivergence(*JoinBlock, BranchLoop);
     352             :   }
     353             : 
     354             :   // Branch loop is a divergent due to divergent loop exit in ExitingLoop
     355           2 :   if (IsBranchLoopDivergent) {
     356             :     assert(BranchLoop);
     357           0 :     if (!DivergentLoops.insert(BranchLoop).second) {
     358             :       return;
     359             :     }
     360           0 :     propagateLoopDivergence(*BranchLoop);
     361             :   }
     362             : }
     363             : 
     364          14 : void DivergenceAnalysis::compute() {
     365          27 :   for (auto *DivVal : DivergentValues) {
     366          13 :     pushUsers(*DivVal);
     367             :   }
     368             : 
     369             :   // propagate divergence
     370          39 :   while (!Worklist.empty()) {
     371          25 :     const Instruction &I = *Worklist.back();
     372             :     Worklist.pop_back();
     373             : 
     374             :     // maintain uniformity of overrides
     375          25 :     if (isAlwaysUniform(I))
     376             :       continue;
     377             : 
     378          25 :     bool WasDivergent = isDivergent(I);
     379          25 :     if (WasDivergent)
     380             :       continue;
     381             : 
     382             :     // propagate divergence caused by terminator
     383          25 :     if (isa<TerminatorInst>(I)) {
     384             :       auto &Term = cast<TerminatorInst>(I);
     385          11 :       if (updateTerminator(Term)) {
     386             :         // propagate control divergence to affected instructions
     387          10 :         propagateBranchDivergence(Term);
     388          10 :         continue;
     389             :       }
     390             :     }
     391             : 
     392             :     // update divergence of I due to divergent operands
     393             :     bool DivergentUpd = false;
     394             :     const auto *Phi = dyn_cast<const PHINode>(&I);
     395             :     if (Phi) {
     396          12 :       DivergentUpd = updatePHINode(*Phi);
     397             :     } else {
     398           3 :       DivergentUpd = updateNormalInstruction(I);
     399             :     }
     400             : 
     401             :     // propagate value divergence to users
     402          15 :     if (DivergentUpd) {
     403          15 :       markDivergent(I);
     404          15 :       pushUsers(I);
     405             :     }
     406             :   }
     407          14 : }
     408             : 
     409          26 : bool DivergenceAnalysis::isAlwaysUniform(const Value &V) const {
     410          26 :   return UniformOverrides.find(&V) != UniformOverrides.end();
     411             : }
     412             : 
     413          93 : bool DivergenceAnalysis::isDivergent(const Value &V) const {
     414          93 :   return DivergentValues.find(&V) != DivergentValues.end();
     415             : }
     416             : 
     417           0 : void DivergenceAnalysis::print(raw_ostream &OS, const Module *) const {
     418           0 :   if (DivergentValues.empty())
     419             :     return;
     420             :   // iterate instructions using instructions() to ensure a deterministic order.
     421           0 :   for (auto &I : instructions(F)) {
     422           0 :     if (isDivergent(I))
     423           0 :       OS << "DIVERGENT:" << I << '\n';
     424             :   }
     425             : }

Generated by: LCOV version 1.13