LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - AMDGPUAnnotateUniformValues.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 85 88 96.6 %
Date: 2017-09-14 15:23:50 Functions: 13 15 86.7 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===-- AMDGPUAnnotateUniformValues.cpp - ---------------------------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : /// \file
      11             : /// This pass adds amdgpu.uniform metadata to IR values so this information
      12             : /// can be used during instruction selection.
      13             : //
      14             : //===----------------------------------------------------------------------===//
      15             : 
      16             : #include "AMDGPU.h"
      17             : #include "AMDGPUIntrinsicInfo.h"
      18             : #include "llvm/ADT/SetVector.h"
      19             : #include "llvm/Analysis/DivergenceAnalysis.h"
      20             : #include "llvm/Analysis/LoopInfo.h"
      21             : #include "llvm/Analysis/MemoryDependenceAnalysis.h"
      22             : #include "llvm/IR/IRBuilder.h"
      23             : #include "llvm/IR/InstVisitor.h"
      24             : #include "llvm/Support/Debug.h"
      25             : #include "llvm/Support/raw_ostream.h"
      26             : 
      27             : #define DEBUG_TYPE "amdgpu-annotate-uniform"
      28             : 
      29             : using namespace llvm;
      30             : 
      31             : namespace {
      32             : 
      33        4380 : class AMDGPUAnnotateUniformValues : public FunctionPass,
      34             :                        public InstVisitor<AMDGPUAnnotateUniformValues> {
      35             :   DivergenceAnalysis *DA;
      36             :   MemoryDependenceResults *MDR;
      37             :   LoopInfo *LI;
      38             :   DenseMap<Value*, GetElementPtrInst*> noClobberClones;
      39             :   bool isKernelFunc;
      40             :   AMDGPUAS AMDGPUASI;
      41             : 
      42             : public:
      43             :   static char ID;
      44        1468 :   AMDGPUAnnotateUniformValues() :
      45        4404 :     FunctionPass(ID) { }
      46             :   bool doInitialization(Module &M) override;
      47             :   bool runOnFunction(Function &F) override;
      48           0 :   StringRef getPassName() const override {
      49           0 :     return "AMDGPU Annotate Uniform Values";
      50             :   }
      51        1462 :   void getAnalysisUsage(AnalysisUsage &AU) const override {
      52        1462 :     AU.addRequired<DivergenceAnalysis>();
      53        1462 :     AU.addRequired<MemoryDependenceWrapperPass>();
      54        1462 :     AU.addRequired<LoopInfoWrapperPass>();
      55        1462 :     AU.setPreservesAll();
      56        1462 :  }
      57             : 
      58             :   void visitBranchInst(BranchInst &I);
      59             :   void visitLoadInst(LoadInst &I);
      60             :   bool isClobberedInFunction(LoadInst * Load);
      61             : };
      62             : 
      63             : } // End anonymous namespace
      64             : 
// Pass registration. The dependency list names the same three analyses that
// getAnalysisUsage() marks as required: DivergenceAnalysis,
// MemoryDependenceWrapperPass and LoopInfoWrapperPass. DEBUG_TYPE
// ("amdgpu-annotate-uniform") is passed as the pass argument.
INITIALIZE_PASS_BEGIN(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
                      "Add AMDGPU uniform metadata", false, false)
INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
                    "Add AMDGPU uniform metadata", false, false)

// Definition of the static pass identifier that the constructor hands to
// FunctionPass.
char AMDGPUAnnotateUniformValues::ID = 0;
      74             : 
      75        5949 : static void setUniformMetadata(Instruction *I) {
      76       17847 :   I->setMetadata("amdgpu.uniform", MDNode::get(I->getContext(), {}));
      77        5949 : }
      78        4168 : static void setNoClobberMetadata(Instruction *I) {
      79       12504 :   I->setMetadata("amdgpu.noclobber", MDNode::get(I->getContext(), {}));
      80        4168 : }
      81             : 
      82        7548 : static void DFS(BasicBlock *Root, SetVector<BasicBlock*> & Set) {
      83       16026 :   for (auto I : predecessors(Root))
      84         465 :     if (Set.insert(I))
      85         360 :       DFS(I, Set);
      86        7548 : }
      87             : 
      88        7188 : bool AMDGPUAnnotateUniformValues::isClobberedInFunction(LoadInst * Load) {
      89             :   // 1. get Loop for the Load->getparent();
      90             :   // 2. if it exists, collect all the BBs from the most outer
      91             :   // loop and check for the writes. If NOT - start DFS over all preds.
      92             :   // 3. Start DFS over all preds from the most outer loop header.
      93       14376 :   SetVector<BasicBlock *> Checklist;
      94        7188 :   BasicBlock *Start = Load->getParent();
      95        7188 :   Checklist.insert(Start);
      96        7188 :   const Value *Ptr = Load->getPointerOperand();
      97       14376 :   const Loop *L = LI->getLoopFor(Start);
      98          76 :   if (L) {
      99             :     const Loop *P = L;
     100             :     do {
     101          77 :       L = P;
     102          77 :       P = P->getParentLoop();
     103          77 :     } while (P);
     104         228 :     Checklist.insert(L->block_begin(), L->block_end());
     105         152 :     Start = L->getHeader();
     106             :   }
     107             : 
     108        7188 :   DFS(Start, Checklist);
     109       34652 :   for (auto &BB : Checklist) {
     110        7494 :     BasicBlock::iterator StartIt = (!L && (BB == Load->getParent())) ?
     111         764 :       BasicBlock::iterator(Load) : BB->end();
     112       29976 :     auto Q = MDR->getPointerDependencyFrom(MemoryLocation(Ptr), true,
     113       14988 :                                            StartIt, BB, Load);
     114        7494 :     if (Q.isClobber() || Q.isUnknown())
     115        1594 :       return true;
     116             :   }
     117             :   return false;
     118             : }
     119             : 
     120        1956 : void AMDGPUAnnotateUniformValues::visitBranchInst(BranchInst &I) {
     121        1956 :   if (I.isUnconditional())
     122             :     return;
     123             : 
     124         943 :   Value *Cond = I.getCondition();
     125        1886 :   if (!DA->isUniform(Cond))
     126             :     return;
     127             : 
     128        1062 :   setUniformMetadata(I.getParent()->getTerminator());
     129             : }
     130             : 
     131       11565 : void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
     132       11565 :   Value *Ptr = I.getPointerOperand();
     133       23130 :   if (!DA->isUniform(Ptr))
     134        3954 :     return;
     135             :   auto isGlobalLoad = [&](LoadInst &Load)->bool {
     136        4274 :     return Load.getPointerAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;
     137        7611 :   };
     138             :   // We're tracking up to the Function boundaries
     139             :   // We cannot go beyond because of FunctionPass restrictions
     140             :   // Thus we can ensure that memory not clobbered for memory
     141             :   // operations that live in kernel only.
     142        7611 :   bool NotClobbered = isKernelFunc &&   !isClobberedInFunction(&I);
     143       15222 :   Instruction *PtrI = dyn_cast<Instruction>(Ptr);
     144       11885 :   if (!PtrI && NotClobbered && isGlobalLoad(I)) {
     145        6216 :     if (isa<Argument>(Ptr) || isa<GlobalValue>(Ptr)) {
     146             :       // Lookup for the existing GEP
     147        5696 :       if (noClobberClones.count(Ptr)) {
     148           0 :         PtrI = noClobberClones[Ptr];
     149             :       } else {
     150             :         // Create GEP of the Value
     151        2848 :         Function *F = I.getParent()->getParent();
     152        2848 :         Value *Idx = Constant::getIntegerValue(
     153        8544 :           Type::getInt32Ty(Ptr->getContext()), APInt(64, 0));
     154             :         // Insert GEP at the entry to make it dominate all uses
     155        8544 :         PtrI = GetElementPtrInst::Create(
     156             :           Ptr->getType()->getPointerElementType(), Ptr,
     157        8544 :           ArrayRef<Value*>(Idx), Twine(""), F->getEntryBlock().getFirstNonPHI());
     158             :       }
     159        2848 :       I.replaceUsesOfWith(Ptr, PtrI);
     160             :     }
     161             :   }
     162             : 
     163        7611 :   if (PtrI) {
     164        5418 :     setUniformMetadata(PtrI);
     165        5418 :     if (NotClobbered)
     166        4168 :       setNoClobberMetadata(PtrI);
     167             :   }
     168             : }
     169             : 
     170        1462 : bool AMDGPUAnnotateUniformValues::doInitialization(Module &M) {
     171        1462 :   AMDGPUASI = AMDGPU::getAMDGPUAS(M);
     172        1462 :   return false;
     173             : }
     174             : 
     175       14840 : bool AMDGPUAnnotateUniformValues::runOnFunction(Function &F) {
     176       14840 :   if (skipFunction(F))
     177             :     return false;
     178             : 
     179       14838 :   DA  = &getAnalysis<DivergenceAnalysis>();
     180       29676 :   MDR = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
     181       29676 :   LI  = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
     182       14838 :   isKernelFunc = F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
     183             : 
     184       14838 :   visit(F);
     185       14838 :   noClobberClones.clear();
     186       14838 :   return true;
     187             : }
     188             : 
     189             : FunctionPass *
     190        1468 : llvm::createAMDGPUAnnotateUniformValues() {
     191        2936 :   return new AMDGPUAnnotateUniformValues();
     192             : }

Generated by: LCOV version 1.13