LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - AMDGPUAnnotateUniformValues.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 51 78 65.4 %
Date: 2018-10-20 13:21:21 Functions: 10 13 76.9 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===-- AMDGPUAnnotateUniformValues.cpp - ---------------------------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : /// \file
      11             : /// This pass adds amdgpu.uniform metadata to IR values so this information
      12             : /// can be used during instruction selection.
      13             : //
      14             : //===----------------------------------------------------------------------===//
      15             : 
      16             : #include "AMDGPU.h"
      17             : #include "AMDGPUIntrinsicInfo.h"
      18             : #include "llvm/ADT/SetVector.h"
      19             : #include "llvm/Analysis/LegacyDivergenceAnalysis.h"
      20             : #include "llvm/Analysis/LoopInfo.h"
      21             : #include "llvm/Analysis/MemoryDependenceAnalysis.h"
      22             : #include "llvm/IR/IRBuilder.h"
      23             : #include "llvm/IR/InstVisitor.h"
      24             : #include "llvm/Support/Debug.h"
      25             : #include "llvm/Support/raw_ostream.h"
      26             : 
      27             : #define DEBUG_TYPE "amdgpu-annotate-uniform"
      28             : 
      29             : using namespace llvm;
      30             : 
      31             : namespace {
      32             : 
      33             : class AMDGPUAnnotateUniformValues : public FunctionPass,
      34             :                        public InstVisitor<AMDGPUAnnotateUniformValues> {
      35             :   LegacyDivergenceAnalysis *DA;
      36             :   MemoryDependenceResults *MDR;
      37             :   LoopInfo *LI;
      38             :   DenseMap<Value*, GetElementPtrInst*> noClobberClones;
      39             :   bool isKernelFunc;
      40             : 
      41             : public:
      42             :   static char ID;
      43        1964 :   AMDGPUAnnotateUniformValues() :
      44        1964 :     FunctionPass(ID) { }
      45             :   bool doInitialization(Module &M) override;
      46             :   bool runOnFunction(Function &F) override;
      47           0 :   StringRef getPassName() const override {
      48           0 :     return "AMDGPU Annotate Uniform Values";
      49             :   }
      50        1954 :   void getAnalysisUsage(AnalysisUsage &AU) const override {
      51             :     AU.addRequired<LegacyDivergenceAnalysis>();
      52             :     AU.addRequired<MemoryDependenceWrapperPass>();
      53             :     AU.addRequired<LoopInfoWrapperPass>();
      54             :     AU.setPreservesAll();
      55        1954 :  }
      56             : 
      57             :   void visitBranchInst(BranchInst &I);
      58             :   void visitLoadInst(LoadInst &I);
      59             :   bool isClobberedInFunction(LoadInst * Load);
      60             : };
      61             : 
      62             : } // End anonymous namespace
      63             : 
      64       85105 : INITIALIZE_PASS_BEGIN(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
      65             :                       "Add AMDGPU uniform metadata", false, false)
      66       85105 : INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
      67       85105 : INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
      68       85105 : INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
      69      199024 : INITIALIZE_PASS_END(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
      70             :                     "Add AMDGPU uniform metadata", false, false)
      71             : 
      72             : char AMDGPUAnnotateUniformValues::ID = 0;
      73             : 
      74       28891 : static void setUniformMetadata(Instruction *I) {
      75       57782 :   I->setMetadata("amdgpu.uniform", MDNode::get(I->getContext(), {}));
      76       28891 : }
      77       26477 : static void setNoClobberMetadata(Instruction *I) {
      78       52954 :   I->setMetadata("amdgpu.noclobber", MDNode::get(I->getContext(), {}));
      79       26477 : }
      80             : 
      81       29855 : static void DFS(BasicBlock *Root, SetVector<BasicBlock*> & Set) {
      82       30570 :   for (auto I : predecessors(Root))
      83         715 :     if (Set.insert(I))
      84         572 :       DFS(I, Set);
      85       29855 : }
      86             : 
      87           0 : bool AMDGPUAnnotateUniformValues::isClobberedInFunction(LoadInst * Load) {
      88             :   // 1. get Loop for the Load->getparent();
      89             :   // 2. if it exists, collect all the BBs from the most outer
      90             :   // loop and check for the writes. If NOT - start DFS over all preds.
      91             :   // 3. Start DFS over all preds from the most outer loop header.
      92           0 :   SetVector<BasicBlock *> Checklist;
      93           0 :   BasicBlock *Start = Load->getParent();
      94           0 :   Checklist.insert(Start);
      95             :   const Value *Ptr = Load->getPointerOperand();
      96           0 :   const Loop *L = LI->getLoopFor(Start);
      97           0 :   if (L) {
      98             :     const Loop *P = L;
      99             :     do {
     100             :       L = P;
     101           0 :       P = P->getParentLoop();
     102           0 :     } while (P);
     103           0 :     Checklist.insert(L->block_begin(), L->block_end());
     104           0 :     Start = L->getHeader();
     105             :   }
     106             : 
     107           0 :   DFS(Start, Checklist);
     108           0 :   for (auto &BB : Checklist) {
     109           0 :     BasicBlock::iterator StartIt = (!L && (BB == Load->getParent())) ?
     110           0 :       BasicBlock::iterator(Load) : BB->end();
     111           0 :     auto Q = MDR->getPointerDependencyFrom(MemoryLocation(Ptr), true,
     112           0 :                                            StartIt, BB, Load);
     113           0 :     if (Q.isClobber() || Q.isUnknown())
     114           0 :       return true;
     115             :   }
     116             :   return false;
     117             : }
     118             : 
     119           0 : void AMDGPUAnnotateUniformValues::visitBranchInst(BranchInst &I) {
     120           0 :   if (I.isUnconditional())
     121           0 :     return;
     122             : 
     123             :   Value *Cond = I.getCondition();
     124           0 :   if (!DA->isUniform(Cond))
     125           0 :     return;
     126             : 
     127           0 :   setUniformMetadata(I.getParent()->getTerminator());
     128             : }
     129             : 
     130       34460 : void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
     131       34460 :   Value *Ptr = I.getPointerOperand();
     132       34460 :   if (!DA->isUniform(Ptr))
     133        4466 :     return;
     134             :   auto isGlobalLoad = [&](LoadInst &Load)->bool {
     135             :     return Load.getPointerAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
     136             :   };
     137             :   // We're tracking up to the Function boundaries
     138             :   // We cannot go beyond because of FunctionPass restrictions
     139             :   // Thus we can ensure that memory not clobbered for memory
     140             :   // operations that live in kernel only.
     141       29994 :   bool NotClobbered = isKernelFunc &&   !isClobberedInFunction(&I);
     142       29994 :   Instruction *PtrI = dyn_cast<Instruction>(Ptr);
     143       30729 :   if (!PtrI && NotClobbered && isGlobalLoad(I)) {
     144         375 :     if (isa<Argument>(Ptr) || isa<GlobalValue>(Ptr)) {
     145             :       // Lookup for the existing GEP
     146          76 :       if (noClobberClones.count(Ptr)) {
     147           0 :         PtrI = noClobberClones[Ptr];
     148             :       } else {
     149             :         // Create GEP of the Value
     150          76 :         Function *F = I.getParent()->getParent();
     151          76 :         Value *Idx = Constant::getIntegerValue(
     152         152 :           Type::getInt32Ty(Ptr->getContext()), APInt(64, 0));
     153             :         // Insert GEP at the entry to make it dominate all uses
     154         152 :         PtrI = GetElementPtrInst::Create(
     155             :           Ptr->getType()->getPointerElementType(), Ptr,
     156          76 :           ArrayRef<Value*>(Idx), Twine(""), F->getEntryBlock().getFirstNonPHI());
     157             :       }
     158          76 :       I.replaceUsesOfWith(Ptr, PtrI);
     159             :     }
     160             :   }
     161             : 
     162       29994 :   if (PtrI) {
     163       28316 :     setUniformMetadata(PtrI);
     164       28316 :     if (NotClobbered)
     165       26477 :       setNoClobberMetadata(PtrI);
     166             :   }
     167             : }
     168             : 
     169        1953 : bool AMDGPUAnnotateUniformValues::doInitialization(Module &M) {
     170        1953 :   return false;
     171             : }
     172             : 
     173       19771 : bool AMDGPUAnnotateUniformValues::runOnFunction(Function &F) {
     174       19771 :   if (skipFunction(F))
     175             :     return false;
     176             : 
     177       19767 :   DA  = &getAnalysis<LegacyDivergenceAnalysis>();
     178       19767 :   MDR = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
     179       19767 :   LI  = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
     180       19767 :   isKernelFunc = F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
     181             : 
     182             :   visit(F);
     183       19767 :   noClobberClones.clear();
     184       19767 :   return true;
     185             : }
     186             : 
     187             : FunctionPass *
     188        1964 : llvm::createAMDGPUAnnotateUniformValues() {
     189        1964 :   return new AMDGPUAnnotateUniformValues();
     190             : }

Generated by: LCOV version 1.13