//===- AMDGPUAnnotateKernelFeatures.cpp -----------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file This pass adds target attributes to functions that use intrinsics
/// which will impact calling convention lowering.
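///
/// For example, a function that calls @llvm.amdgcn.workitem.id.y is annotated
/// with "amdgpu-work-item-id-y", telling argument lowering that the work-item
/// Y id input must be made available.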
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "amdgpu-annotate-kernel-features"

using namespace llvm;

namespace {

class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
private:
  const TargetMachine *TM = nullptr;

  bool addFeatureAttributes(Function &F);

public:
  static char ID;

  AMDGPUAnnotateKernelFeatures() : CallGraphSCCPass(ID) {}

  bool doInitialization(CallGraph &CG) override;
  bool runOnSCC(CallGraphSCC &SCC) override;

  StringRef getPassName() const override {
    return "AMDGPU Annotate Kernel Features";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
    CallGraphSCCPass::getAnalysisUsage(AU);
  }

  static bool visitConstantExpr(const ConstantExpr *CE);
  static bool visitConstantExprsRecursively(
    const Constant *EntryC,
    SmallPtrSet<const Constant *, 8> &ConstantExprVisited);
};

} // end anonymous namespace

char AMDGPUAnnotateKernelFeatures::ID = 0;

char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID;

INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
                "Add AMDGPU function attributes", false, false)


// The queue ptr is only needed when casting to flat, not from it.
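// For example (illustrative IR), a cast out of the local address space
// requires the aperture held in the queue ptr:
//   %flat.ptr = addrspacecast i32 addrspace(3)* %lds.ptr to i32*
// while the reverse cast, flat to local, does not (addrspace(3) is LOCAL and
// the unqualified pointer is FLAT on amdgcn).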
static bool castRequiresQueuePtr(unsigned SrcAS) {
  return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
}

static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) {
  return castRequiresQueuePtr(ASC->getSrcAddressSpace());
}

bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) {
  if (CE->getOpcode() == Instruction::AddrSpaceCast) {
    unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
    return castRequiresQueuePtr(SrcAS);
  }

  return false;
}

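// Despite the name, this walks the constant iteratively with an explicit
// stack, returning true if any nested addrspacecast requires the queue ptr.
// ConstantExprVisited is shared across calls so each constant is scanned once.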
bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
  const Constant *EntryC,
  SmallPtrSet<const Constant *, 8> &ConstantExprVisited) {

  if (!ConstantExprVisited.insert(EntryC).second)
    return false;

  SmallVector<const Constant *, 16> Stack;
  Stack.push_back(EntryC);

  while (!Stack.empty()) {
    const Constant *C = Stack.pop_back_val();

    // Check this constant expression.
    if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
      if (visitConstantExpr(CE))
        return true;
    }

    // Visit all sub-expressions.
    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC)
        continue;

      if (!ConstantExprVisited.insert(OpC).second)
        continue;

      Stack.push_back(OpC);
    }
  }

  return false;
}

// We do not need to note the x workitem or workgroup id because they are
// always initialized.
//
// TODO: We should not add the attributes if the known compile-time workgroup
// size is 1 for y/z.
static StringRef intrinsicToAttrName(Intrinsic::ID ID,
                                     bool &NonKernelOnly,
                                     bool &IsQueuePtr) {
  switch (ID) {
  case Intrinsic::amdgcn_workitem_id_x:
    NonKernelOnly = true;
    return "amdgpu-work-item-id-x";
  case Intrinsic::amdgcn_workgroup_id_x:
    NonKernelOnly = true;
    return "amdgpu-work-group-id-x";
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
    return "amdgpu-work-item-id-y";
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
    return "amdgpu-work-item-id-z";
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return "amdgpu-work-group-id-y";
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return "amdgpu-work-group-id-z";
  case Intrinsic::amdgcn_dispatch_ptr:
    return "amdgpu-dispatch-ptr";
  case Intrinsic::amdgcn_dispatch_id:
    return "amdgpu-dispatch-id";
  case Intrinsic::amdgcn_kernarg_segment_ptr:
    return "amdgpu-kernarg-segment-ptr";
  case Intrinsic::amdgcn_implicitarg_ptr:
    return "amdgpu-implicitarg-ptr";
  case Intrinsic::amdgcn_queue_ptr:
  case Intrinsic::trap:
  case Intrinsic::debugtrap:
    IsQueuePtr = true;
    return "amdgpu-queue-ptr";
  default:
    return "";
  }
}

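// Copy a single attribute from \p Callee to \p Parent if the callee has it,
// returning true if the attribute was added.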
static bool handleAttr(Function &Parent, const Function &Callee,
                       StringRef Name) {
  if (Callee.hasFnAttribute(Name)) {
    Parent.addFnAttr(Name);
    return true;
  }

  return false;
}

static void copyFeaturesToFunction(Function &Parent, const Function &Callee,
                                   bool &NeedQueuePtr) {
  // Note that the X ids are unnecessarily propagated to kernels, which always
  // have them initialized.
  static const StringRef AttrNames[] = {
    { "amdgpu-work-item-id-x" },
    { "amdgpu-work-item-id-y" },
    { "amdgpu-work-item-id-z" },
    { "amdgpu-work-group-id-x" },
    { "amdgpu-work-group-id-y" },
    { "amdgpu-work-group-id-z" },
    { "amdgpu-dispatch-ptr" },
    { "amdgpu-dispatch-id" },
    { "amdgpu-kernarg-segment-ptr" },
    { "amdgpu-implicitarg-ptr" }
  };

  if (handleAttr(Parent, Callee, "amdgpu-queue-ptr"))
    NeedQueuePtr = true;

  for (StringRef AttrName : AttrNames)
    handleAttr(Parent, Callee, AttrName);
}

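// Scan every instruction in \p F. Calls to regular functions propagate the
// callee's feature attributes to \p F; intrinsic calls are mapped directly to
// attributes via intrinsicToAttrName. Address space casts from local or
// private, including ones folded into constant expressions, force the
// "amdgpu-queue-ptr" attribute unless the subtarget has aperture registers.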
bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
  const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(F);
  bool HasFlat = ST.hasFlatAddressSpace();
  bool HasApertureRegs = ST.hasApertureRegs();
  SmallPtrSet<const Constant *, 8> ConstantExprVisited;

  bool Changed = false;
  bool NeedQueuePtr = false;
  bool HaveCall = false;
  bool IsFunc = !AMDGPU::isEntryFunctionCC(F.getCallingConv());

  for (BasicBlock &BB : F) {
    for (Instruction &I : BB) {
      CallSite CS(&I);
      if (CS) {
        Function *Callee = CS.getCalledFunction();

        // TODO: Do something with indirect calls.
        if (!Callee) {
          if (!CS.isInlineAsm())
            HaveCall = true;
          continue;
        }

        Intrinsic::ID IID = Callee->getIntrinsicID();
        if (IID == Intrinsic::not_intrinsic) {
          HaveCall = true;
          copyFeaturesToFunction(F, *Callee, NeedQueuePtr);
          Changed = true;
        } else {
          bool NonKernelOnly = false;
          StringRef AttrName = intrinsicToAttrName(IID,
                                                   NonKernelOnly, NeedQueuePtr);
          if (!AttrName.empty() && (IsFunc || !NonKernelOnly)) {
            F.addFnAttr(AttrName);
            Changed = true;
          }
        }
      }

      if (NeedQueuePtr || HasApertureRegs)
        continue;

      if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
        if (castRequiresQueuePtr(ASC)) {
          NeedQueuePtr = true;
          continue;
        }
      }

      for (const Use &U : I.operands()) {
        const auto *OpC = dyn_cast<Constant>(U);
        if (!OpC)
          continue;

        if (visitConstantExprsRecursively(OpC, ConstantExprVisited)) {
          NeedQueuePtr = true;
          break;
        }
      }
    }
  }

  if (NeedQueuePtr) {
    F.addFnAttr("amdgpu-queue-ptr");
    Changed = true;
  }

  // TODO: We could refine this to captured pointers that could possibly be
  // accessed by flat instructions. For now this is mostly a poor way of
  // estimating whether there are calls before argument lowering.
  if (HasFlat && !IsFunc && HaveCall) {
    F.addFnAttr("amdgpu-flat-scratch");
    Changed = true;
  }

  return Changed;
}

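// SCCs are visited in bottom-up order, so by the time a caller is processed
// its callees in earlier SCCs already carry their final attributes for
// copyFeaturesToFunction to propagate.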
bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) {
  Module &M = SCC.getCallGraph().getModule();
  Triple TT(M.getTargetTriple());

  bool Changed = false;
  for (CallGraphNode *I : SCC) {
    Function *F = I->getFunction();
    if (!F || F->isDeclaration())
      continue;

    Changed |= addFeatureAttributes(*F);
  }

  return Changed;
}

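// The TargetMachine is pulled from TargetPassConfig here rather than stored at
// construction, so the pass aborts if run without a configured codegen
// pipeline.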
bool AMDGPUAnnotateKernelFeatures::doInitialization(CallGraph &CG) {
  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    report_fatal_error("TargetMachine is required");

  TM = &TPC->getTM<TargetMachine>();
  return false;
}

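// Factory kept outside the anonymous namespace so the rest of the AMDGPU
// backend can create the pass without seeing the class definition.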
Pass *llvm::createAMDGPUAnnotateKernelFeaturesPass() {
  return new AMDGPUAnnotateKernelFeatures();
}
