LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - AMDGPUTargetTransformInfo.h (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 20 20 100.0 %
Date: 2018-06-17 00:07:59 Functions: 3 3 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------*- C++ -*-===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : /// \file
      11             : /// This file defines a TargetTransformInfo::Concept conforming object specific to the
      12             : /// AMDGPU target machine. It uses the target's detailed information to
      13             : /// provide more precise answers to certain TTI queries, while letting the
      14             : /// target independent and default TTI implementations handle the rest.
      15             : //
      16             : //===----------------------------------------------------------------------===//
      17             : 
      18             : #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
      19             : #define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
      20             : 
      21             : #include "AMDGPU.h"
      22             : #include "AMDGPUSubtarget.h"
      23             : #include "AMDGPUTargetMachine.h"
      24             : #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
      25             : #include "Utils/AMDGPUBaseInfo.h"
      26             : #include "llvm/ADT/ArrayRef.h"
      27             : #include "llvm/Analysis/TargetTransformInfo.h"
      28             : #include "llvm/CodeGen/BasicTTIImpl.h"
      29             : #include "llvm/IR/Function.h"
      30             : #include "llvm/MC/SubtargetFeature.h"
      31             : #include "llvm/Support/MathExtras.h"
      32             : #include <cassert>
      33             : 
      34             : namespace llvm {
      35             : 
      36             : class AMDGPUTargetLowering;
      37             : class Loop;
      38             : class ScalarEvolution;
      39             : class Type;
      40             : class Value;
      41             : 
      42     1265589 : class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
      43             :   using BaseT = BasicTTIImplBase<AMDGPUTTIImpl>;
      44             :   using TTI = TargetTransformInfo;
      45             : 
      46             :   friend BaseT;
      47             : 
      48             :   const AMDGPUSubtarget *ST;
      49             :   const AMDGPUTargetLowering *TLI;
      50             : 
      51             : public:
      52      421863 :   explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
      53      421863 :     : BaseT(TM, F.getParent()->getDataLayout()),
      54      421863 :       ST(TM->getSubtargetImpl(F)),
      55      843726 :       TLI(ST->getTargetLowering()) {}
      56             : 
      57             :   const AMDGPUSubtarget *getST() const { return ST; }
      58             :   const AMDGPUTargetLowering *getTLI() const { return TLI; }
      59             : 
      60             :   void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
      61             :                                TTI::UnrollingPreferences &UP);
      62             : };
      63             : 
      64     1912415 : class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
      65             :   using BaseT = BasicTTIImplBase<GCNTTIImpl>;
      66             :   using TTI = TargetTransformInfo;
      67             : 
      68             :   friend BaseT;
      69             : 
      70             :   const AMDGPUSubtarget *ST;
      71             :   const AMDGPUTargetLowering *TLI;
      72             :   AMDGPUTTIImpl CommonTTI;
      73             :   bool IsGraphicsShader;
      74             : 
      75             :   const FeatureBitset InlineFeatureIgnoreList = {
      76             :     // Codegen control options which don't matter.
      77             :     AMDGPU::FeatureEnableLoadStoreOpt,
      78             :     AMDGPU::FeatureEnableSIScheduler,
      79             :     AMDGPU::FeatureEnableUnsafeDSOffsetFolding,
      80             :     AMDGPU::FeatureFlatForGlobal,
      81             :     AMDGPU::FeaturePromoteAlloca,
      82             :     AMDGPU::FeatureUnalignedBufferAccess,
      83             :     AMDGPU::FeatureUnalignedScratchAccess,
      84             : 
      85             :     AMDGPU::FeatureAutoWaitcntBeforeBarrier,
      86             :     AMDGPU::FeatureDebuggerEmitPrologue,
      87             :     AMDGPU::FeatureDebuggerInsertNops,
      88             :     AMDGPU::FeatureDebuggerReserveRegs,
      89             : 
      90             :     // Property of the kernel/environment which can't actually differ.
      91             :     AMDGPU::FeatureSGPRInitBug,
      92             :     AMDGPU::FeatureXNACK,
      93             :     AMDGPU::FeatureTrapHandler,
      94             : 
      95             :     // Perf-tuning features
      96             :     AMDGPU::FeatureFastFMAF32,
      97             :     AMDGPU::HalfRate64Ops
      98             :   };
      99             : 
     100             :   const AMDGPUSubtarget *getST() const { return ST; }
     101             :   const AMDGPUTargetLowering *getTLI() const { return TLI; }
     102             : 
     103             :   static inline int getFullRateInstrCost() {
     104             :     return TargetTransformInfo::TCC_Basic;
     105             :   }
     106             : 
     107             :   static inline int getHalfRateInstrCost() {
     108             :     return 2 * TargetTransformInfo::TCC_Basic;
     109             :   }
     110             : 
     111             :   // TODO: The size is usually 8 bytes, but takes 4x as many cycles. Maybe
     112             :   // the cost multiplier (currently 3) should be 2 or 4.
     113             :   static inline int getQuarterRateInstrCost() {
     114             :     return 3 * TargetTransformInfo::TCC_Basic;
     115             :   }
     116             : 
     117             :    // On some parts, normal fp64 operations are half rate, and others
     118             :    // quarter. This also applies to some integer operations.
     119             :   inline int get64BitInstrCost() const {
     120          48 :     return ST->hasHalfRate64Ops() ?
     121             :       getHalfRateInstrCost() : getQuarterRateInstrCost();
     122             :   }
     123             : 
     124             : public:
     125      382483 :   explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
     126      382483 :     : BaseT(TM, F.getParent()->getDataLayout()),
     127      382483 :       ST(TM->getSubtargetImpl(F)),
     128      382483 :       TLI(ST->getTargetLowering()),
     129             :       CommonTTI(TM, F),
     130     1529932 :       IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())) {}
     131             : 
     132             :   bool hasBranchDivergence() { return true; }
     133             : 
     134             :   void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
     135             :                                TTI::UnrollingPreferences &UP);
     136             : 
     137             :   TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
     138             :     assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
     139             :     return TTI::PSK_FastHardware;
     140             :   }
     141             : 
     142             :   unsigned getHardwareNumberOfRegisters(bool Vector) const;
     143             :   unsigned getNumberOfRegisters(bool Vector) const;
     144             :   unsigned getRegisterBitWidth(bool Vector) const;
     145             :   unsigned getMinVectorRegisterBitWidth() const;
     146             :   unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
     147             :                                unsigned ChainSizeInBytes,
     148             :                                VectorType *VecTy) const;
     149             :   unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
     150             :                                 unsigned ChainSizeInBytes,
     151             :                                 VectorType *VecTy) const;
     152             :   unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
     153             : 
     154             :   bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
     155             :                                   unsigned Alignment,
     156             :                                   unsigned AddrSpace) const;
     157             :   bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
     158             :                                    unsigned Alignment,
     159             :                                    unsigned AddrSpace) const;
     160             :   bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
     161             :                                     unsigned Alignment,
     162             :                                     unsigned AddrSpace) const;
     163             : 
     164             :   unsigned getMaxInterleaveFactor(unsigned VF);
     165             : 
     166             :   bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
     167             : 
     168             :   int getArithmeticInstrCost(
     169             :     unsigned Opcode, Type *Ty,
     170             :     TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
     171             :     TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
     172             :     TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
     173             :     TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
     174             :     ArrayRef<const Value *> Args = ArrayRef<const Value *>());
     175             : 
     176             :   unsigned getCFInstrCost(unsigned Opcode);
     177             : 
     178             :   int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
     179             :   bool isSourceOfDivergence(const Value *V) const;
     180             :   bool isAlwaysUniform(const Value *V) const;
     181             : 
     182             :   unsigned getFlatAddressSpace() const {
     183             :     // Don't bother running InferAddressSpaces pass on graphics shaders which
     184             :     // don't use flat addressing.
     185       18206 :     if (IsGraphicsShader)
     186             :       return -1;
     187       17127 :     return ST->hasFlatAddressSpace() ?
     188             :       ST->getAMDGPUAS().FLAT_ADDRESS : ST->getAMDGPUAS().UNKNOWN_ADDRESS_SPACE;
     189             :   }
     190             : 
     191             :   unsigned getVectorSplitCost() { return 0; }
     192             : 
     193             :   unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
     194             :                           Type *SubTp);
     195             : 
     196             :   bool areInlineCompatible(const Function *Caller,
     197             :                            const Function *Callee) const;
     198             : 
     199             :   unsigned getInliningThresholdMultiplier() { return 9; }
     200             : 
     201             :   int getArithmeticReductionCost(unsigned Opcode,
     202             :                                  Type *Ty,
     203             :                                  bool IsPairwise);
     204             :   int getMinMaxReductionCost(Type *Ty, Type *CondTy,
     205             :                              bool IsPairwiseForm,
     206             :                              bool IsUnsigned);
     207             : };
     208             : 
     209      118140 : class R600TTIImpl final : public BasicTTIImplBase<R600TTIImpl> {
     210             :   using BaseT = BasicTTIImplBase<R600TTIImpl>;
     211             :   using TTI = TargetTransformInfo;
     212             : 
     213             :   friend BaseT;
     214             : 
     215             :   const AMDGPUSubtarget *ST;
     216             :   const AMDGPUTargetLowering *TLI;
     217             :   AMDGPUTTIImpl CommonTTI;
     218             : 
     219             : public:
     220       39380 :   explicit R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
     221       39380 :     : BaseT(TM, F.getParent()->getDataLayout()),
     222       39380 :       ST(TM->getSubtargetImpl(F)),
     223       39380 :       TLI(ST->getTargetLowering()),
     224      118140 :       CommonTTI(TM, F)  {}
     225             : 
     226             :   const AMDGPUSubtarget *getST() const { return ST; }
     227             :   const AMDGPUTargetLowering *getTLI() const { return TLI; }
     228             : 
     229             :   void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
     230             :                                TTI::UnrollingPreferences &UP);
     231             :   unsigned getHardwareNumberOfRegisters(bool Vec) const;
     232             :   unsigned getNumberOfRegisters(bool Vec) const;
     233             :   unsigned getRegisterBitWidth(bool Vector) const;
     234             :   unsigned getMinVectorRegisterBitWidth() const;
     235             :   unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
     236             :   bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, unsigned Alignment,
     237             :                                   unsigned AddrSpace) const;
     238             :   bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
     239             :                                    unsigned Alignment,
     240             :                                    unsigned AddrSpace) const;
     241             :   bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
     242             :                                     unsigned Alignment,
     243             :                                     unsigned AddrSpace) const;
     244             :   unsigned getMaxInterleaveFactor(unsigned VF);
     245             :   unsigned getCFInstrCost(unsigned Opcode);
     246             :   int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
     247             : };
     248             : 
     249             : } // end namespace llvm
     250             : 
     251             : #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H

Generated by: LCOV version 1.13