LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - AMDGPUTargetTransformInfo.h (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 17 17 100.0 %
Date: 2018-07-13 00:08:38 Functions: 4 4 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------*- C++ -*-===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : /// \file
      11             : /// This file defines a TargetTransformInfo::Concept conforming object specific
      12             : /// to the AMDGPU target machine. It uses the target's detailed information to
      13             : /// provide more precise answers to certain TTI queries, while letting the
      14             : /// target independent and default TTI implementations handle the rest.
      15             : //
      16             : //===----------------------------------------------------------------------===//
      17             : 
      18             : #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
      19             : #define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
      20             : 
      21             : #include "AMDGPU.h"
      22             : #include "AMDGPUSubtarget.h"
      23             : #include "AMDGPUTargetMachine.h"
      24             : #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
      25             : #include "Utils/AMDGPUBaseInfo.h"
      26             : #include "llvm/ADT/ArrayRef.h"
      27             : #include "llvm/Analysis/TargetTransformInfo.h"
      28             : #include "llvm/CodeGen/BasicTTIImpl.h"
      29             : #include "llvm/IR/Function.h"
      30             : #include "llvm/MC/SubtargetFeature.h"
      31             : #include "llvm/Support/MathExtras.h"
      32             : #include <cassert>
      33             : 
      34             : namespace llvm {
      35             : 
      36             : class AMDGPUTargetLowering;
      37             : class Loop;
      38             : class ScalarEvolution;
      39             : class Type;
      40             : class Value;
      41             : 
      42     1269252 : class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
      43             :   using BaseT = BasicTTIImplBase<AMDGPUTTIImpl>;
      44             :   using TTI = TargetTransformInfo;
      45             : 
      46             :   friend BaseT;
      47             : 
      48             :   Triple TargetTriple;
      49             : 
      50             : public:
      51             :   explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
      52      423084 :     : BaseT(TM, F.getParent()->getDataLayout()),
      53     1269252 :       TargetTriple(TM->getTargetTriple()) {}
      54             : 
      55             :   void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
      56             :                                TTI::UnrollingPreferences &UP);
      57             : };
      58             : 
      59     2684975 : class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
      60             :   using BaseT = BasicTTIImplBase<GCNTTIImpl>;
      61             :   using TTI = TargetTransformInfo;
      62             : 
      63             :   friend BaseT;
      64             : 
      65             :   const AMDGPUSubtarget *ST;
      66             :   const AMDGPUTargetLowering *TLI;
      67             :   AMDGPUTTIImpl CommonTTI;
      68             :   bool IsGraphicsShader;
      69             : 
      70             :   const FeatureBitset InlineFeatureIgnoreList = {
      71             :     // Codegen control options which don't matter.
      72             :     AMDGPU::FeatureEnableLoadStoreOpt,
      73             :     AMDGPU::FeatureEnableSIScheduler,
      74             :     AMDGPU::FeatureEnableUnsafeDSOffsetFolding,
      75             :     AMDGPU::FeatureFlatForGlobal,
      76             :     AMDGPU::FeaturePromoteAlloca,
      77             :     AMDGPU::FeatureUnalignedBufferAccess,
      78             :     AMDGPU::FeatureUnalignedScratchAccess,
      79             : 
      80             :     AMDGPU::FeatureAutoWaitcntBeforeBarrier,
      81             :     AMDGPU::FeatureDebuggerEmitPrologue,
      82             :     AMDGPU::FeatureDebuggerInsertNops,
      83             : 
      84             :     // Property of the kernel/environment which can't actually differ.
      85             :     AMDGPU::FeatureSGPRInitBug,
      86             :     AMDGPU::FeatureXNACK,
      87             :     AMDGPU::FeatureTrapHandler,
      88             : 
      89             :     // Perf-tuning features
      90             :     AMDGPU::FeatureFastFMAF32,
      91             :     AMDGPU::HalfRate64Ops
      92             :   };
      93             : 
      94             :   const AMDGPUSubtarget *getST() const { return ST; }
      95             :   const AMDGPUTargetLowering *getTLI() const { return TLI; }
      96             : 
      97             :   static inline int getFullRateInstrCost() {
      98             :     return TargetTransformInfo::TCC_Basic;
      99             :   }
     100             : 
     101             :   static inline int getHalfRateInstrCost() {
     102             :     return 2 * TargetTransformInfo::TCC_Basic;
     103             :   }
     104             : 
     105             :   // TODO: The size is usually 8 bytes, but it takes 4x as many cycles. Maybe
     106             :   // the multiplier should be 2 or 4.
     107             :   static inline int getQuarterRateInstrCost() {
     108             :     return 3 * TargetTransformInfo::TCC_Basic;
     109             :   }
     110             : 
     111             :   // On some parts, normal fp64 operations are half rate, and on others
     112             :   // quarter rate. This also applies to some integer operations.
     113             :   inline int get64BitInstrCost() const {
     114          48 :     return ST->hasHalfRate64Ops() ?
     115             :       getHalfRateInstrCost() : getQuarterRateInstrCost();
     116             :   }
     117             : 
     118             : public:
     119      383569 :   explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
     120      383569 :     : BaseT(TM, F.getParent()->getDataLayout()),
     121      383569 :       ST(static_cast<const AMDGPUSubtarget*>(TM->getSubtargetImpl(F))),
     122      383569 :       TLI(ST->getTargetLowering()),
     123             :       CommonTTI(TM, F),
     124     1534276 :       IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())) {}
     125             : 
     126             :   bool hasBranchDivergence() { return true; }
     127             : 
     128             :   void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
     129             :                                TTI::UnrollingPreferences &UP);
     130             : 
     131             :   TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
     132             :     assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
     133             :     return TTI::PSK_FastHardware;
     134             :   }
     135             : 
     136             :   unsigned getHardwareNumberOfRegisters(bool Vector) const;
     137             :   unsigned getNumberOfRegisters(bool Vector) const;
     138             :   unsigned getRegisterBitWidth(bool Vector) const;
     139             :   unsigned getMinVectorRegisterBitWidth() const;
     140             :   unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
     141             :                                unsigned ChainSizeInBytes,
     142             :                                VectorType *VecTy) const;
     143             :   unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
     144             :                                 unsigned ChainSizeInBytes,
     145             :                                 VectorType *VecTy) const;
     146             :   unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
     147             : 
     148             :   bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
     149             :                                   unsigned Alignment,
     150             :                                   unsigned AddrSpace) const;
     151             :   bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
     152             :                                    unsigned Alignment,
     153             :                                    unsigned AddrSpace) const;
     154             :   bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
     155             :                                     unsigned Alignment,
     156             :                                     unsigned AddrSpace) const;
     157             : 
     158             :   unsigned getMaxInterleaveFactor(unsigned VF);
     159             : 
     160             :   bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
     161             : 
     162             :   int getArithmeticInstrCost(
     163             :     unsigned Opcode, Type *Ty,
     164             :     TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
     165             :     TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
     166             :     TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
     167             :     TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
     168             :     ArrayRef<const Value *> Args = ArrayRef<const Value *>());
     169             : 
     170             :   unsigned getCFInstrCost(unsigned Opcode);
     171             : 
     172             :   int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
     173             :   bool isSourceOfDivergence(const Value *V) const;
     174             :   bool isAlwaysUniform(const Value *V) const;
     175             : 
     176             :   unsigned getFlatAddressSpace() const {
     177             :     // Don't bother running InferAddressSpaces pass on graphics shaders which
     178             :     // don't use flat addressing.
     179       18240 :     if (IsGraphicsShader)
     180             :       return -1;
     181       16984 :     return ST->hasFlatAddressSpace() ?
     182             :       ST->getAMDGPUAS().FLAT_ADDRESS : ST->getAMDGPUAS().UNKNOWN_ADDRESS_SPACE;
     183             :   }
     184             : 
     185             :   unsigned getVectorSplitCost() { return 0; }
     186             : 
     187             :   unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
     188             :                           Type *SubTp);
     189             : 
     190             :   bool areInlineCompatible(const Function *Caller,
     191             :                            const Function *Callee) const;
     192             : 
     193             :   unsigned getInliningThresholdMultiplier() { return 9; }
     194             : 
     195             :   int getArithmeticReductionCost(unsigned Opcode,
     196             :                                  Type *Ty,
     197             :                                  bool IsPairwise);
     198             :   int getMinMaxReductionCost(Type *Ty, Type *CondTy,
     199             :                              bool IsPairwiseForm,
     200             :                              bool IsUnsigned);
     201             : };
     202             : 
     203      197574 : class R600TTIImpl final : public BasicTTIImplBase<R600TTIImpl> {
     204             :   using BaseT = BasicTTIImplBase<R600TTIImpl>;
     205             :   using TTI = TargetTransformInfo;
     206             : 
     207             :   friend BaseT;
     208             : 
     209             :   const R600Subtarget *ST;
     210             :   const AMDGPUTargetLowering *TLI;
     211             :   AMDGPUTTIImpl CommonTTI;
     212             : 
     213             : public:
     214       39515 :   explicit R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
     215       39515 :     : BaseT(TM, F.getParent()->getDataLayout()),
     216       39515 :       ST(static_cast<const R600Subtarget*>(TM->getSubtargetImpl(F))),
     217             :       TLI(ST->getTargetLowering()),
     218      158060 :       CommonTTI(TM, F)  {}
     219             : 
     220             :   const R600Subtarget *getST() const { return ST; }
     221             :   const AMDGPUTargetLowering *getTLI() const { return TLI; }
     222             : 
     223             :   void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
     224             :                                TTI::UnrollingPreferences &UP);
     225             :   unsigned getHardwareNumberOfRegisters(bool Vec) const;
     226             :   unsigned getNumberOfRegisters(bool Vec) const;
     227             :   unsigned getRegisterBitWidth(bool Vector) const;
     228             :   unsigned getMinVectorRegisterBitWidth() const;
     229             :   unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
     230             :   bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, unsigned Alignment,
     231             :                                   unsigned AddrSpace) const;
     232             :   bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
     233             :                                    unsigned Alignment,
     234             :                                    unsigned AddrSpace) const;
     235             :   bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
     236             :                                     unsigned Alignment,
     237             :                                     unsigned AddrSpace) const;
     238             :   unsigned getMaxInterleaveFactor(unsigned VF);
     239             :   unsigned getCFInstrCost(unsigned Opcode);
     240             :   int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
     241             : };
     242             : 
     243             : } // end namespace llvm
     244             : 
     245             : #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H

Generated by: LCOV version 1.13