LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - AMDGPUTargetTransformInfo.h (source / functions)
Test: llvm-toolchain.info
Date: 2017-09-14 15:23:50
Coverage:   Lines: 10 / 10 hit (100.0 %)   Functions: 1 / 1 hit (100.0 %)
Legend: Lines: hit | not hit

          Line data    Source code
       1             : //===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------*- C++ -*-===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : /// \file
       11             : /// This file describes a TargetTransformInfo::Concept conforming object
       12             : /// specific to the AMDGPU target machine. It uses the target's detailed
       13             : /// information to provide more precise answers to certain TTI queries, while
       14             : /// letting the target-independent and default TTI implementations handle the rest.
      15             : //
      16             : //===----------------------------------------------------------------------===//
      17             : 
      18             : #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
      19             : #define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
      20             : 
      21             : #include "AMDGPU.h"
      22             : #include "AMDGPUSubtarget.h"
      23             : #include "AMDGPUTargetMachine.h"
      24             : #include "Utils/AMDGPUBaseInfo.h"
      25             : #include "llvm/ADT/ArrayRef.h"
      26             : #include "llvm/Analysis/TargetTransformInfo.h"
      27             : #include "llvm/CodeGen/BasicTTIImpl.h"
      28             : #include "llvm/IR/Function.h"
      29             : #include "llvm/MC/SubtargetFeature.h"
      30             : #include "llvm/Support/MathExtras.h"
      31             : #include <cassert>
      32             : 
      33             : namespace llvm {
      34             : 
      35             : class AMDGPUTargetLowering;
      36             : class Loop;
      37             : class ScalarEvolution;
      38             : class Type;
      39             : class Value;
      40             : 
      41     1143768 : class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
      42             :   using BaseT = BasicTTIImplBase<AMDGPUTTIImpl>;
      43             :   using TTI = TargetTransformInfo;
      44             : 
      45             :   friend BaseT;
      46             : 
      47             :   const AMDGPUSubtarget *ST;
      48             :   const AMDGPUTargetLowering *TLI;
      49             :   bool IsGraphicsShader;
      50             : 
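                      :   // Subtarget features that are masked out when areInlineCompatible() compares
                      :   // the caller's and callee's feature sets; a mismatch confined to these bits
                      :   // does not by itself block inlining.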
      51             :   const FeatureBitset InlineFeatureIgnoreList = {
      52             :     // Codegen control options which don't matter.
      53             :     AMDGPU::FeatureEnableLoadStoreOpt,
      54             :     AMDGPU::FeatureEnableSIScheduler,
      55             :     AMDGPU::FeatureEnableUnsafeDSOffsetFolding,
      56             :     AMDGPU::FeatureFlatForGlobal,
      57             :     AMDGPU::FeaturePromoteAlloca,
      58             :     AMDGPU::FeatureUnalignedBufferAccess,
      59             :     AMDGPU::FeatureUnalignedScratchAccess,
      60             : 
      61             :     AMDGPU::FeatureAutoWaitcntBeforeBarrier,
      62             :     AMDGPU::FeatureDebuggerEmitPrologue,
      63             :     AMDGPU::FeatureDebuggerInsertNops,
      64             :     AMDGPU::FeatureDebuggerReserveRegs,
      65             : 
      66             :     // Property of the kernel/environment which can't actually differ.
      67             :     AMDGPU::FeatureSGPRInitBug,
      68             :     AMDGPU::FeatureXNACK,
      69             :     AMDGPU::FeatureTrapHandler,
      70             : 
      71             :     // Perf-tuning features
      72             :     AMDGPU::FeatureFastFMAF32,
      73             :     AMDGPU::HalfRate64Ops
      74             :   };
      75             : 
      76             :   const AMDGPUSubtarget *getST() const { return ST; }
      77             :   const AMDGPUTargetLowering *getTLI() const { return TLI; }
      78             : 
      79             :   static inline int getFullRateInstrCost() {
      80             :     return TargetTransformInfo::TCC_Basic;
      81             :   }
      82             : 
      83             :   static inline int getHalfRateInstrCost() {
      84             :     return 2 * TargetTransformInfo::TCC_Basic;
      85             :   }
      86             : 
      87             :   // TODO: The size is usually 8 bytes, but takes 4x as many cycles. Maybe
      88             :   // should be 2 or 4.
      89             :   static inline int getQuarterRateInstrCost() {
      90             :     return 3 * TargetTransformInfo::TCC_Basic;
      91             :   }
      92             : 
       93             :   // On some parts, normal fp64 operations are half rate, and on others
       94             :   // quarter rate. This also applies to some integer operations.
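                      :   // For example (assuming TargetTransformInfo::TCC_Basic == 1), a 64-bit fadd
                      :   // would be costed at 2 on subtargets with HalfRate64Ops and at 3 otherwise.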
      95             :   inline int get64BitInstrCost() const {
      96          48 :     return ST->hasHalfRate64Ops() ?
      97             :       getHalfRateInstrCost() : getQuarterRateInstrCost();
      98             :   }
      99             : 
     100             : public:
     101      285942 :   explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
     102      285942 :     : BaseT(TM, F.getParent()->getDataLayout()),
     103      285942 :       ST(TM->getSubtargetImpl(F)),
     104      285942 :       TLI(ST->getTargetLowering()),
     105     1429710 :       IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())) {}
     106             : 
     107             :   bool hasBranchDivergence() { return true; }
     108             : 
     109             :   void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
     110             :                                TTI::UnrollingPreferences &UP);
     111             : 
     112             :   TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
     113             :     assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
     114             :     return TTI::PSK_FastHardware;
     115             :   }
     116             : 
     117             :   unsigned getHardwareNumberOfRegisters(bool Vector) const;
     118             :   unsigned getNumberOfRegisters(bool Vector) const;
     119             :   unsigned getRegisterBitWidth(bool Vector) const;
     120             :   unsigned getMinVectorRegisterBitWidth() const;
     121             :   unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
     122             : 
     123             :   bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
     124             :                                   unsigned Alignment,
     125             :                                   unsigned AddrSpace) const;
     126             :   bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
     127             :                                    unsigned Alignment,
     128             :                                    unsigned AddrSpace) const;
     129             :   bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
     130             :                                     unsigned Alignment,
     131             :                                     unsigned AddrSpace) const;
     132             : 
     133             :   unsigned getMaxInterleaveFactor(unsigned VF);
     134             : 
     135             :   int getArithmeticInstrCost(
     136             :     unsigned Opcode, Type *Ty,
     137             :     TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
     138             :     TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
     139             :     TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
     140             :     TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
     141             :     ArrayRef<const Value *> Args = ArrayRef<const Value *>());
     142             : 
     143             :   unsigned getCFInstrCost(unsigned Opcode);
     144             : 
     145             :   int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
     146             :   bool isSourceOfDivergence(const Value *V) const;
     147             :   bool isAlwaysUniform(const Value *V) const;
     148             : 
     149             :   unsigned getFlatAddressSpace() const {
     150             :     // Don't bother running InferAddressSpaces pass on graphics shaders which
     151             :     // don't use flat addressing.
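                      :     // Returning -1 (~0u as unsigned) signals that there is no flat address
                      :     // space to work with, which makes InferAddressSpaces bail out early.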
     152       17291 :     if (IsGraphicsShader)
     153             :       return -1;
     154       16694 :     return ST->hasFlatAddressSpace() ?
     155       18358 :       ST->getAMDGPUAS().FLAT_ADDRESS : ST->getAMDGPUAS().UNKNOWN_ADDRESS_SPACE;
     156             :   }
     157             : 
     158             :   unsigned getVectorSplitCost() { return 0; }
     159             : 
     160             :   unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
     161             :                           Type *SubTp);
     162             : 
     163             :   bool areInlineCompatible(const Function *Caller,
     164             :                            const Function *Callee) const;
     165             : };
     166             : 
     167             : } // end namespace llvm
     168             : 
     169             : #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
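
Note on how the hooks above are reached (the sketch below is not part of
AMDGPUTargetTransformInfo.h): passes never instantiate AMDGPUTTIImpl directly.
A legacy-PM pass asks for a TargetTransformInfo via
getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F) (or
AM.getResult<TargetIRAnalysis>(F) under the new pass manager), and
AMDGPUTargetMachine arranges for that object to wrap the AMDGPUTTIImpl declared
above. The helper name sumArithmeticCost and the choice of query below are
invented for the illustration.

// Minimal, hypothetical consumer of the TTI hooks declared above.
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"

using namespace llvm;

// Adds up a rough cost for every binary operator in F. For an AMDGPU target,
// each query is forwarded to AMDGPUTTIImpl::getArithmeticInstrCost, which is
// where the full/half/quarter-rate constants above are applied; anything the
// target does not override falls back to BasicTTIImplBase.
static int sumArithmeticCost(Function &F, const TargetTransformInfo &TTI) {
  int Cost = 0;
  for (Instruction &I : instructions(F))
    if (I.isBinaryOp())
      Cost += TTI.getArithmeticInstrCost(I.getOpcode(), I.getType());
  return Cost;
}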

Generated by: LCOV version 1.13