LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - AMDGPUInstrInfo.cpp (source / functions)
Test:         llvm-toolchain.info
Date:         2018-06-17 00:07:59
Coverage:     Lines: 24 hit / 26 total (92.3 %)    Functions: 4 hit / 5 total (80.0 %)
Legend:       Lines: hit | not hit

          Line data    Source code
       1             : //===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : /// \file
      11             : /// Implementation of the TargetInstrInfo class that is common to all
      12             : /// AMD GPUs.
      13             : //
      14             : //===----------------------------------------------------------------------===//
      15             : 
      16             : #include "AMDGPUInstrInfo.h"
      17             : #include "AMDGPURegisterInfo.h"
      18             : #include "AMDGPUTargetMachine.h"
      19             : #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
      20             : #include "llvm/CodeGen/MachineFrameInfo.h"
      21             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      22             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      23             : 
      24             : using namespace llvm;
      25             : 
      26             : #define GET_INSTRINFO_CTOR_DTOR
      27             : #include "AMDGPUGenInstrInfo.inc"
      28             : 
      29             : namespace llvm {
      30             : namespace AMDGPU {
      31             : #define GET_RSRCINTRINSIC_IMPL
      32             : #include "AMDGPUGenSearchableTables.inc"
      33             : 
      34             : #define GET_D16IMAGEDIMINTRINSIC_IMPL
      35             : #include "AMDGPUGenSearchableTables.inc"
      36             : }
      37             : }
      38             : 
      39             : // Pin the vtable to this file.
      40           0 : void AMDGPUInstrInfo::anchor() {}
      41             : 
      42        2527 : AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST)
      43             :   : AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN),
      44             :     ST(ST),
      45        5054 :     AMDGPUASI(ST.getAMDGPUAS()) {}
      46             : 
      47             : // FIXME: This behaves strangely. If, for example, you have 32 load + stores,
      48             : // the first 16 loads will be interleaved with the stores, and the next 16 will
       49             : // be clustered as expected. It should really split them into two batches of 16 stores.
      50             : //
      51             : // Loads are clustered until this returns false, rather than trying to schedule
      52             : // groups of stores. This also means we have to deal with saying different
      53             : // address space loads should be clustered, and ones which might cause bank
      54             : // conflicts.
      55             : //
      56             : // This might be deprecated so it might not be worth that much effort to fix.
      57       54814 : bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
      58             :                                               int64_t Offset0, int64_t Offset1,
      59             :                                               unsigned NumLoads) const {
      60             :   assert(Offset1 > Offset0 &&
      61             :          "Second offset should be larger than first offset!");
       62             :   // If we have 16 or fewer loads in a row and the offsets are within 64
       63             :   // bytes, then schedule them together.
      64             : 
      65             :   // A cacheline is 64 bytes (for global memory).
      66       54814 :   return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
      67             : }
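
A minimal standalone sketch of the clustering check above (illustrative only; the
helper name wouldCluster is hypothetical and not part of this file):

    // Mirror of the heuristic: at most 16 loads, and both offsets within one
    // 64-byte (global-memory) cacheline.
    static bool wouldCluster(unsigned NumLoads, int64_t Offset0, int64_t Offset1) {
      return NumLoads <= 16 && (Offset1 - Offset0) < 64;
    }
    // wouldCluster(2, 0, 48) -> true;  wouldCluster(2, 0, 96) -> false.
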
      68             : 
      69             : // This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td
      70             : enum SIEncodingFamily {
      71             :   SI = 0,
      72             :   VI = 1,
      73             :   SDWA = 2,
      74             :   SDWA9 = 3,
      75             :   GFX80 = 4,
      76             :   GFX9 = 5
      77             : };
      78             : 
      79             : static SIEncodingFamily subtargetEncodingFamily(const AMDGPUSubtarget &ST) {
      80     1193807 :   switch (ST.getGeneration()) {
      81             :   case AMDGPUSubtarget::SOUTHERN_ISLANDS:
      82             :   case AMDGPUSubtarget::SEA_ISLANDS:
      83             :     return SIEncodingFamily::SI;
      84      642051 :   case AMDGPUSubtarget::VOLCANIC_ISLANDS:
      85             :   case AMDGPUSubtarget::GFX9:
      86             :     return SIEncodingFamily::VI;
      87             : 
      88             :   // FIXME: This should never be called for r600 GPUs.
      89             :   case AMDGPUSubtarget::R600:
      90             :   case AMDGPUSubtarget::R700:
      91             :   case AMDGPUSubtarget::EVERGREEN:
      92             :   case AMDGPUSubtarget::NORTHERN_ISLANDS:
      93             :     return SIEncodingFamily::SI;
      94             :   }
      95             : 
      96           0 :   llvm_unreachable("Unknown subtarget generation!");
      97             : }
      98             : 
      99     1193807 : int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
     100     1193807 :   SIEncodingFamily Gen = subtargetEncodingFamily(ST);
     101             : 
     102     2469708 :   if ((get(Opcode).TSFlags & SIInstrFlags::renamedInGFX9) != 0 &&
     103       82094 :     ST.getGeneration() >= AMDGPUSubtarget::GFX9)
     104             :     Gen = SIEncodingFamily::GFX9;
     105             : 
     106     1193807 :   if (get(Opcode).TSFlags & SIInstrFlags::SDWA)
     107        4886 :     Gen = ST.getGeneration() == AMDGPUSubtarget::GFX9 ? SIEncodingFamily::SDWA9
     108             :                                                       : SIEncodingFamily::SDWA;
     109             :   // Adjust the encoding family to GFX80 for D16 buffer instructions when the
      110             :   // subtarget has the UnpackedD16VMem feature.
     111             :   // TODO: remove this when we discard GFX80 encoding.
     112     1661102 :   if (ST.hasUnpackedD16VMem() && (get(Opcode).TSFlags & SIInstrFlags::D16)
     113     1193942 :                               && !(get(Opcode).TSFlags & SIInstrFlags::MIMG))
     114             :     Gen = SIEncodingFamily::GFX80;
     115             : 
     116     1193807 :   int MCOp = AMDGPU::getMCOpcode(Opcode, Gen);
     117             : 
     118             :   // -1 means that Opcode is already a native instruction.
     119     1193807 :   if (MCOp == -1)
     120             :     return Opcode;
     121             : 
     122             :   // (uint16_t)-1 means that Opcode is a pseudo instruction that has
     123             :   // no encoding in the given subtarget generation.
     124      997092 :   if (MCOp == (uint16_t)-1)
     125             :     return -1;
     126             : 
     127      975986 :   return MCOp;
     128             : }
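
A hedged sketch of how a caller might consume the return convention above (TII,
MI and OutMI are assumed names for illustration; the actual lowering code may
differ): a result of -1 means the pseudo has no encoding on the current
subtarget, anything else is a usable opcode.

    // Hypothetical caller, assuming TII, MI and OutMI are in scope:
    int MCOp = TII->pseudoToMCOpcode(MI.getOpcode());
    if (MCOp == -1)
      // Pseudo instruction with no encoding for this subtarget generation.
      report_fatal_error("pseudo instruction has no encoding on this subtarget");
    OutMI.setOpcode(MCOp);  // Otherwise MCOp is a real (or already native) opcode.
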
     129             : 
     130             : // TODO: Should largely merge with AMDGPUTTIImpl::isSourceOfDivergence.
     131          39 : bool AMDGPUInstrInfo::isUniformMMO(const MachineMemOperand *MMO) {
     132             :   const Value *Ptr = MMO->getValue();
     133             :   // UndefValue means this is a load of a kernel input.  These are uniform.
     134             :   // Sometimes LDS instructions have constant pointers.
     135             :   // If Ptr is null, then that means this mem operand contains a
     136             :   // PseudoSourceValue like GOT.
     137          39 :   if (!Ptr || isa<UndefValue>(Ptr) ||
     138          39 :       isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
     139             :     return true;
     140             : 
     141          15 :   if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
     142             :     return true;
     143             : 
     144             :   if (const Argument *Arg = dyn_cast<Argument>(Ptr))
     145           2 :     return AMDGPU::isArgPassedInSGPR(Arg);
     146             : 
     147             :   const Instruction *I = dyn_cast<Instruction>(Ptr);
     148          12 :   return I && I->getMetadata("amdgpu.uniform");
     149             : }
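
As a usage sketch (assumed, not taken from this file), a pass could treat a load
as uniform only when every memory operand attached to the MachineInstr passes
this test:

    // Hypothetical usage, assuming MI is a MachineInstr for a load:
    bool AllUniform = !MI.memoperands_empty();
    for (const MachineMemOperand *MMO : MI.memoperands())
      AllUniform &= AMDGPUInstrInfo::isUniformMMO(MMO);
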

Generated by: LCOV version 1.13