LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - AMDGPUInstrInfo.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 24 26 92.3 %
Date: 2018-02-25 19:55:18 Functions: 4 5 80.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : /// \file
      11             : /// \brief Implementation of the TargetInstrInfo class that is common to all
      12             : /// AMD GPUs.
      13             : //
      14             : //===----------------------------------------------------------------------===//
      15             : 
      16             : #include "AMDGPUInstrInfo.h"
      17             : #include "AMDGPURegisterInfo.h"
      18             : #include "AMDGPUTargetMachine.h"
      19             : #include "llvm/CodeGen/MachineFrameInfo.h"
      20             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      21             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      22             : 
      23             : using namespace llvm;
      24             : 
      25             : #define GET_INSTRINFO_CTOR_DTOR
      26             : #include "AMDGPUGenInstrInfo.inc"
      27             : 
      28             : // Pin the vtable to this file: anchor() is an intentionally empty, out-of-line member.
      29           0 : void AMDGPUInstrInfo::anchor() {}
      30             : // Constructs the common AMDGPU instruction info for the subtarget ST.
      31        2330 : AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST)
      32             :   : AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN), // presumably the call-frame setup/destroy opcodes -- TODO confirm against the generated base class
      33             :     ST(ST), // the subtarget is consulted later by pseudoToMCOpcode()
      34        4660 :     AMDGPUASI(ST.getAMDGPUAS()) {} // initialized from the subtarget's getAMDGPUAS()
      35             : 
      36             : // FIXME: This behaves strangely. If, for example, you have 32 loads + stores,
      37             : // the first 16 loads will be interleaved with the stores, and the next 16 will
      38             : // be clustered as expected. It should really split into two 16-store batches.
      39             : //
      40             : // Loads are clustered until this returns false, rather than trying to schedule
      41             : // groups of stores. This also means we have to deal with saying different
      42             : // address space loads should be clustered, and ones which might cause bank
      43             : // conflicts.
      44             : //
      45             : // This might be deprecated so it might not be worth that much effort to fix.
      46       55986 : bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, // Load0/Load1 are not inspected by this implementation
      47             :                                               int64_t Offset0, int64_t Offset1, // caller must pass Offset1 > Offset0 (asserted below)
      48             :                                               unsigned NumLoads) const { // compared against the limit of 16 below
      49             :   assert(Offset1 > Offset0 &&
      50             :          "Second offset should be larger than first offset!"); // precondition; only checked when asserts are compiled in
      51             :   // If we have at most 16 loads in a row, and the offsets are less than 64
      52             :   // bytes apart, then schedule together.
      53             : 
      54             :   // A cacheline is 64 bytes (for global memory).
      55       55986 :   return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
      56             : }
      57             : // Encoding families that pseudoToMCOpcode() passes to AMDGPU::getMCOpcode().
      58             : // This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td
      59             : enum SIEncodingFamily {
      60             :   SI = 0,    // SOUTHERN_ISLANDS / SEA_ISLANDS; also returned for the R600..NORTHERN_ISLANDS cases
      61             :   VI = 1,    // default for VOLCANIC_ISLANDS and GFX9
      62             :   SDWA = 2,  // SDWA-flagged opcodes when the generation is not GFX9
      63             :   SDWA9 = 3, // SDWA-flagged opcodes when the generation is GFX9
      64             :   GFX80 = 4, // D16, non-MIMG opcodes on subtargets with hasUnpackedD16VMem()
      65             :   GFX9 = 5   // renamedInGFX9-flagged opcodes when the generation is >= GFX9
      66             : };
      67             : // Maps the subtarget's generation to its default encoding family.
      68             : static SIEncodingFamily subtargetEncodingFamily(const AMDGPUSubtarget &ST) {
      69     1222475 :   switch (ST.getGeneration()) {
      70             :   case AMDGPUSubtarget::SOUTHERN_ISLANDS:
      71             :   case AMDGPUSubtarget::SEA_ISLANDS:
      72             :     return SIEncodingFamily::SI;
      73      598227 :   case AMDGPUSubtarget::VOLCANIC_ISLANDS:
      74             :   case AMDGPUSubtarget::GFX9: // GFX9 defaults to VI here; pseudoToMCOpcode() overrides it per opcode
      75             :     return SIEncodingFamily::VI;
      76             : 
      77             :   // FIXME: This should never be called for r600 GPUs.
      78             :   case AMDGPUSubtarget::R600:
      79             :   case AMDGPUSubtarget::R700:
      80             :   case AMDGPUSubtarget::EVERGREEN:
      81             :   case AMDGPUSubtarget::NORTHERN_ISLANDS:
      82             :     return SIEncodingFamily::SI; // value returned for these generations despite the FIXME above
      83             :   }
      84             : 
      85           0 :   llvm_unreachable("Unknown subtarget generation!"); // reached only if getGeneration() matches no case above
      86             : }
      87             : // Translates Opcode to the MC opcode for this subtarget: returns Opcode itself, -1, or the mapped opcode (see below).
      88     1222475 : int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
      89     1222475 :   SIEncodingFamily Gen = subtargetEncodingFamily(ST);
      90             :   // The checks below each overwrite Gen, so a later match wins: renamedInGFX9, then SDWA, then D16.
      91     2524196 :   if ((get(Opcode).TSFlags & SIInstrFlags::renamedInGFX9) != 0 &&
      92       79246 :     ST.getGeneration() >= AMDGPUSubtarget::GFX9)
      93             :     Gen = SIEncodingFamily::GFX9;
      94             : 
      95     1222475 :   if (get(Opcode).TSFlags & SIInstrFlags::SDWA) // SDWA opcodes pick SDWA9 only when the generation is exactly GFX9
      96       52417 :     Gen = ST.getGeneration() == AMDGPUSubtarget::GFX9 ? SIEncodingFamily::SDWA9
      97             :                                                       : SIEncodingFamily::SDWA;
      98             :   // Adjust the encoding family to GFX80 for D16 buffer instructions when the
      99             :   // subtarget has UnpackedD16VMem feature. MIMG-flagged opcodes are excluded.
     100             :   // TODO: remove this when we discard GFX80 encoding.
     101     1674325 :   if (ST.hasUnpackedD16VMem() && (get(Opcode).TSFlags & SIInstrFlags::D16)
     102     1222577 :                               && !(get(Opcode).TSFlags & SIInstrFlags::MIMG))
     103             :     Gen = SIEncodingFamily::GFX80;
     104             : 
     105     1222475 :   int MCOp = AMDGPU::getMCOpcode(Opcode, Gen);
     106             : 
     107             :   // -1 means that Opcode is already a native instruction.
     108     1222475 :   if (MCOp == -1)
     109             :     return Opcode;
     110             : 
     111             :   // (uint16_t)-1 means that Opcode is a pseudo instruction that has
     112             :   // no encoding in the given subtarget generation.
     113      947989 :   if (MCOp == (uint16_t)-1) // (uint16_t)-1 is 65535, which is then compared as an int
     114             :     return -1;
     115             : 
     116      932323 :   return MCOp;
     117             : }
     118             : // Returns true when the memory operand MMO is classified as uniform by the checks below.
     119             : // TODO: Should largely merge with AMDGPUTTIImpl::isSourceOfDivergence.
     120      121824 : bool AMDGPUInstrInfo::isUniformMMO(const MachineMemOperand *MMO) {
     121             :   const Value *Ptr = MMO->getValue();
     122             :   // UndefValue means this is a load of a kernel input.  These are uniform.
     123             :   // Sometimes LDS instructions have constant pointers.
     124             :   // If Ptr is null, then that means this mem operand contains a
     125             :   // PseudoSourceValue like GOT.
     126      121824 :   if (!Ptr || isa<UndefValue>(Ptr) ||
     127      121824 :       isa<Constant>(Ptr) || isa<GlobalValue>(Ptr)) // NOTE(review): the UndefValue/GlobalValue checks look subsumed by isa<Constant> -- confirm
     128             :     return true;
     129             : 
     130       14395 :   if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) // this address space is always reported as uniform
     131             :     return true;
     132             : 
     133             :   if (const Argument *Arg = dyn_cast<Argument>(Ptr)) // for arguments, defer entirely to isArgPassedInSGPR()
     134        2329 :     return AMDGPU::isArgPassedInSGPR(Arg);
     135             : 
     136             :   const Instruction *I = dyn_cast<Instruction>(Ptr); // null when Ptr is not an Instruction
     137        3365 :   return I && I->getMetadata("amdgpu.uniform"); // uniform only if the instruction carries "amdgpu.uniform" metadata
     138             : }

Generated by: LCOV version 1.13