LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - SIRegisterInfo.cpp (source / functions)
Test:     llvm-toolchain.info
Date:     2017-09-14 15:23:50
Coverage: Lines: 593 / 688 hit (86.2 %)    Functions: 39 / 43 hit (90.7 %)
Legend:   Lines: hit | not hit

          Line data    Source code
       1             : //===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : /// \file
      11             : /// \brief SI implementation of the TargetRegisterInfo class.
      12             : //
      13             : //===----------------------------------------------------------------------===//
      14             : 
      15             : #include "SIRegisterInfo.h"
      16             : #include "AMDGPUSubtarget.h"
      17             : #include "SIInstrInfo.h"
      18             : #include "SIMachineFunctionInfo.h"
      19             : #include "llvm/CodeGen/MachineFrameInfo.h"
      20             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      21             : #include "llvm/CodeGen/RegisterScavenging.h"
      22             : #include "llvm/IR/Function.h"
      23             : #include "llvm/IR/LLVMContext.h"
      24             : 
      25             : using namespace llvm;
      26             : 
      27             : static bool hasPressureSet(const int *PSets, unsigned PSetID) {
      28      542392 :   for (unsigned i = 0; PSets[i] != -1; ++i) {
      29      231684 :     if (PSets[i] == (int)PSetID)
      30             :       return true;
      31             :   }
      32             :   return false;
      33             : }
      34             : 
      35       96984 : void SIRegisterInfo::classifyPressureSet(unsigned PSetID, unsigned Reg,
      36             :                                          BitVector &PressureSets) const {
      37      281972 :   for (MCRegUnitIterator U(Reg, this); U.isValid(); ++U) {
      38      193968 :     const int *PSets = getRegUnitPressureSets(*U);
      39       96984 :     if (hasPressureSet(PSets, PSetID)) {
      40             :       PressureSets.set(PSetID);
      41             :       break;
      42             :     }
      43             :   }
      44       96984 : }
      45             : 
      46       72306 : static cl::opt<bool> EnableSpillSGPRToSMEM(
      47             :   "amdgpu-spill-sgpr-to-smem",
      48      216918 :   cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"),
      49      289224 :   cl::init(false));
      50             : 
      51       72306 : static cl::opt<bool> EnableSpillSGPRToVGPR(
      52             :   "amdgpu-spill-sgpr-to-vgpr",
       53      216918 :   cl::desc("Enable spilling SGPRs to VGPRs"),
      54             :   cl::ReallyHidden,
      55      289224 :   cl::init(true));
      56             : 
      57        1796 : SIRegisterInfo::SIRegisterInfo(const SISubtarget &ST) :
      58             :   AMDGPURegisterInfo(),
      59             :   SGPRPressureSets(getNumRegPressureSets()),
      60             :   VGPRPressureSets(getNumRegPressureSets()),
      61             :   SpillSGPRToVGPR(false),
      62        1796 :   SpillSGPRToSMEM(false) {
      63        1796 :   if (EnableSpillSGPRToSMEM && ST.hasScalarStores())
      64           5 :     SpillSGPRToSMEM = true;
      65        1791 :   else if (EnableSpillSGPRToVGPR)
      66        1787 :     SpillSGPRToVGPR = true;
      67             : 
      68        1796 :   unsigned NumRegPressureSets = getNumRegPressureSets();
      69             : 
      70        1796 :   SGPRSetID = NumRegPressureSets;
      71        1796 :   VGPRSetID = NumRegPressureSets;
      72             : 
      73       50288 :   for (unsigned i = 0; i < NumRegPressureSets; ++i) {
      74       48492 :     classifyPressureSet(i, AMDGPU::SGPR0, SGPRPressureSets);
      75       48492 :     classifyPressureSet(i, AMDGPU::VGPR0, VGPRPressureSets);
      76             :   }
      77             : 
      78             :   // Determine the number of reg units for each pressure set.
      79        5388 :   std::vector<unsigned> PressureSetRegUnits(NumRegPressureSets, 0);
      80     3123244 :   for (unsigned i = 0, e = getNumRegUnits(); i != e; ++i) {
      81     3121448 :     const int *PSets = getRegUnitPressureSets(i);
      82    10267732 :     for (unsigned j = 0; PSets[j] != -1; ++j) {
      83    14292568 :       ++PressureSetRegUnits[PSets[j]];
      84             :     }
      85             :   }
      86             : 
      87             :   unsigned VGPRMax = 0, SGPRMax = 0;
      88       98780 :   for (unsigned i = 0; i < NumRegPressureSets; ++i) {
      89        5388 :     if (isVGPRPressureSet(i) && PressureSetRegUnits[i] > VGPRMax) {
      90        1796 :       VGPRSetID = i;
      91        1796 :       VGPRMax = PressureSetRegUnits[i];
      92        1796 :       continue;
      93             :     }
      94       14368 :     if (isSGPRPressureSet(i) && PressureSetRegUnits[i] > SGPRMax) {
      95        7184 :       SGPRSetID = i;
      96        7184 :       SGPRMax = PressureSetRegUnits[i];
      97             :     }
      98             :   }
      99             : 
     100             :   assert(SGPRSetID < NumRegPressureSets &&
     101             :          VGPRSetID < NumRegPressureSets);
     102        1796 : }
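                      : // In short, the loops above leave SGPRSetID and VGPRSetID pointing at the
                      : // pressure set with the most register units among the sets containing SGPR0
                      : // and VGPR0 respectively. Illustrative only (unit counts hypothetical): if
                      : // SGPR0 belonged to two pressure sets with 106 and 212 units, SGPRSetID
                      : // would be the index of the 212-unit set.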
     103             : 
     104      703898 : void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved, unsigned Reg) const {
     105      703898 :   MCRegAliasIterator R(Reg, this, true);
     106             : 
     107     8569794 :   for (; R.isValid(); ++R)
     108     7865896 :     Reserved.set(*R);
     109      703898 : }
     110             : 
     111       14434 : unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
     112             :   const MachineFunction &MF) const {
     113             : 
     114       14434 :   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
     115       28868 :   unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4;
     116       28868 :   unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
     117       28868 :   return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
     118             : }
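                      : // Illustrative arithmetic (the SGPR budget is hypothetical): if
                      : // getMaxNumSGPRs(MF) returned 102, BaseIdx = alignDown(102, 4) - 4 = 96, so
                      : // the reserved private segment buffer is the SReg_128 tuple whose sub0 is
                      : // SGPR96, i.e. SGPR96..SGPR99.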
     119             : 
     120             : static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount) {
     121             :   unsigned Reg;
     122             : 
     123             :   // Try to place it in a hole after PrivateSegmentBufferReg.
     124       14402 :   if (RegCount & 3) {
     125             :     // We cannot put the segment buffer in (Idx - 4) ... (Idx - 1) due to
     126             :     // alignment constraints, so we have a hole where can put the wave offset.
      127             :     // alignment constraints, so we have a hole where we can put the wave offset.
     128             :   } else {
     129             :     // We can put the segment buffer in (Idx - 4) ... (Idx - 1) and put the
     130             :     // wave offset before it.
     131         173 :     Reg = RegCount - 5;
     132             :   }
     133             : 
     134             :   return Reg;
     135             : }
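                      : // Worked example (register counts hypothetical): RegCount == 102 has
                      : // 102 & 3 == 2, so the 4-aligned buffer cannot use the top registers and the
                      : // wave offset takes the topmost one, index 101. RegCount == 96 is 4-aligned,
                      : // so the buffer occupies indices 92..95 and the wave offset sits just below
                      : // it at index 91.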
     136             : 
     137       14402 : unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
     138             :   const MachineFunction &MF) const {
     139       14402 :   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
     140       28804 :   unsigned Reg = findPrivateSegmentWaveByteOffsetRegIndex(ST.getMaxNumSGPRs(MF));
     141       28804 :   return AMDGPU::SGPR_32RegClass.getRegister(Reg);
     142             : }
     143             : 
     144         988 : unsigned SIRegisterInfo::reservedStackPtrOffsetReg(
     145             :   const MachineFunction &MF) const {
     146         988 :   return AMDGPU::SGPR32;
     147             : }
     148             : 
     149       29970 : BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
     150       29970 :   BitVector Reserved(getNumRegs());
     151             : 
      152             :   // EXEC_LO and EXEC_HI could be allocated and used as regular registers, but
     153             :   // this seems likely to result in bugs, so I'm marking them as reserved.
     154       29970 :   reserveRegisterTuples(Reserved, AMDGPU::EXEC);
     155       29970 :   reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
     156             : 
      157             :   // M0 has to be reserved so that LLVM accepts it as a live-in to a block.
     158       29970 :   reserveRegisterTuples(Reserved, AMDGPU::M0);
     159             : 
     160             :   // Reserve the memory aperture registers.
     161       29970 :   reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
     162       29970 :   reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
     163       29970 :   reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
     164       29970 :   reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
     165             : 
     166             :   // Reserve Trap Handler registers - support is not implemented in Codegen.
     167       29970 :   reserveRegisterTuples(Reserved, AMDGPU::TBA);
     168       29970 :   reserveRegisterTuples(Reserved, AMDGPU::TMA);
     169       29970 :   reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
     170       29970 :   reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
     171       29970 :   reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
     172       29970 :   reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
     173       29970 :   reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
     174       29970 :   reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
     175             : 
     176       29970 :   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
     177             : 
     178       29970 :   unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
     179       59940 :   unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
     180      159398 :   for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
     181      258856 :     unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
     182      129428 :     reserveRegisterTuples(Reserved, Reg);
     183             :   }
     184             : 
     185       29970 :   unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
     186       59940 :   unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
     187       35010 :   for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
     188       10080 :     unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
     189        5040 :     reserveRegisterTuples(Reserved, Reg);
     190             :   }
     191             : 
     192       29970 :   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
     193             : 
     194       29970 :   unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
     195       29970 :   if (ScratchWaveOffsetReg != AMDGPU::NoRegister) {
     196             :     // Reserve 1 SGPR for scratch wave offset in case we need to spill.
     197       29970 :     reserveRegisterTuples(Reserved, ScratchWaveOffsetReg);
     198             :   }
     199             : 
     200       29970 :   unsigned ScratchRSrcReg = MFI->getScratchRSrcReg();
     201       29970 :   if (ScratchRSrcReg != AMDGPU::NoRegister) {
     202             :     // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need
     203             :     // to spill.
     204             :     // TODO: May need to reserve a VGPR if doing LDS spilling.
     205       29970 :     reserveRegisterTuples(Reserved, ScratchRSrcReg);
     206             :     assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg));
     207             :   }
     208             : 
     209             :   // We have to assume the SP is needed in case there are calls in the function,
     210             :   // which is detected after the function is lowered. If we aren't really going
     211             :   // to need SP, don't bother reserving it.
     212       29970 :   unsigned StackPtrReg = MFI->getStackPtrOffsetReg();
     213             : 
     214       29970 :   if (StackPtrReg != AMDGPU::NoRegister) {
     215       29970 :     reserveRegisterTuples(Reserved, StackPtrReg);
     216             :     assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
     217             :   }
     218             : 
     219       29970 :   unsigned FrameReg = MFI->getFrameOffsetReg();
     220       29970 :   if (FrameReg != AMDGPU::NoRegister) {
     221       29970 :     reserveRegisterTuples(Reserved, FrameReg);
     222             :     assert(!isSubRegister(ScratchRSrcReg, FrameReg));
     223             :   }
     224             : 
     225       29970 :   return Reserved;
     226             : }
     227             : 
     228       29664 : bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
     229       29664 :   const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>();
     230       29664 :   if (Info->isEntryFunction()) {
     231       28338 :     const MachineFrameInfo &MFI = Fn.getFrameInfo();
     232       28338 :     return MFI.hasStackObjects() || MFI.hasCalls();
     233             :   }
     234             : 
     235             :   // May need scavenger for dealing with callee saved registers.
     236             :   return true;
     237             : }
     238             : 
     239       14832 : bool SIRegisterInfo::requiresFrameIndexScavenging(
     240             :   const MachineFunction &MF) const {
     241       14832 :   const MachineFrameInfo &MFI = MF.getFrameInfo();
     242       14832 :   if (MFI.hasStackObjects())
     243             :     return true;
     244             : 
     245             :   // May need to deal with callee saved registers.
     246       14288 :   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
     247       14288 :   return !Info->isEntryFunction();
     248             : }
     249             : 
     250       14522 : bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
     251             :   const MachineFunction &MF) const {
     252             :   // m0 is needed for the scalar store offset. m0 is unallocatable, so we can't
     253             :   // create a virtual register for it during frame index elimination, so the
     254             :   // scavenger is directly needed.
     255       29588 :   return MF.getFrameInfo().hasStackObjects() &&
     256       14781 :          MF.getSubtarget<SISubtarget>().hasScalarStores() &&
     257       14781 :          MF.getInfo<SIMachineFunctionInfo>()->hasSpilledSGPRs();
     258             : }
     259             : 
     260       14825 : bool SIRegisterInfo::requiresVirtualBaseRegisters(
     261             :   const MachineFunction &) const {
     262             :   // There are no special dedicated stack or frame pointers.
     263       14825 :   return true;
     264             : }
     265             : 
     266       29688 : bool SIRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
     267             :   // This helps catch bugs as verifier errors.
     268       29688 :   return true;
     269             : }
     270             : 
     271        4409 : int64_t SIRegisterInfo::getMUBUFInstrOffset(const MachineInstr *MI) const {
     272             :   assert(SIInstrInfo::isMUBUF(*MI));
     273             : 
     274        8818 :   int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
     275        4409 :                                           AMDGPU::OpName::offset);
     276        8818 :   return MI->getOperand(OffIdx).getImm();
     277             : }
     278             : 
     279           4 : int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
     280             :                                                  int Idx) const {
     281           4 :   if (!SIInstrInfo::isMUBUF(*MI))
     282             :     return 0;
     283             : 
     284             :   assert(Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
     285             :                                            AMDGPU::OpName::vaddr) &&
     286             :          "Should never see frame index on non-address operand");
     287             : 
     288           4 :   return getMUBUFInstrOffset(MI);
     289             : }
     290             : 
     291        4747 : bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
     292        4747 :   if (!MI->mayLoadOrStore())
     293             :     return false;
     294             : 
     295        4405 :   int64_t FullOffset = Offset + getMUBUFInstrOffset(MI);
     296             : 
     297        4405 :   return !isUInt<12>(FullOffset);
     298             : }
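                      : // The isUInt<12> check reflects the 12-bit unsigned immediate offset of MUBUF
                      : // instructions, i.e. [0, 4095]. Hypothetical example: a base offset of 4000
                      : // plus a 200-byte instruction offset gives 4200, which overflows the field
                      : // and therefore needs a materialized frame base register.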
     299             : 
     300           0 : void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
     301             :                                                   unsigned BaseReg,
     302             :                                                   int FrameIdx,
     303             :                                                   int64_t Offset) const {
     304           0 :   MachineBasicBlock::iterator Ins = MBB->begin();
     305           0 :   DebugLoc DL; // Defaults to "unknown"
     306             : 
     307           0 :   if (Ins != MBB->end())
     308           0 :     DL = Ins->getDebugLoc();
     309             : 
     310           0 :   MachineFunction *MF = MBB->getParent();
     311           0 :   const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
     312           0 :   const SIInstrInfo *TII = Subtarget.getInstrInfo();
     313             : 
     314           0 :   if (Offset == 0) {
     315           0 :     BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), BaseReg)
     316           0 :       .addFrameIndex(FrameIdx);
     317           0 :     return;
     318             :   }
     319             : 
     320           0 :   MachineRegisterInfo &MRI = MF->getRegInfo();
     321           0 :   unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
     322             : 
     323           0 :   unsigned FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
     324             : 
     325           0 :   BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
     326           0 :     .addImm(Offset);
     327           0 :   BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), FIReg)
     328           0 :     .addFrameIndex(FrameIdx);
     329             : 
     330           0 :   TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
     331           0 :     .addReg(OffsetReg, RegState::Kill)
     332           0 :     .addReg(FIReg);
     333             : }
     334             : 
     335           0 : void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
     336             :                                        int64_t Offset) const {
     337             : 
     338           0 :   MachineBasicBlock *MBB = MI.getParent();
     339           0 :   MachineFunction *MF = MBB->getParent();
     340           0 :   const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
     341           0 :   const SIInstrInfo *TII = Subtarget.getInstrInfo();
     342             : 
     343             : #ifndef NDEBUG
     344             :   // FIXME: Is it possible to be storing a frame index to itself?
     345             :   bool SeenFI = false;
     346             :   for (const MachineOperand &MO: MI.operands()) {
     347             :     if (MO.isFI()) {
     348             :       if (SeenFI)
     349             :         llvm_unreachable("should not see multiple frame indices");
     350             : 
     351             :       SeenFI = true;
     352             :     }
     353             :   }
     354             : #endif
     355             : 
     356           0 :   MachineOperand *FIOp = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
     357             :   assert(FIOp && FIOp->isFI() && "frame index must be address operand");
     358             :   assert(TII->isMUBUF(MI));
     359             :   assert(TII->getNamedOperand(MI, AMDGPU::OpName::soffset)->getReg() ==
     360             :          MF->getInfo<SIMachineFunctionInfo>()->getFrameOffsetReg() &&
     361             :          "should only be seeing frame offset relative FrameIndex");
     362             : 
     363             : 
     364           0 :   MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
     365           0 :   int64_t NewOffset = OffsetOp->getImm() + Offset;
     366             :   assert(isUInt<12>(NewOffset) && "offset should be legal");
     367             : 
     368           0 :   FIOp->ChangeToRegister(BaseReg, false);
     369           0 :   OffsetOp->setImm(NewOffset);
     370           0 : }
     371             : 
     372           0 : bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
     373             :                                         unsigned BaseReg,
     374             :                                         int64_t Offset) const {
     375           0 :   if (!SIInstrInfo::isMUBUF(*MI))
     376             :     return false;
     377             : 
     378           0 :   int64_t NewOffset = Offset + getMUBUFInstrOffset(MI);
     379             : 
     380           0 :   return isUInt<12>(NewOffset);
     381             : }
     382             : 
     383           0 : const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
     384             :   const MachineFunction &MF, unsigned Kind) const {
     385             :   // This is inaccurate. It depends on the instruction and address space. The
     386             :   // only place where we should hit this is for dealing with frame indexes /
     387             :   // private accesses, so this is correct in that case.
     388           0 :   return &AMDGPU::VGPR_32RegClass;
     389             : }
     390             : 
     391        1166 : static unsigned getNumSubRegsForSpillOp(unsigned Op) {
     392             : 
     393        1166 :   switch (Op) {
     394             :   case AMDGPU::SI_SPILL_S512_SAVE:
     395             :   case AMDGPU::SI_SPILL_S512_RESTORE:
     396             :   case AMDGPU::SI_SPILL_V512_SAVE:
     397             :   case AMDGPU::SI_SPILL_V512_RESTORE:
     398             :     return 16;
     399           0 :   case AMDGPU::SI_SPILL_S256_SAVE:
     400             :   case AMDGPU::SI_SPILL_S256_RESTORE:
     401             :   case AMDGPU::SI_SPILL_V256_SAVE:
     402             :   case AMDGPU::SI_SPILL_V256_RESTORE:
     403           0 :     return 8;
     404         657 :   case AMDGPU::SI_SPILL_S128_SAVE:
     405             :   case AMDGPU::SI_SPILL_S128_RESTORE:
     406             :   case AMDGPU::SI_SPILL_V128_SAVE:
     407             :   case AMDGPU::SI_SPILL_V128_RESTORE:
     408         657 :     return 4;
     409           0 :   case AMDGPU::SI_SPILL_V96_SAVE:
     410             :   case AMDGPU::SI_SPILL_V96_RESTORE:
     411           0 :     return 3;
     412          12 :   case AMDGPU::SI_SPILL_S64_SAVE:
     413             :   case AMDGPU::SI_SPILL_S64_RESTORE:
     414             :   case AMDGPU::SI_SPILL_V64_SAVE:
     415             :   case AMDGPU::SI_SPILL_V64_RESTORE:
     416          12 :     return 2;
     417         497 :   case AMDGPU::SI_SPILL_S32_SAVE:
     418             :   case AMDGPU::SI_SPILL_S32_RESTORE:
     419             :   case AMDGPU::SI_SPILL_V32_SAVE:
     420             :   case AMDGPU::SI_SPILL_V32_RESTORE:
     421         497 :     return 1;
     422           0 :   default: llvm_unreachable("Invalid spill opcode");
     423             :   }
     424             : }
     425             : 
     426             : static int getOffsetMUBUFStore(unsigned Opc) {
     427        3051 :   switch (Opc) {
     428             :   case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
     429             :     return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
     430             :   case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
     431             :     return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
     432             :   case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
     433             :     return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
     434             :   case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
     435             :     return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
     436             :   case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
     437             :     return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
     438             :   default:
     439             :     return -1;
     440             :   }
     441             : }
     442             : 
     443             : static int getOffsetMUBUFLoad(unsigned Opc) {
     444        1735 :   switch (Opc) {
     445             :   case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
     446             :     return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
     447             :   case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
     448             :     return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
     449             :   case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
     450             :     return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
     451             :   case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
     452             :     return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
     453             :   case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
     454             :     return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
     455             :   case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
     456             :     return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
     457             :   case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
     458             :     return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
     459             :   default:
     460             :     return -1;
     461             :   }
     462             : }
     463             : 
     464             : // This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
     465             : // need to handle the case where an SGPR may need to be spilled while spilling.
     466        4786 : static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
     467             :                                       MachineFrameInfo &MFI,
     468             :                                       MachineBasicBlock::iterator MI,
     469             :                                       int Index,
     470             :                                       int64_t Offset) {
     471        4786 :   MachineBasicBlock *MBB = MI->getParent();
     472        9572 :   const DebugLoc &DL = MI->getDebugLoc();
     473        4786 :   bool IsStore = MI->mayStore();
     474             : 
     475        9572 :   unsigned Opc = MI->getOpcode();
     476        4786 :   int LoadStoreOp = IsStore ?
     477             :     getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
     478             :   if (LoadStoreOp == -1)
     479             :     return false;
     480             : 
     481        4786 :   const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
     482        9572 :   BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
     483        4786 :     .add(*Reg)
     484        9572 :     .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
     485        9572 :     .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
     486        4786 :     .addImm(Offset)
     487        4786 :     .addImm(0) // glc
     488        4786 :     .addImm(0) // slc
     489        4786 :     .addImm(0) // tfe
     490       19144 :     .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
     491             :   return true;
     492             : }
     493             : 
     494        2244 : void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
     495             :                                          unsigned LoadStoreOp,
     496             :                                          int Index,
     497             :                                          unsigned ValueReg,
     498             :                                          bool IsKill,
     499             :                                          unsigned ScratchRsrcReg,
     500             :                                          unsigned ScratchOffsetReg,
     501             :                                          int64_t InstOffset,
     502             :                                          MachineMemOperand *MMO,
     503             :                                          RegScavenger *RS) const {
     504        2244 :   MachineBasicBlock *MBB = MI->getParent();
     505        2244 :   MachineFunction *MF = MI->getParent()->getParent();
     506        2244 :   const SISubtarget &ST =  MF->getSubtarget<SISubtarget>();
     507        2244 :   const SIInstrInfo *TII = ST.getInstrInfo();
     508        2244 :   const MachineFrameInfo &MFI = MF->getFrameInfo();
     509             : 
     510        4488 :   const MCInstrDesc &Desc = TII->get(LoadStoreOp);
     511        4488 :   const DebugLoc &DL = MI->getDebugLoc();
     512        2244 :   bool IsStore = Desc.mayStore();
     513             : 
     514        2244 :   bool RanOutOfSGPRs = false;
     515        2244 :   bool Scavenged = false;
     516        2244 :   unsigned SOffset = ScratchOffsetReg;
     517             : 
     518        2244 :   const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
     519        4488 :   unsigned NumSubRegs = AMDGPU::getRegBitWidth(RC->getID()) / 32;
     520        2244 :   unsigned Size = NumSubRegs * 4;
     521        2244 :   int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
     522        2244 :   const int64_t OriginalImmOffset = Offset;
     523             : 
     524        2244 :   unsigned Align = MFI.getObjectAlignment(Index);
     525        2244 :   const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();
     526             : 
     527        2244 :   if (!isUInt<12>(Offset + Size)) {
     528         232 :     SOffset = AMDGPU::NoRegister;
     529             : 
     530             :     // We don't have access to the register scavenger if this function is called
      531             :     // during PEI::scavengeFrameVirtualRegs().
     532         232 :     if (RS)
     533           0 :       SOffset = RS->FindUnusedReg(&AMDGPU::SGPR_32RegClass);
     534             : 
     535           0 :     if (SOffset == AMDGPU::NoRegister) {
      536             :       // There are no free SGPRs, and we are in the process of spilling VGPRs
      537             :       // too.  Since we need a VGPR in order to spill SGPRs (this is true on
      538             :       // SI/CI, and on VI it is true until we implement spilling using scalar
      539             :       // stores), we have no way to free up an SGPR.  Our solution here is to
      540             :       // add the offset directly to the ScratchOffset register, and then
      541             :       // subtract the offset after the spill to return ScratchOffset to its
      542             :       // original value.
     543             :       RanOutOfSGPRs = true;
     544             :       SOffset = ScratchOffsetReg;
     545             :     } else {
     546           0 :       Scavenged = true;
     547             :     }
     548             : 
     549         696 :     BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
     550         232 :       .addReg(ScratchOffsetReg)
     551         232 :       .addImm(Offset);
     552             : 
     553         232 :     Offset = 0;
     554             :   }
     555             : 
     556        2244 :   const unsigned EltSize = 4;
     557             : 
     558        8463 :   for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += EltSize) {
     559       11535 :     unsigned SubReg = NumSubRegs == 1 ?
     560       11535 :       ValueReg : getSubReg(ValueReg, getSubRegFromChannel(i));
     561             : 
     562        6219 :     unsigned SOffsetRegState = 0;
     563       12438 :     unsigned SrcDstRegState = getDefRegState(!IsStore);
     564        6219 :     if (i + 1 == e) {
     565        2244 :       SOffsetRegState |= getKillRegState(Scavenged);
     566             :       // The last implicit use carries the "Kill" flag.
     567        2244 :       SrcDstRegState |= getKillRegState(IsKill);
     568             :     }
     569             : 
     570       12438 :     MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i);
     571             :     MachineMemOperand *NewMMO
     572       18657 :       = MF->getMachineMemOperand(PInfo, MMO->getFlags(),
     573       18657 :                                  EltSize, MinAlign(Align, EltSize * i));
     574             : 
     575       12438 :     auto MIB = BuildMI(*MBB, MI, DL, Desc)
     576       12438 :       .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill))
     577        6219 :       .addReg(ScratchRsrcReg)
     578        6219 :       .addReg(SOffset, SOffsetRegState)
     579        6219 :       .addImm(Offset)
     580        6219 :       .addImm(0) // glc
     581        6219 :       .addImm(0) // slc
     582        6219 :       .addImm(0) // tfe
     583        6219 :       .addMemOperand(NewMMO);
     584             : 
     585        6219 :     if (NumSubRegs > 1)
     586        5316 :       MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
     587             :   }
     588             : 
     589        2244 :   if (RanOutOfSGPRs) {
     590             :     // Subtract the offset we added to the ScratchOffset register.
     591         696 :     BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffsetReg)
     592         232 :       .addReg(ScratchOffsetReg)
     593         232 :       .addImm(OriginalImmOffset);
     594             :   }
     595        2244 : }
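                      : // Hedged example of the overflow path above (numbers hypothetical): spilling
                      : // a 4-dword tuple at object offset 4092 gives Offset + Size = 4108, which
                      : // fails isUInt<12>, so the offset is folded into SOffset (a scavenged SGPR,
                      : // or ScratchOffsetReg itself with the S_SUB_U32 undo at the end) and the
                      : // per-element immediates restart at 0, 4, 8, 12.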
     596             : 
     597             : static std::pair<unsigned, unsigned> getSpillEltSize(unsigned SuperRegSize,
     598             :                                                      bool Store) {
     599          28 :   if (SuperRegSize % 16 == 0) {
     600             :     return { 16, Store ? AMDGPU::S_BUFFER_STORE_DWORDX4_SGPR :
     601             :                          AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR };
     602             :   }
     603             : 
     604          22 :   if (SuperRegSize % 8 == 0) {
     605             :     return { 8, Store ? AMDGPU::S_BUFFER_STORE_DWORDX2_SGPR :
     606             :                         AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR };
     607             :   }
     608             : 
     609             :   return { 4, Store ? AMDGPU::S_BUFFER_STORE_DWORD_SGPR :
     610          12 :                       AMDGPU::S_BUFFER_LOAD_DWORD_SGPR};
     611             : }
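                      : // Assuming SuperRegSize is in bytes, as at the getRegSizeInBits(*RC) / 8 call
                      : // sites: a 64-byte (512-bit) tuple spills with 16-byte DWORDX4 buffer ops, an
                      : // 8-byte (64-bit) pair with DWORDX2, and anything else falls back to
                      : // single-dword ops.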
     612             : 
     613         597 : bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
     614             :                                int Index,
     615             :                                RegScavenger *RS,
     616             :                                bool OnlyToVGPR) const {
     617         597 :   MachineBasicBlock *MBB = MI->getParent();
     618         597 :   MachineFunction *MF = MBB->getParent();
     619         597 :   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
     620             : 
     621             :   ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
     622         597 :     = MFI->getSGPRToVGPRSpills(Index);
     623        1194 :   bool SpillToVGPR = !VGPRSpills.empty();
     624         597 :   if (OnlyToVGPR && !SpillToVGPR)
     625             :     return false;
     626             : 
     627         597 :   MachineRegisterInfo &MRI = MF->getRegInfo();
     628         597 :   const SISubtarget &ST =  MF->getSubtarget<SISubtarget>();
     629         597 :   const SIInstrInfo *TII = ST.getInstrInfo();
     630             : 
     631         597 :   unsigned SuperReg = MI->getOperand(0).getReg();
     632        1194 :   bool IsKill = MI->getOperand(0).isKill();
     633        1194 :   const DebugLoc &DL = MI->getDebugLoc();
     634             : 
     635         597 :   MachineFrameInfo &FrameInfo = MF->getFrameInfo();
     636             : 
     637         597 :   bool SpillToSMEM = spillSGPRToSMEM();
     638         597 :   if (SpillToSMEM && OnlyToVGPR)
     639             :     return false;
     640             : 
     641             :   assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
     642             : 
     643         597 :   unsigned OffsetReg = AMDGPU::M0;
     644         597 :   unsigned M0CopyReg = AMDGPU::NoRegister;
     645             : 
     646         597 :   if (SpillToSMEM) {
     647          14 :     if (RS->isRegUsed(AMDGPU::M0)) {
     648          14 :       M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
     649          42 :       BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
     650          14 :         .addReg(AMDGPU::M0);
     651             :     }
     652             :   }
     653             : 
     654             :   unsigned ScalarStoreOp;
     655         597 :   unsigned EltSize = 4;
     656         597 :   const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
     657         611 :   if (SpillToSMEM && isSGPRClass(RC)) {
     658             :     // XXX - if private_element_size is larger than 4 it might be useful to be
     659             :     // able to spill wider vmem spills.
     660          14 :     std::tie(EltSize, ScalarStoreOp) =
     661          56 :           getSpillEltSize(getRegSizeInBits(*RC) / 8, true);
     662             :   }
     663             : 
     664         597 :   ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
     665         597 :   unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
     666             : 
     667             :   // SubReg carries the "Kill" flag when SubReg == SuperReg.
     668        1194 :   unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
     669        1613 :   for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
     670        1560 :     unsigned SubReg = NumSubRegs == 1 ?
     671        2104 :       SuperReg : getSubReg(SuperReg, SplitParts[i]);
     672             : 
     673        1016 :     if (SpillToSMEM) {
     674          15 :       int64_t FrOffset = FrameInfo.getObjectOffset(Index);
     675             : 
     676             :       // The allocated memory size is really the wavefront size * the frame
     677             :       // index size. The widest register class is 64 bytes, so a 4-byte scratch
     678             :       // allocation is enough to spill this in a single stack object.
     679             :       //
     680             :       // FIXME: Frame size/offsets are computed earlier than this, so the extra
     681             :       // space is still unnecessarily allocated.
     682             : 
     683          15 :       unsigned Align = FrameInfo.getObjectAlignment(Index);
     684             :       MachinePointerInfo PtrInfo
     685          15 :         = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
     686             :       MachineMemOperand *MMO
     687          45 :         = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
     688          45 :                                    EltSize, MinAlign(Align, EltSize * i));
     689             : 
     690             :       // SMEM instructions only support a single offset, so increment the wave
     691             :       // offset.
     692             : 
     693          15 :       int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
     694          15 :       if (Offset != 0) {
     695          45 :         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
     696          15 :           .addReg(MFI->getFrameOffsetReg())
     697          15 :           .addImm(Offset);
     698             :       } else {
     699           0 :         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
     700           0 :           .addReg(MFI->getFrameOffsetReg());
     701             :       }
     702             : 
     703          45 :       BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp))
     704          15 :         .addReg(SubReg, getKillRegState(IsKill)) // sdata
     705          15 :         .addReg(MFI->getScratchRSrcReg())        // sbase
     706          15 :         .addReg(OffsetReg, RegState::Kill)       // soff
     707          15 :         .addImm(0)                               // glc
     708          15 :         .addMemOperand(MMO);
     709             : 
     710          15 :       continue;
     711             :     }
     712             : 
     713        1001 :     if (SpillToVGPR) {
     714        1838 :       SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
     715             : 
     716        1838 :       BuildMI(*MBB, MI, DL,
     717             :               TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
     718        1838 :               Spill.VGPR)
     719         919 :         .addReg(SubReg, getKillRegState(IsKill))
     720        1838 :         .addImm(Spill.Lane);
     721             : 
     722             :       // FIXME: Since this spills to another register instead of an actual
     723             :       // frame index, we should delete the frame index when all references to
     724             :       // it are fixed.
     725             :     } else {
      726             :       // XXX - Can the to-VGPR spill fail for some subregisters but not others?
     727          82 :       if (OnlyToVGPR)
     728           0 :         return false;
     729             : 
     730             :       // Spill SGPR to a frame index.
     731             :       // TODO: Should VI try to spill to VGPR and then spill to SMEM?
     732          82 :       unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
     734             : 
     735             :       MachineInstrBuilder Mov
     736         246 :         = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
     737          82 :         .addReg(SubReg, SubKillState);
     738             : 
     739             : 
     740             :       // There could be undef components of a spilled super register.
     741             :       // TODO: Can we detect this and skip the spill?
     742          82 :       if (NumSubRegs > 1) {
     743             :         // The last implicit use of the SuperReg carries the "Kill" flag.
     744          76 :         unsigned SuperKillState = 0;
     745          76 :         if (i + 1 == e)
     746             :           SuperKillState |= getKillRegState(IsKill);
     747          76 :         Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
     748             :       }
     749             : 
     750          82 :       unsigned Align = FrameInfo.getObjectAlignment(Index);
     751             :       MachinePointerInfo PtrInfo
      752          82 :         .addReg(MFI->getScratchRSrcReg())  // srsrc
     753             :       MachineMemOperand *MMO
     754         246 :         = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
     755         246 :                                    EltSize, MinAlign(Align, EltSize * i));
     756         246 :       BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
     757          82 :         .addReg(TmpReg, RegState::Kill)    // src
     758          82 :         .addFrameIndex(Index)              // vaddr
     759          82 :         .addReg(MFI->getScratchRSrcReg())  // srrsrc
     760          82 :         .addReg(MFI->getFrameOffsetReg())  // soffset
     761         164 :         .addImm(i * 4)                     // offset
     762          82 :         .addMemOperand(MMO);
     763             :     }
     764             :   }
     765             : 
     766         597 :   if (M0CopyReg != AMDGPU::NoRegister) {
     767          42 :     BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
     768          14 :       .addReg(M0CopyReg, RegState::Kill);
     769             :   }
     770             : 
     771         597 :   MI->eraseFromParent();
     772        1194 :   MFI->addToSpilledSGPRs(NumSubRegs);
     773         597 :   return true;
     774             : }
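                      : // Summary of the SGPR-to-VGPR path above: each 32-bit piece of the super
                      : // register is written to one lane of the spill VGPR with V_WRITELANE_B32, so
                      : // a 128-bit tuple consumes four lanes taken from the precomputed
                      : // getSGPRToVGPRSpills mapping. Only when no lanes were reserved does the code
                      : // fall back to a V_MOV_B32 plus SI_SPILL_V32_SAVE per piece.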
     775             : 
     776         585 : bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
     777             :                                  int Index,
     778             :                                  RegScavenger *RS,
     779             :                                  bool OnlyToVGPR) const {
     780         585 :   MachineFunction *MF = MI->getParent()->getParent();
     781         585 :   MachineRegisterInfo &MRI = MF->getRegInfo();
     782         585 :   MachineBasicBlock *MBB = MI->getParent();
     783         585 :   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
     784             : 
     785             :   ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
     786         585 :     = MFI->getSGPRToVGPRSpills(Index);
     787        1170 :   bool SpillToVGPR = !VGPRSpills.empty();
     788         585 :   if (OnlyToVGPR && !SpillToVGPR)
     789             :     return false;
     790             : 
     791         585 :   MachineFrameInfo &FrameInfo = MF->getFrameInfo();
     792         585 :   const SISubtarget &ST =  MF->getSubtarget<SISubtarget>();
     793         585 :   const SIInstrInfo *TII = ST.getInstrInfo();
     794        1170 :   const DebugLoc &DL = MI->getDebugLoc();
     795             : 
     796         585 :   unsigned SuperReg = MI->getOperand(0).getReg();
     797         585 :   bool SpillToSMEM = spillSGPRToSMEM();
     798         585 :   if (SpillToSMEM && OnlyToVGPR)
     799             :     return false;
     800             : 
     801             :   assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
     802             : 
     803         585 :   unsigned OffsetReg = AMDGPU::M0;
     804         585 :   unsigned M0CopyReg = AMDGPU::NoRegister;
     805             : 
     806         585 :   if (SpillToSMEM) {
     807          14 :     if (RS->isRegUsed(AMDGPU::M0)) {
     808          14 :       M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
     809          42 :       BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
     810          14 :         .addReg(AMDGPU::M0);
     811             :     }
     812             :   }
     813             : 
     814         585 :   unsigned EltSize = 4;
     815             :   unsigned ScalarLoadOp;
     816             : 
     817         585 :   const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
     818         599 :   if (SpillToSMEM && isSGPRClass(RC)) {
     819             :     // XXX - if private_element_size is larger than 4 it might be useful to be
     820             :     // able to spill wider vmem spills.
     821          14 :     std::tie(EltSize, ScalarLoadOp) =
     822          56 :           getSpillEltSize(getRegSizeInBits(*RC) / 8, false);
     823             :   }
     824             : 
     825         585 :   ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
     826         585 :   unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
     827             : 
     828             :   // SubReg carries the "Kill" flag when SubReg == SuperReg.
     829         585 :   int64_t FrOffset = FrameInfo.getObjectOffset(Index);
     830             : 
     831        1583 :   for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
     832        1530 :     unsigned SubReg = NumSubRegs == 1 ?
     833        2062 :       SuperReg : getSubReg(SuperReg, SplitParts[i]);
     834             : 
     835         998 :     if (SpillToSMEM) {
     836             :       // FIXME: Size may be > 4 but extra bytes wasted.
     837          15 :       unsigned Align = FrameInfo.getObjectAlignment(Index);
     838             :       MachinePointerInfo PtrInfo
     839          15 :         = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
     840             :       MachineMemOperand *MMO
     841          45 :         = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
     842          45 :                                    EltSize, MinAlign(Align, EltSize * i));
     843             : 
      844             :       // Offset = wavefront size * frame offset + EltSize * i for this piece.
     845          15 :       int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
     846          15 :       if (Offset != 0) {
     847          45 :         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
     848          15 :           .addReg(MFI->getFrameOffsetReg())
     849          15 :           .addImm(Offset);
     850             :       } else {
     851           0 :         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
     852           0 :           .addReg(MFI->getFrameOffsetReg());
     853             :       }
     854             : 
     855             :       auto MIB =
     856          45 :         BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg)
     857          15 :         .addReg(MFI->getScratchRSrcReg()) // sbase
     858          15 :         .addReg(OffsetReg, RegState::Kill)                // soff
     859          15 :         .addImm(0)                        // glc
     860          15 :         .addMemOperand(MMO);
     861             : 
     862          15 :       if (NumSubRegs > 1)
     863           2 :         MIB.addReg(SuperReg, RegState::ImplicitDefine);
     864             : 
     865          15 :       continue;
     866             :     }
     867             : 
     868         983 :     if (SpillToVGPR) {
     869        1802 :       SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
     870             :       auto MIB =
     871        1802 :         BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
     872         901 :                 SubReg)
     873         901 :         .addReg(Spill.VGPR)
     874        1802 :         .addImm(Spill.Lane);
     875             : 
     876         901 :       if (NumSubRegs > 1)
     877         454 :         MIB.addReg(SuperReg, RegState::ImplicitDefine);
     878             :     } else {
     879          82 :       if (OnlyToVGPR)
     880           0 :         return false;
     881             : 
     882             :       // Restore SGPR from a stack slot.
     883             :       // FIXME: We should use S_LOAD_DWORD here for VI.
     884          82 :       unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
     885          82 :       unsigned Align = FrameInfo.getObjectAlignment(Index);
     886             : 
     887             :       MachinePointerInfo PtrInfo
     888          82 :         = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
     889             : 
     890         246 :       MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo,
     891             :         MachineMemOperand::MOLoad, EltSize,
     892         246 :         MinAlign(Align, EltSize * i));
     893             : 
     894         246 :       BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
     895          82 :         .addFrameIndex(Index)              // vaddr
     896          82 :         .addReg(MFI->getScratchRSrcReg())  // srsrc
     897          82 :         .addReg(MFI->getFrameOffsetReg())  // soffset
     898         164 :         .addImm(i * 4)                     // offset
     899          82 :         .addMemOperand(MMO);
     900             : 
     901             :       auto MIB =
     902         246 :         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
     903          82 :         .addReg(TmpReg, RegState::Kill);
     904             : 
     905          82 :       if (NumSubRegs > 1)
     906          76 :         MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
     907             :     }
     908             :   }
     909             : 
     910         585 :   if (M0CopyReg != AMDGPU::NoRegister) {
     911          42 :     BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
     912          14 :       .addReg(M0CopyReg, RegState::Kill);
     913             :   }
     914             : 
     915         585 :   MI->eraseFromParent();
     916         585 :   return true;
     917             : }
     918             : 
     919             : /// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
     920             : /// a VGPR and the stack slot can be safely eliminated when all other users are
     921             : /// handled.
     922        1090 : bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
     923             :   MachineBasicBlock::iterator MI,
     924             :   int FI,
     925             :   RegScavenger *RS) const {
     926        1090 :   switch (MI->getOpcode()) {
     927         551 :   case AMDGPU::SI_SPILL_S512_SAVE:
     928             :   case AMDGPU::SI_SPILL_S256_SAVE:
     929             :   case AMDGPU::SI_SPILL_S128_SAVE:
     930             :   case AMDGPU::SI_SPILL_S64_SAVE:
     931             :   case AMDGPU::SI_SPILL_S32_SAVE:
     932         551 :     return spillSGPR(MI, FI, RS, true);
     933         539 :   case AMDGPU::SI_SPILL_S512_RESTORE:
     934             :   case AMDGPU::SI_SPILL_S256_RESTORE:
     935             :   case AMDGPU::SI_SPILL_S128_RESTORE:
     936             :   case AMDGPU::SI_SPILL_S64_RESTORE:
     937             :   case AMDGPU::SI_SPILL_S32_RESTORE:
     938         539 :     return restoreSGPR(MI, FI, RS, true);
     939           0 :   default:
     940           0 :     llvm_unreachable("not an SGPR spill instruction");
     941             :   }
     942             : }
     943             : 
     944        7478 : void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
     945             :                                         int SPAdj, unsigned FIOperandNum,
     946             :                                         RegScavenger *RS) const {
     947        7478 :   MachineFunction *MF = MI->getParent()->getParent();
     948        7478 :   MachineRegisterInfo &MRI = MF->getRegInfo();
     949        7478 :   MachineBasicBlock *MBB = MI->getParent();
     950        7478 :   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
     951        7478 :   MachineFrameInfo &FrameInfo = MF->getFrameInfo();
     952        7478 :   const SISubtarget &ST =  MF->getSubtarget<SISubtarget>();
     953        7478 :   const SIInstrInfo *TII = ST.getInstrInfo();
     954       17627 :   DebugLoc DL = MI->getDebugLoc();
     955             : 
     956       14956 :   MachineOperand &FIOp = MI->getOperand(FIOperandNum);
     957        7478 :   int Index = MI->getOperand(FIOperandNum).getIndex();
     958             : 
     959       14956 :   switch (MI->getOpcode()) {
     960             :     // SGPR register spill
     961          46 :     case AMDGPU::SI_SPILL_S512_SAVE:
     962             :     case AMDGPU::SI_SPILL_S256_SAVE:
     963             :     case AMDGPU::SI_SPILL_S128_SAVE:
     964             :     case AMDGPU::SI_SPILL_S64_SAVE:
     965             :     case AMDGPU::SI_SPILL_S32_SAVE: {
     966          46 :       spillSGPR(MI, Index, RS);
     967          46 :       break;
     968             :     }
     969             : 
     970             :     // SGPR register restore
     971          46 :     case AMDGPU::SI_SPILL_S512_RESTORE:
     972             :     case AMDGPU::SI_SPILL_S256_RESTORE:
     973             :     case AMDGPU::SI_SPILL_S128_RESTORE:
     974             :     case AMDGPU::SI_SPILL_S64_RESTORE:
     975             :     case AMDGPU::SI_SPILL_S32_RESTORE: {
     976          46 :       restoreSGPR(MI, Index, RS);
     977          46 :       break;
     978             :     }
     979             : 
     980             :     // VGPR register spill
     981        1166 :     case AMDGPU::SI_SPILL_V512_SAVE:
     982             :     case AMDGPU::SI_SPILL_V256_SAVE:
     983             :     case AMDGPU::SI_SPILL_V128_SAVE:
     984             :     case AMDGPU::SI_SPILL_V96_SAVE:
     985             :     case AMDGPU::SI_SPILL_V64_SAVE:
     986             :     case AMDGPU::SI_SPILL_V32_SAVE: {
     987        1166 :       const MachineOperand *VData = TII->getNamedOperand(*MI,
     988        1166 :                                                          AMDGPU::OpName::vdata);
     989        5830 :       buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
     990             :             Index,
     991        1166 :             VData->getReg(), VData->isKill(),
     992        1166 :             TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
     993        1166 :             TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
     994        1166 :             TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
     995        1166 :             *MI->memoperands_begin(),
     996             :             RS);
     997        3498 :       MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
     998        1166 :       MI->eraseFromParent();
     999        1166 :       break;
    1000             :     }
    1001        1078 :     case AMDGPU::SI_SPILL_V32_RESTORE:
    1002             :     case AMDGPU::SI_SPILL_V64_RESTORE:
    1003             :     case AMDGPU::SI_SPILL_V96_RESTORE:
    1004             :     case AMDGPU::SI_SPILL_V128_RESTORE:
    1005             :     case AMDGPU::SI_SPILL_V256_RESTORE:
    1006             :     case AMDGPU::SI_SPILL_V512_RESTORE: {
    1007        1078 :       const MachineOperand *VData = TII->getNamedOperand(*MI,
    1008        1078 :                                                          AMDGPU::OpName::vdata);
    1009             : 
    1010        5390 :       buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
    1011             :             Index,
    1012        1078 :             VData->getReg(), VData->isKill(),
    1013        1078 :             TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
    1014        1078 :             TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
    1015        1078 :             TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
    1016        1078 :             *MI->memoperands_begin(),
    1017             :             RS);
    1018        1078 :       MI->eraseFromParent();
    1019        1078 :       break;
    1020             :     }
    1021             : 
    1022        5142 :     default: {
    1023       10284 :       const DebugLoc &DL = MI->getDebugLoc();
    1024       10284 :       bool IsMUBUF = TII->isMUBUF(*MI);
    1025             : 
    1026        5482 :       if (!IsMUBUF &&
    1027         340 :           MFI->getFrameOffsetReg() != MFI->getScratchWaveOffsetReg()) {
    1028             :         // Convert to an absolute stack address by finding the offset from the
    1029             :         // scratch wave base and scaling by the wave size.
    1030             :         //
    1031             :         // In an entry function/kernel the stack address is already the absolute
     1032             :         // address relative to the scratch wave offset.
    1033             : 
    1034             :         unsigned DiffReg
    1035          21 :           = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
    1036             : 
    1037          42 :         bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32;
    1038          21 :         unsigned ResultReg = IsCopy ?
    1039          21 :           MI->getOperand(0).getReg() :
    1040          21 :           MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    1041             : 
    1042          63 :         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), DiffReg)
    1043          21 :           .addReg(MFI->getFrameOffsetReg())
    1044          21 :           .addReg(MFI->getScratchWaveOffsetReg());
    1045             : 
    1046          21 :         int64_t Offset = FrameInfo.getObjectOffset(Index);
    1047          21 :         if (Offset == 0) {
    1048             :           // XXX - This never happens because of emergency scavenging slot at 0?
    1049           0 :           BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ResultReg)
    1050           0 :             .addImm(Log2_32(ST.getWavefrontSize()))
    1051           0 :             .addReg(DiffReg);
    1052             :         } else {
    1053             :           unsigned CarryOut
    1054          21 :             = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
    1055             :           unsigned ScaledReg
    1056          21 :             = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    1057             : 
    1058          63 :           BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ScaledReg)
    1059          63 :             .addImm(Log2_32(ST.getWavefrontSize()))
    1060          21 :             .addReg(DiffReg, RegState::Kill);
    1061             : 
    1062             :           // TODO: Fold if use instruction is another add of a constant.
    1063          21 :           if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
    1064          57 :             BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ADD_I32_e64), ResultReg)
    1065          19 :               .addReg(CarryOut, RegState::Define | RegState::Dead)
    1066          19 :               .addImm(Offset)
    1067          19 :               .addReg(ScaledReg, RegState::Kill);
    1068             :           } else {
    1069             :             unsigned ConstOffsetReg
    1070           2 :               = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
    1071             : 
    1072           6 :             BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
    1073           2 :               .addImm(Offset);
    1074           6 :             BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ADD_I32_e64), ResultReg)
    1075           2 :               .addReg(CarryOut, RegState::Define | RegState::Dead)
    1076           2 :               .addReg(ConstOffsetReg, RegState::Kill)
    1077           2 :               .addReg(ScaledReg, RegState::Kill);
    1078             :           }
    1079             : 
    1080             :           MRI.setRegAllocationHint(CarryOut, 0, AMDGPU::VCC);
    1081             :         }
    1082             : 
    1083             :         // Don't introduce an extra copy if we're just materializing in a mov.
    1084          21 :         if (IsCopy)
    1085          21 :           MI->eraseFromParent();
    1086             :         else
    1087           0 :           FIOp.ChangeToRegister(ResultReg, false, false, true);
    1088        4807 :         return;
    1089             :       }
    1090             : 
    1091        5121 :       if (IsMUBUF) {
    1092             :         // Disable offen so we don't need a 0 vgpr base.
    1093             :         assert(static_cast<int>(FIOperandNum) ==
    1094             :                AMDGPU::getNamedOperandIdx(MI->getOpcode(),
    1095             :                                           AMDGPU::OpName::vaddr));
    1096             : 
    1097             :         assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg()
    1098             :                == MFI->getFrameOffsetReg());
    1099             : 
    1100        4802 :         int64_t Offset = FrameInfo.getObjectOffset(Index);
    1101             :         int64_t OldImm
    1102        4802 :           = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
    1103        4802 :         int64_t NewOffset = OldImm + Offset;
    1104             : 
    1105        9588 :         if (isUInt<12>(NewOffset) &&
    1106        4786 :             buildMUBUFOffsetLoadStore(TII, FrameInfo, MI, Index, NewOffset)) {
    1107        4786 :           MI->eraseFromParent();
    1108        4786 :           return;
    1109             :         }
    1110             :       }
    1111             : 
    1112             :       // If the offset is simply too big, don't convert to a scratch wave offset
    1113             :       // relative index.
    1114             : 
    1115         335 :       int64_t Offset = FrameInfo.getObjectOffset(Index);
    1116         335 :       FIOp.ChangeToImmediate(Offset);
    1117         335 :       if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
    1118          16 :         unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    1119          48 :         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
    1120          16 :           .addImm(Offset);
    1121          16 :         FIOp.ChangeToRegister(TmpReg, false, false, true);
    1122             :       }
    1123             :     }
    1124             :   }
    1125             : }
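
A minimal, self-contained sketch of the arithmetic the sequence above emits when rewriting a frame index into an absolute per-lane scratch offset: subtract the scratch wave offset, shift right by log2 of the wave size, then add the frame object's offset. All values here are made-up plain integers, not machine registers or the LLVM builder API.

    #include <cassert>
    #include <cstdint>
    #include <iostream>

    // Plain-integer illustration of the S_SUB_U32 / V_LSHRREV_B32 / V_ADD_I32
    // sequence built above.
    int64_t absoluteScratchOffset(int64_t FrameOffsetReg,
                                  int64_t ScratchWaveOffsetReg,
                                  int64_t ObjectOffset,
                                  unsigned WavefrontSize) {
      assert(WavefrontSize && (WavefrontSize & (WavefrontSize - 1)) == 0 &&
             "wavefront size must be a power of two");
      unsigned Log2Wave = 0;
      while ((1u << Log2Wave) < WavefrontSize)
        ++Log2Wave;
      int64_t Diff = FrameOffsetReg - ScratchWaveOffsetReg;   // S_SUB_U32
      int64_t Scaled = Diff >> Log2Wave;                      // V_LSHRREV_B32
      return Scaled + ObjectOffset;                           // V_ADD_I32
    }

    int main() {
      // A frame pointer 65536 bytes past the wave base, a 64-lane wave, and a
      // frame object at offset 16 give a per-lane offset of 1024 + 16 = 1040.
      std::cout << absoluteScratchOffset(65536, 0, 16, 64) << '\n';
    }
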
    1126             : 
    1127     6565654 : StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const {
    1128             :   #define AMDGPU_REG_ASM_NAMES
    1129             :   #include "AMDGPURegAsmNames.inc.cpp"
    1130             : 
    1131             :   #define REG_RANGE(BeginReg, EndReg, RegTable)            \
    1132             :     if (Reg >= BeginReg && Reg <= EndReg) {                \
    1133             :       unsigned Index = Reg - BeginReg;                     \
    1134             :       assert(Index < array_lengthof(RegTable));            \
    1135             :       return RegTable[Index];                              \
    1136             :     }
    1137             : 
    1138     7400944 :   REG_RANGE(AMDGPU::VGPR0, AMDGPU::VGPR255, VGPR32RegNames);
    1139     6467924 :   REG_RANGE(AMDGPU::SGPR0, AMDGPU::SGPR103, SGPR32RegNames);
    1140     5480484 :   REG_RANGE(AMDGPU::VGPR0_VGPR1, AMDGPU::VGPR254_VGPR255, VGPR64RegNames);
    1141     4703152 :   REG_RANGE(AMDGPU::SGPR0_SGPR1, AMDGPU::SGPR102_SGPR103, SGPR64RegNames);
    1142     4307096 :   REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2, AMDGPU::VGPR253_VGPR254_VGPR255,
    1143             :             VGPR96RegNames);
    1144             : 
    1145     4544689 :   REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3,
    1146             :             AMDGPU::VGPR252_VGPR253_VGPR254_VGPR255,
    1147             :             VGPR128RegNames);
    1148     4118233 :   REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3,
    1149             :             AMDGPU::SGPR100_SGPR101_SGPR102_SGPR103,
    1150             :             SGPR128RegNames);
    1151             : 
    1152     4253590 :   REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7,
    1153             :             AMDGPU::VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
    1154             :             VGPR256RegNames);
    1155             : 
    1156     4012329 :   REG_RANGE(
    1157             :     AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7_VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15,
    1158             :     AMDGPU::VGPR240_VGPR241_VGPR242_VGPR243_VGPR244_VGPR245_VGPR246_VGPR247_VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
    1159             :     VGPR512RegNames);
    1160             : 
    1161     3586914 :   REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7,
    1162             :             AMDGPU::SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
    1163             :             SGPR256RegNames);
    1164             : 
    1165     3561629 :   REG_RANGE(
    1166             :     AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7_SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15,
    1167             :     AMDGPU::SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95_SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
    1168             :     SGPR512RegNames
    1169             :   );
    1170             : 
    1171             : #undef REG_RANGE
    1172             : 
    1173             :   // FIXME: Rename flat_scr so we don't need to special case this.
    1174     3518875 :   switch (Reg) {
    1175        2851 :   case AMDGPU::FLAT_SCR:
    1176        2851 :     return "flat_scratch";
    1177        5983 :   case AMDGPU::FLAT_SCR_LO:
    1178        5983 :     return "flat_scratch_lo";
    1179        5983 :   case AMDGPU::FLAT_SCR_HI:
    1180        5983 :     return "flat_scratch_hi";
    1181     3504058 :   default:
    1182             :     // For the special named registers the default is fine.
    1183     3504058 :     return TargetRegisterInfo::getRegAsmName(Reg);
    1184             :   }
    1185             : }
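
The REG_RANGE macro used above is just a range check followed by a table lookup; a standalone sketch of the same pattern, with a made-up enum and name table rather than the generated AMDGPU tables from AMDGPURegAsmNames.inc.cpp:

    #include <cassert>
    #include <iostream>

    // Hypothetical contiguous register numbering standing in for the AMDGPU enum.
    enum HypotheticalReg { VGPR0 = 100, VGPR1, VGPR2, VGPR3 };
    static const char *const VGPR32Names[] = {"v0", "v1", "v2", "v3"};

    const char *regAsmName(unsigned Reg) {
      // Same shape as REG_RANGE: bounds check, then index into the name table.
      if (Reg >= VGPR0 && Reg <= VGPR3) {
        unsigned Index = Reg - VGPR0;
        assert(Index < sizeof(VGPR32Names) / sizeof(VGPR32Names[0]));
        return VGPR32Names[Index];
      }
      return "<default>"; // corresponds to the fall-through switch above
    }

    int main() { std::cout << regAsmName(VGPR2) << '\n'; } // prints "v2"
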
    1186             : 
    1187             : // FIXME: This is very slow. It might be worth creating a map from physreg to
    1188             : // register class.
    1189     4399663 : const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
    1190             :   assert(!TargetRegisterInfo::isVirtualRegister(Reg));
    1191             : 
    1192             :   static const TargetRegisterClass *const BaseClasses[] = {
    1193             :     &AMDGPU::VGPR_32RegClass,
    1194             :     &AMDGPU::SReg_32RegClass,
    1195             :     &AMDGPU::VReg_64RegClass,
    1196             :     &AMDGPU::SReg_64RegClass,
    1197             :     &AMDGPU::VReg_96RegClass,
    1198             :     &AMDGPU::VReg_128RegClass,
    1199             :     &AMDGPU::SReg_128RegClass,
    1200             :     &AMDGPU::VReg_256RegClass,
    1201             :     &AMDGPU::SReg_256RegClass,
    1202             :     &AMDGPU::VReg_512RegClass,
    1203             :     &AMDGPU::SReg_512RegClass,
    1204             :     &AMDGPU::SCC_CLASSRegClass,
    1205             :   };
    1206             : 
    1207    13969361 :   for (const TargetRegisterClass *BaseClass : BaseClasses) {
    1208    25476070 :     if (BaseClass->contains(Reg)) {
    1209             :       return BaseClass;
    1210             :     }
    1211             :   }
    1212             :   return nullptr;
    1213             : }
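
The FIXME above suggests replacing the per-query scan over the base classes with a precomputed mapping; a rough sketch of that idea, using made-up types (RegClassInfo, PhysRegClassCache) rather than anything in LLVM:

    #include <iostream>
    #include <unordered_map>

    // Stand-in for TargetRegisterClass.
    struct RegClassInfo { const char *Name; };

    class PhysRegClassCache {
      std::unordered_map<unsigned, const RegClassInfo *> Map;
    public:
      // Built once, e.g. by walking each base class and inserting every register
      // it contains; afterwards each query is an average O(1) hash lookup
      // instead of a scan over all base classes.
      void add(unsigned Reg, const RegClassInfo *RC) { Map.emplace(Reg, RC); }
      const RegClassInfo *lookup(unsigned Reg) const {
        auto It = Map.find(Reg);
        return It == Map.end() ? nullptr : It->second;
      }
    };

    int main() {
      static const RegClassInfo VGPR32{"VGPR_32"};
      PhysRegClassCache Cache;
      Cache.add(/*Reg=*/42, &VGPR32);
      const RegClassInfo *RC = Cache.lookup(42);
      std::cout << (RC ? RC->Name : "<none>") << '\n'; // prints "VGPR_32"
    }
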
    1214             : 
     1215             : // TODO: It might be helpful to have some target-specific flags in
    1216             : // TargetRegisterClass to mark which classes are VGPRs to make this trivial.
    1217     9246871 : bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
    1218    18493742 :   unsigned Size = getRegSizeInBits(*RC);
    1219     9246871 :   if (Size < 32)
    1220             :     return false;
    1221     9243030 :   switch (Size) {
    1222     4893543 :   case 32:
    1223     4893543 :     return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr;
    1224     3166184 :   case 64:
    1225     3166184 :     return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr;
    1226        1193 :   case 96:
    1227        1193 :     return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr;
    1228     1070183 :   case 128:
    1229     1070183 :     return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr;
    1230       74636 :   case 256:
    1231       74636 :     return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr;
    1232       37291 :   case 512:
    1233       37291 :     return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr;
    1234           0 :   default:
    1235           0 :     llvm_unreachable("Invalid register class size");
    1236             :   }
    1237             : }
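
The TODO above asks for a per-class flag so the size-based dispatch collapses to a single bit test; a sketch of what that check could look like, with an entirely hypothetical flag field that TargetRegisterClass does not currently have:

    #include <cstdint>
    #include <iostream>

    // Hypothetical per-class flag word, for illustration only.
    struct HypotheticalRC {
      uint32_t Flags;
      static constexpr uint32_t IsVGPR = 1u << 0;
    };

    bool hasVGPRsTrivially(const HypotheticalRC &RC) {
      return (RC.Flags & HypotheticalRC::IsVGPR) != 0; // one bit test, no size switch
    }

    int main() {
      HypotheticalRC VReg64{HypotheticalRC::IsVGPR};
      HypotheticalRC SReg64{0};
      std::cout << hasVGPRsTrivially(VReg64) << hasVGPRsTrivially(SReg64) << '\n'; // 10
    }
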
    1238             : 
    1239      154462 : const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
    1240             :                                          const TargetRegisterClass *SRC) const {
    1241      308924 :   switch (getRegSizeInBits(*SRC)) {
    1242             :   case 32:
    1243             :     return &AMDGPU::VGPR_32RegClass;
    1244       20023 :   case 64:
    1245       20023 :     return &AMDGPU::VReg_64RegClass;
    1246           0 :   case 96:
    1247           0 :     return &AMDGPU::VReg_96RegClass;
    1248        4648 :   case 128:
    1249        4648 :     return &AMDGPU::VReg_128RegClass;
    1250          53 :   case 256:
    1251          53 :     return &AMDGPU::VReg_256RegClass;
    1252          51 :   case 512:
    1253          51 :     return &AMDGPU::VReg_512RegClass;
    1254           0 :   default:
    1255           0 :     llvm_unreachable("Invalid register class size");
    1256             :   }
    1257             : }
    1258             : 
    1259        1762 : const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass(
    1260             :                                          const TargetRegisterClass *VRC) const {
    1261        3524 :   switch (getRegSizeInBits(*VRC)) {
    1262             :   case 32:
    1263             :     return &AMDGPU::SGPR_32RegClass;
    1264         473 :   case 64:
    1265         473 :     return &AMDGPU::SReg_64RegClass;
    1266           8 :   case 128:
    1267           8 :     return &AMDGPU::SReg_128RegClass;
    1268           2 :   case 256:
    1269           2 :     return &AMDGPU::SReg_256RegClass;
    1270           0 :   case 512:
    1271           0 :     return &AMDGPU::SReg_512RegClass;
    1272           0 :   default:
    1273           0 :     llvm_unreachable("Invalid register class size");
    1274             :   }
    1275             : }
    1276             : 
    1277      333680 : const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
    1278             :                          const TargetRegisterClass *RC, unsigned SubIdx) const {
    1279      333680 :   if (SubIdx == AMDGPU::NoSubRegister)
    1280             :     return RC;
    1281             : 
    1282             :   // We can assume that each lane corresponds to one 32-bit register.
    1283      149100 :   unsigned Count = getSubRegIndexLaneMask(SubIdx).getNumLanes();
    1284       49700 :   if (isSGPRClass(RC)) {
    1285       22822 :     switch (Count) {
    1286             :     case 1:
    1287             :       return &AMDGPU::SGPR_32RegClass;
    1288           0 :     case 2:
    1289           0 :       return &AMDGPU::SReg_64RegClass;
    1290           0 :     case 4:
    1291           0 :       return &AMDGPU::SReg_128RegClass;
    1292           0 :     case 8:
    1293           0 :       return &AMDGPU::SReg_256RegClass;
    1294           0 :     case 16: /* fall-through */
    1295             :     default:
    1296           0 :       llvm_unreachable("Invalid sub-register class size");
    1297             :     }
    1298             :   } else {
    1299       26878 :     switch (Count) {
    1300             :     case 1:
    1301             :       return &AMDGPU::VGPR_32RegClass;
    1302          46 :     case 2:
    1303          46 :       return &AMDGPU::VReg_64RegClass;
    1304           0 :     case 3:
    1305           0 :       return &AMDGPU::VReg_96RegClass;
    1306           0 :     case 4:
    1307           0 :       return &AMDGPU::VReg_128RegClass;
    1308           0 :     case 8:
    1309           0 :       return &AMDGPU::VReg_256RegClass;
    1310           0 :     case 16: /* fall-through */
    1311             :     default:
    1312           0 :       llvm_unreachable("Invalid sub-register class size");
    1313             :     }
    1314             :   }
    1315             : }
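
A plain-integer restatement of the lane-count assumption above: each lane of a sub-register index covers one 32-bit register, so an index spanning N lanes selects an N*32-bit class (the printed widths map onto the concrete classes returned by the switches above).

    #include <iostream>

    unsigned subRegClassBits(unsigned NumLanes) { return NumLanes * 32; }

    int main() {
      std::cout << subRegClassBits(1) << '\n'; // 32  -> SGPR_32 / VGPR_32
      std::cout << subRegClassBits(2) << '\n'; // 64  -> SReg_64 / VReg_64
      std::cout << subRegClassBits(4) << '\n'; // 128 -> SReg_128 / VReg_128
    }
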
    1316             : 
    1317      354984 : bool SIRegisterInfo::shouldRewriteCopySrc(
    1318             :   const TargetRegisterClass *DefRC,
    1319             :   unsigned DefSubReg,
    1320             :   const TargetRegisterClass *SrcRC,
    1321             :   unsigned SrcSubReg) const {
    1322             :   // We want to prefer the smallest register class possible, so we don't want to
    1323             :   // stop and rewrite on anything that looks like a subregister
    1324             :   // extract. Operations mostly don't care about the super register class, so we
    1325             :   // only want to stop on the most basic of copies between the same register
    1326             :   // class.
    1327             :   //
    1328             :   // e.g. if we have something like
    1329             :   // vreg0 = ...
    1330             :   // vreg1 = ...
     1331             :   // vreg2 = REG_SEQUENCE vreg0, sub0, vreg1, sub1
    1332             :   // vreg3 = COPY vreg2, sub0
    1333             :   //
    1334             :   // We want to look through the COPY to find:
    1335             :   //  => vreg3 = COPY vreg0
    1336             : 
    1337             :   // Plain copy.
    1338      354984 :   return getCommonSubClass(DefRC, SrcRC) != nullptr;
    1339             : }
    1340             : 
    1341             : /// \brief Returns a register that is not used at any point in the function.
    1342             : ///        If all registers are used, then this function will return
    1343             : //         AMDGPU::NoRegister.
    1344             : unsigned
    1345         130 : SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
    1346             :                                    const TargetRegisterClass *RC,
    1347             :                                    const MachineFunction &MF) const {
    1348             : 
    1349        4368 :   for (unsigned Reg : *RC)
    1350        4104 :     if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
    1351             :       return Reg;
    1352             :   return AMDGPU::NoRegister;
    1353             : }
    1354             : 
    1355        5420 : ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
    1356             :                                                    unsigned EltSize) const {
    1357        5420 :   if (EltSize == 4) {
    1358             :     static const int16_t Sub0_15[] = {
    1359             :       AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    1360             :       AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    1361             :       AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
    1362             :       AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
    1363             :     };
    1364             : 
    1365             :     static const int16_t Sub0_7[] = {
    1366             :       AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    1367             :       AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    1368             :     };
    1369             : 
    1370             :     static const int16_t Sub0_3[] = {
    1371             :       AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    1372             :     };
    1373             : 
    1374             :     static const int16_t Sub0_2[] = {
    1375             :       AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2,
    1376             :     };
    1377             : 
    1378             :     static const int16_t Sub0_1[] = {
    1379             :       AMDGPU::sub0, AMDGPU::sub1,
    1380             :     };
    1381             : 
    1382        5262 :     switch (AMDGPU::getRegBitWidth(*RC->MC)) {
    1383         918 :     case 32:
    1384         918 :       return {};
    1385        4106 :     case 64:
    1386             :       return makeArrayRef(Sub0_1);
    1387           0 :     case 96:
    1388             :       return makeArrayRef(Sub0_2);
    1389         157 :     case 128:
    1390             :       return makeArrayRef(Sub0_3);
    1391          60 :     case 256:
    1392             :       return makeArrayRef(Sub0_7);
    1393          21 :     case 512:
    1394             :       return makeArrayRef(Sub0_15);
    1395           0 :     default:
    1396           0 :       llvm_unreachable("unhandled register size");
    1397             :     }
    1398             :   }
    1399             : 
    1400         158 :   if (EltSize == 8) {
    1401             :     static const int16_t Sub0_15_64[] = {
    1402             :       AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
    1403             :       AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
    1404             :       AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
    1405             :       AMDGPU::sub12_sub13, AMDGPU::sub14_sub15
    1406             :     };
    1407             : 
    1408             :     static const int16_t Sub0_7_64[] = {
    1409             :       AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
    1410             :       AMDGPU::sub4_sub5, AMDGPU::sub6_sub7
    1411             :     };
    1412             : 
    1413             : 
    1414             :     static const int16_t Sub0_3_64[] = {
    1415             :       AMDGPU::sub0_sub1, AMDGPU::sub2_sub3
    1416             :     };
    1417             : 
    1418         152 :     switch (AMDGPU::getRegBitWidth(*RC->MC)) {
    1419          16 :     case 64:
    1420          16 :       return {};
    1421         136 :     case 128:
    1422             :       return makeArrayRef(Sub0_3_64);
    1423           0 :     case 256:
    1424             :       return makeArrayRef(Sub0_7_64);
    1425           0 :     case 512:
    1426             :       return makeArrayRef(Sub0_15_64);
    1427           0 :     default:
    1428           0 :       llvm_unreachable("unhandled register size");
    1429             :     }
    1430             :   }
    1431             : 
    1432             :   assert(EltSize == 16 && "unhandled register spill split size");
    1433             : 
    1434             :   static const int16_t Sub0_15_128[] = {
    1435             :     AMDGPU::sub0_sub1_sub2_sub3,
    1436             :     AMDGPU::sub4_sub5_sub6_sub7,
    1437             :     AMDGPU::sub8_sub9_sub10_sub11,
    1438             :     AMDGPU::sub12_sub13_sub14_sub15
    1439             :   };
    1440             : 
    1441             :   static const int16_t Sub0_7_128[] = {
    1442             :     AMDGPU::sub0_sub1_sub2_sub3,
    1443             :     AMDGPU::sub4_sub5_sub6_sub7
    1444             :   };
    1445             : 
    1446           6 :   switch (AMDGPU::getRegBitWidth(*RC->MC)) {
    1447           4 :   case 128:
    1448           4 :     return {};
    1449           2 :   case 256:
    1450             :     return makeArrayRef(Sub0_7_128);
    1451           0 :   case 512:
    1452             :     return makeArrayRef(Sub0_15_128);
    1453           0 :   default:
    1454           0 :     llvm_unreachable("unhandled register size");
    1455             :   }
    1456             : }
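
The splitting rule above reduces to simple arithmetic: a W-bit register split into EltSize-byte pieces yields W / (EltSize * 8) sub-register indices, and a register no wider than one piece needs no split at all (the empty-list cases). A standalone restatement with sample values:

    #include <iostream>

    unsigned numSplitParts(unsigned RegBitWidth, unsigned EltSizeBytes) {
      unsigned PieceBits = EltSizeBytes * 8;
      return RegBitWidth <= PieceBits ? 0 : RegBitWidth / PieceBits;
    }

    int main() {
      std::cout << numSplitParts(128, 4) << '\n'; // 4: sub0, sub1, sub2, sub3
      std::cout << numSplitParts(128, 8) << '\n'; // 2: sub0_sub1, sub2_sub3
      std::cout << numSplitParts(32, 4)  << '\n'; // 0: no split needed
    }
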
    1457             : 
    1458             : const TargetRegisterClass*
    1459     3394857 : SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
    1460             :                                   unsigned Reg) const {
    1461     3394857 :   if (TargetRegisterInfo::isVirtualRegister(Reg))
    1462      110967 :     return  MRI.getRegClass(Reg);
    1463             : 
    1464     3283890 :   return getPhysRegClass(Reg);
    1465             : }
    1466             : 
    1467     3233241 : bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
    1468             :                             unsigned Reg) const {
    1469     3233241 :   return hasVGPRs(getRegClassForReg(MRI, Reg));
    1470             : }
    1471             : 
    1472      145651 : bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
    1473             :                                     const TargetRegisterClass *SrcRC,
    1474             :                                     unsigned SubReg,
    1475             :                                     const TargetRegisterClass *DstRC,
    1476             :                                     unsigned DstSubReg,
    1477             :                                     const TargetRegisterClass *NewRC) const {
    1478      291302 :   unsigned SrcSize = getRegSizeInBits(*SrcRC);
    1479      291302 :   unsigned DstSize = getRegSizeInBits(*DstRC);
    1480      291302 :   unsigned NewSize = getRegSizeInBits(*NewRC);
    1481             : 
     1482             :   // Do not increase the size of registers beyond a dword; we would need to
     1483             :   // allocate adjacent registers and constrain regalloc more than needed.
    1484             : 
    1485             :   // Always allow dword coalescing.
    1486      145651 :   if (SrcSize <= 32 || DstSize <= 32)
    1487             :     return true;
    1488             : 
    1489       41214 :   return NewSize <= DstSize || NewSize <= SrcSize;
    1490             : }
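
Restated as a standalone predicate over sizes in bits, with sample values illustrating the two rules above (always allow dword coalescing, otherwise never grow past both inputs):

    #include <iostream>

    bool shouldCoalesceSizes(unsigned SrcSize, unsigned DstSize, unsigned NewSize) {
      if (SrcSize <= 32 || DstSize <= 32)                // always allow dword coalescing
        return true;
      return NewSize <= DstSize || NewSize <= SrcSize;   // no growth beyond both inputs
    }

    int main() {
      std::cout << shouldCoalesceSizes(32, 64, 64)  << '\n'; // 1: dword source
      std::cout << shouldCoalesceSizes(64, 64, 64)  << '\n'; // 1: no growth
      std::cout << shouldCoalesceSizes(64, 64, 128) << '\n'; // 0: would need a wider class
    }
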
    1491             : 
    1492       92724 : unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
    1493             :                                              MachineFunction &MF) const {
    1494             : 
    1495       92724 :   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
    1496       92724 :   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
    1497             : 
    1498      185448 :   unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
    1499      185448 :                                                        *MF.getFunction());
    1500      185448 :   switch (RC->getID()) {
    1501             :   default:
    1502             :     return AMDGPURegisterInfo::getRegPressureLimit(RC, MF);
    1503       46362 :   case AMDGPU::VGPR_32RegClassID:
    1504      139086 :     return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF));
    1505       46362 :   case AMDGPU::SGPR_32RegClassID:
    1506      139086 :     return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF));
    1507             :   }
    1508             : }
    1509             : 
    1510      858749 : unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
    1511             :                                                 unsigned Idx) const {
    1512      858749 :   if (Idx == getVGPRPressureSet())
    1513             :     return getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
    1514       46362 :                                const_cast<MachineFunction &>(MF));
    1515             : 
    1516      812387 :   if (Idx == getSGPRPressureSet())
    1517             :     return getRegPressureLimit(&AMDGPU::SGPR_32RegClass,
    1518       46362 :                                const_cast<MachineFunction &>(MF));
    1519             : 
    1520      766025 :   return AMDGPURegisterInfo::getRegPressureSetLimit(MF, Idx);
    1521             : }
    1522             : 
    1523     3716517 : const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
    1524             :   static const int Empty[] = { -1 };
    1525             : 
    1526     3716517 :   if (hasRegUnit(AMDGPU::M0, RegUnit))
    1527             :     return Empty;
    1528     3714721 :   return AMDGPURegisterInfo::getRegUnitPressureSets(RegUnit);
    1529      216918 : }

Generated by: LCOV version 1.13