LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - SIRegisterInfo.cpp (source / functions)
Test: llvm-toolchain.info
Date: 2018-06-17 00:07:59
Coverage: Lines: 531 of 621 (85.5 %), Functions: 41 of 45 (91.1 %)
Legend: Lines: hit / not hit

          Line data    Source code
       1             : //===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : /// \file
      11             : /// SI implementation of the TargetRegisterInfo class.
      12             : //
      13             : //===----------------------------------------------------------------------===//
      14             : 
      15             : #include "SIRegisterInfo.h"
      16             : #include "AMDGPURegisterBankInfo.h"
      17             : #include "AMDGPUSubtarget.h"
      18             : #include "SIInstrInfo.h"
      19             : #include "SIMachineFunctionInfo.h"
      20             : #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
      21             : #include "llvm/CodeGen/MachineFrameInfo.h"
      22             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      23             : #include "llvm/CodeGen/RegisterScavenging.h"
      24             : #include "llvm/IR/Function.h"
      25             : #include "llvm/IR/LLVMContext.h"
      26             : 
      27             : using namespace llvm;
      28             : 
      29             : static bool hasPressureSet(const int *PSets, unsigned PSetID) {
      30      676782 :   for (unsigned i = 0; PSets[i] != -1; ++i) {
      31      289089 :     if (PSets[i] == (int)PSetID)
      32             :       return true;
      33             :   }
      34             :   return false;
      35             : }
      36             : 
      37      121014 : void SIRegisterInfo::classifyPressureSet(unsigned PSetID, unsigned Reg,
      38             :                                          BitVector &PressureSets) const {
      39      230823 :   for (MCRegUnitIterator U(Reg, this); U.isValid(); ++U) {
      40      121014 :     const int *PSets = getRegUnitPressureSets(*U);
      41      121014 :     if (hasPressureSet(PSets, PSetID)) {
      42             :       PressureSets.set(PSetID);
      43             :       break;
      44             :     }
      45             :   }
      46      121014 : }
      47             : 
      48      101169 : static cl::opt<bool> EnableSpillSGPRToSMEM(
      49             :   "amdgpu-spill-sgpr-to-smem",
      50      101169 :   cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"),
      51      303507 :   cl::init(false));
      52             : 
      53      101169 : static cl::opt<bool> EnableSpillSGPRToVGPR(
      54             :   "amdgpu-spill-sgpr-to-vgpr",
      55      101169 :   cl::desc("Enable spilling VGPRs to SGPRs"),
      56             :   cl::ReallyHidden,
      57      303507 :   cl::init(true));
      58             : 
      59        2241 : SIRegisterInfo::SIRegisterInfo(const SISubtarget &ST) :
      60             :   AMDGPURegisterInfo(),
      61             :   SGPRPressureSets(getNumRegPressureSets()),
      62             :   VGPRPressureSets(getNumRegPressureSets()),
      63             :   SpillSGPRToVGPR(false),
      64        2241 :   SpillSGPRToSMEM(false) {
      65        2241 :   if (EnableSpillSGPRToSMEM && ST.hasScalarStores())
      66           5 :     SpillSGPRToSMEM = true;
      67        2236 :   else if (EnableSpillSGPRToVGPR)
      68        2232 :     SpillSGPRToVGPR = true;
      69             : 
      70        2241 :   unsigned NumRegPressureSets = getNumRegPressureSets();
      71             : 
      72        2241 :   SGPRSetID = NumRegPressureSets;
      73        2241 :   VGPRSetID = NumRegPressureSets;
      74             : 
      75      123255 :   for (unsigned i = 0; i < NumRegPressureSets; ++i) {
      76       60507 :     classifyPressureSet(i, AMDGPU::SGPR0, SGPRPressureSets);
      77       60507 :     classifyPressureSet(i, AMDGPU::VGPR0, VGPRPressureSets);
      78             :   }
      79             : 
      80             :   // Determine the number of reg units for each pressure set.
      81        2241 :   std::vector<unsigned> PressureSetRegUnits(NumRegPressureSets, 0);
      82     3982257 :   for (unsigned i = 0, e = getNumRegUnits(); i != e; ++i) {
      83     3980016 :     const int *PSets = getRegUnitPressureSets(i);
      84    21885606 :     for (unsigned j = 0; PSets[j] != -1; ++j) {
      85    17905590 :       ++PressureSetRegUnits[PSets[j]];
      86             :     }
      87             :   }
      88             : 
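                      :   // Of the pressure sets classified above, remember the one with the most
                      :   // register units as the canonical SGPRSetID/VGPRSetID; this is presumably
                      :   // the most inclusive set for each register file.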
      89             :   unsigned VGPRMax = 0, SGPRMax = 0;
      90      123255 :   for (unsigned i = 0; i < NumRegPressureSets; ++i) {
      91        6723 :     if (isVGPRPressureSet(i) && PressureSetRegUnits[i] > VGPRMax) {
      92        2241 :       VGPRSetID = i;
      93        2241 :       VGPRMax = PressureSetRegUnits[i];
      94        2241 :       continue;
      95             :     }
      96       17928 :     if (isSGPRPressureSet(i) && PressureSetRegUnits[i] > SGPRMax) {
      97        8964 :       SGPRSetID = i;
      98        8964 :       SGPRMax = PressureSetRegUnits[i];
      99             :     }
     100             :   }
     101             : 
     102             :   assert(SGPRSetID < NumRegPressureSets &&
     103             :          VGPRSetID < NumRegPressureSets);
     104        2241 : }
     105             : 
     106       16712 : unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
     107             :   const MachineFunction &MF) const {
     108             : 
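                      :   // Reserve the last 4-aligned SGPR quad below the SGPR limit for the scratch
                      :   // resource descriptor; e.g., assuming getMaxNumSGPRs(MF) == 102, BaseIdx is
                      :   // alignDown(102, 4) - 4 == 96, so the descriptor occupies SGPR96..SGPR99.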
     109       16712 :   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
     110       33424 :   unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4;
     111       16712 :   unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
     112       33424 :   return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
     113             : }
     114             : 
     115             : static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount) {
     116             :   unsigned Reg;
     117             : 
     118             :   // Try to place it in a hole after PrivateSegmentBufferReg.
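                      :   // For example, assuming RegCount == 102 (not a multiple of 4), the descriptor
                      :   // reserved above sits in SGPR96..99 and the wave offset lands in SGPR101;
                      :   // assuming RegCount == 104, the descriptor uses SGPR100..103 and the offset
                      :   // goes just below it, in SGPR99.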
     119       16709 :   if (RegCount & 3) {
     120             :     // We cannot put the segment buffer in (Idx - 4) ... (Idx - 1) due to
      121             :     // alignment constraints, so we have a hole where we can put the wave offset.
     122       16525 :     Reg = RegCount - 1;
     123             :   } else {
     124             :     // We can put the segment buffer in (Idx - 4) ... (Idx - 1) and put the
     125             :     // wave offset before it.
     126         184 :     Reg = RegCount - 5;
     127             :   }
     128             : 
     129             :   return Reg;
     130             : }
     131             : 
     132       16709 : unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
     133             :   const MachineFunction &MF) const {
     134       16709 :   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
     135       16709 :   unsigned Reg = findPrivateSegmentWaveByteOffsetRegIndex(ST.getMaxNumSGPRs(MF));
     136       33418 :   return AMDGPU::SGPR_32RegClass.getRegister(Reg);
     137             : }
     138             : 
     139        1748 : unsigned SIRegisterInfo::reservedStackPtrOffsetReg(
     140             :   const MachineFunction &MF) const {
     141        1748 :   return AMDGPU::SGPR32;
     142             : }
     143             : 
     144       36799 : BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
     145       36799 :   BitVector Reserved(getNumRegs());
     146             : 
      147             :   // EXEC_LO and EXEC_HI could be allocated and used as regular registers, but
     148             :   // this seems likely to result in bugs, so I'm marking them as reserved.
     149       36799 :   reserveRegisterTuples(Reserved, AMDGPU::EXEC);
     150       36799 :   reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
     151             : 
     152             :   // M0 has to be reserved so that llvm accepts it as a live-in into a block.
     153       36799 :   reserveRegisterTuples(Reserved, AMDGPU::M0);
     154             : 
     155             :   // Reserve the memory aperture registers.
     156       36799 :   reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
     157       36799 :   reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
     158       36799 :   reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
     159       36799 :   reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
     160             : 
     161             :   // Reserve xnack_mask registers - support is not implemented in Codegen.
     162       36799 :   reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);
     163             : 
     164             :   // Reserve Trap Handler registers - support is not implemented in Codegen.
     165       36799 :   reserveRegisterTuples(Reserved, AMDGPU::TBA);
     166       36799 :   reserveRegisterTuples(Reserved, AMDGPU::TMA);
     167       36799 :   reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
     168       36799 :   reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
     169       36799 :   reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
     170       36799 :   reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
     171       36799 :   reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
     172       36799 :   reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
     173       36799 :   reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
     174       36799 :   reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);
     175             : 
     176       36799 :   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
     177             : 
     178       36799 :   unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
     179       36799 :   unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
     180      339091 :   for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
     181      151146 :     unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
     182      151146 :     reserveRegisterTuples(Reserved, Reg);
     183             :   }
     184             : 
     185       36799 :   unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
     186       36799 :   unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
     187       50607 :   for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
     188        6904 :     unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
     189        6904 :     reserveRegisterTuples(Reserved, Reg);
     190             :   }
     191             : 
     192             :   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
     193             : 
     194       36799 :   unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
     195       36799 :   if (ScratchWaveOffsetReg != AMDGPU::NoRegister) {
     196             :     // Reserve 1 SGPR for scratch wave offset in case we need to spill.
     197       36799 :     reserveRegisterTuples(Reserved, ScratchWaveOffsetReg);
     198             :   }
     199             : 
     200       36799 :   unsigned ScratchRSrcReg = MFI->getScratchRSrcReg();
     201       36799 :   if (ScratchRSrcReg != AMDGPU::NoRegister) {
     202             :     // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need
     203             :     // to spill.
     204             :     // TODO: May need to reserve a VGPR if doing LDS spilling.
     205       36799 :     reserveRegisterTuples(Reserved, ScratchRSrcReg);
     206             :     assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg));
     207             :   }
     208             : 
     209             :   // We have to assume the SP is needed in case there are calls in the function,
     210             :   // which is detected after the function is lowered. If we aren't really going
     211             :   // to need SP, don't bother reserving it.
     212       36799 :   unsigned StackPtrReg = MFI->getStackPtrOffsetReg();
     213             : 
     214       36799 :   if (StackPtrReg != AMDGPU::NoRegister) {
     215       36799 :     reserveRegisterTuples(Reserved, StackPtrReg);
     216             :     assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
     217             :   }
     218             : 
     219       36799 :   unsigned FrameReg = MFI->getFrameOffsetReg();
     220       36799 :   if (FrameReg != AMDGPU::NoRegister) {
     221       36799 :     reserveRegisterTuples(Reserved, FrameReg);
     222             :     assert(!isSubRegister(ScratchRSrcReg, FrameReg));
     223             :   }
     224             : 
     225       36799 :   return Reserved;
     226             : }
     227             : 
     228       35714 : bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
     229             :   const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>();
     230       35714 :   if (Info->isEntryFunction()) {
     231       32898 :     const MachineFrameInfo &MFI = Fn.getFrameInfo();
     232       32898 :     return MFI.hasStackObjects() || MFI.hasCalls();
     233             :   }
     234             : 
     235             :   // May need scavenger for dealing with callee saved registers.
     236             :   return true;
     237             : }
     238             : 
     239       17857 : bool SIRegisterInfo::requiresFrameIndexScavenging(
     240             :   const MachineFunction &MF) const {
     241       17857 :   const MachineFrameInfo &MFI = MF.getFrameInfo();
     242       17857 :   if (MFI.hasStackObjects())
     243             :     return true;
     244             : 
     245             :   // May need to deal with callee saved registers.
     246             :   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
     247       17241 :   return !Info->isEntryFunction();
     248             : }
     249             : 
     250       17532 : bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
     251             :   const MachineFunction &MF) const {
     252             :   // m0 is needed for the scalar store offset. m0 is unallocatable, so we can't
     253             :   // create a virtual register for it during frame index elimination, so the
     254             :   // scavenger is directly needed.
     255       18148 :   return MF.getFrameInfo().hasStackObjects() &&
     256       17862 :          MF.getSubtarget<SISubtarget>().hasScalarStores() &&
     257       17862 :          MF.getInfo<SIMachineFunctionInfo>()->hasSpilledSGPRs();
     258             : }
     259             : 
     260       17844 : bool SIRegisterInfo::requiresVirtualBaseRegisters(
     261             :   const MachineFunction &) const {
     262             :   // There are no special dedicated stack or frame pointers.
     263       17844 :   return true;
     264             : }
     265             : 
     266       35745 : bool SIRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
     267             :   // This helps catch bugs as verifier errors.
     268       35745 :   return true;
     269             : }
     270             : 
     271        4451 : int64_t SIRegisterInfo::getMUBUFInstrOffset(const MachineInstr *MI) const {
     272             :   assert(SIInstrInfo::isMUBUF(*MI));
     273             : 
     274        8902 :   int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
     275             :                                           AMDGPU::OpName::offset);
     276        8902 :   return MI->getOperand(OffIdx).getImm();
     277             : }
     278             : 
     279           4 : int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
     280             :                                                  int Idx) const {
     281           4 :   if (!SIInstrInfo::isMUBUF(*MI))
     282             :     return 0;
     283             : 
     284             :   assert(Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
     285             :                                            AMDGPU::OpName::vaddr) &&
     286             :          "Should never see frame index on non-address operand");
     287             : 
     288           4 :   return getMUBUFInstrOffset(MI);
     289             : }
     290             : 
     291        4791 : bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
     292        4791 :   if (!MI->mayLoadOrStore())
     293             :     return false;
     294             : 
     295        4447 :   int64_t FullOffset = Offset + getMUBUFInstrOffset(MI);
     296             : 
     297        4447 :   return !isUInt<12>(FullOffset);
     298             : }
     299             : 
     300           0 : void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
     301             :                                                   unsigned BaseReg,
     302             :                                                   int FrameIdx,
     303             :                                                   int64_t Offset) const {
     304             :   MachineBasicBlock::iterator Ins = MBB->begin();
     305           0 :   DebugLoc DL; // Defaults to "unknown"
     306             : 
     307           0 :   if (Ins != MBB->end())
     308             :     DL = Ins->getDebugLoc();
     309             : 
     310           0 :   MachineFunction *MF = MBB->getParent();
     311           0 :   const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
     312             :   const SIInstrInfo *TII = Subtarget.getInstrInfo();
     313             : 
     314           0 :   if (Offset == 0) {
     315           0 :     BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), BaseReg)
     316             :       .addFrameIndex(FrameIdx);
     317             :     return;
     318             :   }
     319             : 
     320           0 :   MachineRegisterInfo &MRI = MF->getRegInfo();
     321           0 :   unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
     322             : 
     323           0 :   unsigned FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
     324             : 
     325           0 :   BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
     326             :     .addImm(Offset);
     327           0 :   BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), FIReg)
     328             :     .addFrameIndex(FrameIdx);
     329             : 
     330           0 :   TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
     331           0 :     .addReg(OffsetReg, RegState::Kill)
     332           0 :     .addReg(FIReg);
     333             : }
     334             : 
     335           0 : void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
     336             :                                        int64_t Offset) const {
     337             : 
     338           0 :   MachineBasicBlock *MBB = MI.getParent();
     339           0 :   MachineFunction *MF = MBB->getParent();
     340           0 :   const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
     341             :   const SIInstrInfo *TII = Subtarget.getInstrInfo();
     342             : 
     343             : #ifndef NDEBUG
     344             :   // FIXME: Is it possible to be storing a frame index to itself?
     345             :   bool SeenFI = false;
     346             :   for (const MachineOperand &MO: MI.operands()) {
     347             :     if (MO.isFI()) {
     348             :       if (SeenFI)
     349             :         llvm_unreachable("should not see multiple frame indices");
     350             : 
     351             :       SeenFI = true;
     352             :     }
     353             :   }
     354             : #endif
     355             : 
     356           0 :   MachineOperand *FIOp = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
     357             :   assert(FIOp && FIOp->isFI() && "frame index must be address operand");
     358             :   assert(TII->isMUBUF(MI));
     359             :   assert(TII->getNamedOperand(MI, AMDGPU::OpName::soffset)->getReg() ==
     360             :          MF->getInfo<SIMachineFunctionInfo>()->getFrameOffsetReg() &&
     361             :          "should only be seeing frame offset relative FrameIndex");
     362             : 
     363             : 
     364           0 :   MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
     365           0 :   int64_t NewOffset = OffsetOp->getImm() + Offset;
     366             :   assert(isUInt<12>(NewOffset) && "offset should be legal");
     367             : 
     368           0 :   FIOp->ChangeToRegister(BaseReg, false);
     369             :   OffsetOp->setImm(NewOffset);
     370           0 : }
     371             : 
     372           0 : bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
     373             :                                         unsigned BaseReg,
     374             :                                         int64_t Offset) const {
     375           0 :   if (!SIInstrInfo::isMUBUF(*MI))
     376             :     return false;
     377             : 
     378           0 :   int64_t NewOffset = Offset + getMUBUFInstrOffset(MI);
     379             : 
     380           0 :   return isUInt<12>(NewOffset);
     381             : }
     382             : 
     383           0 : const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
     384             :   const MachineFunction &MF, unsigned Kind) const {
     385             :   // This is inaccurate. It depends on the instruction and address space. The
     386             :   // only place where we should hit this is for dealing with frame indexes /
     387             :   // private accesses, so this is correct in that case.
     388           0 :   return &AMDGPU::VGPR_32RegClass;
     389             : }
     390             : 
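                      : // Number of 32-bit subregisters covered by a spill pseudo; e.g., the S128/V128
                      : // save and restore pseudos operate on four dwords.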
     391        1265 : static unsigned getNumSubRegsForSpillOp(unsigned Op) {
     392             : 
     393        1265 :   switch (Op) {
     394             :   case AMDGPU::SI_SPILL_S512_SAVE:
     395             :   case AMDGPU::SI_SPILL_S512_RESTORE:
     396             :   case AMDGPU::SI_SPILL_V512_SAVE:
     397             :   case AMDGPU::SI_SPILL_V512_RESTORE:
     398             :     return 16;
     399           0 :   case AMDGPU::SI_SPILL_S256_SAVE:
     400             :   case AMDGPU::SI_SPILL_S256_RESTORE:
     401             :   case AMDGPU::SI_SPILL_V256_SAVE:
     402             :   case AMDGPU::SI_SPILL_V256_RESTORE:
     403           0 :     return 8;
     404         663 :   case AMDGPU::SI_SPILL_S128_SAVE:
     405             :   case AMDGPU::SI_SPILL_S128_RESTORE:
     406             :   case AMDGPU::SI_SPILL_V128_SAVE:
     407             :   case AMDGPU::SI_SPILL_V128_RESTORE:
     408         663 :     return 4;
     409           0 :   case AMDGPU::SI_SPILL_V96_SAVE:
     410             :   case AMDGPU::SI_SPILL_V96_RESTORE:
     411           0 :     return 3;
     412           0 :   case AMDGPU::SI_SPILL_S64_SAVE:
     413             :   case AMDGPU::SI_SPILL_S64_RESTORE:
     414             :   case AMDGPU::SI_SPILL_V64_SAVE:
     415             :   case AMDGPU::SI_SPILL_V64_RESTORE:
     416           0 :     return 2;
     417         602 :   case AMDGPU::SI_SPILL_S32_SAVE:
     418             :   case AMDGPU::SI_SPILL_S32_RESTORE:
     419             :   case AMDGPU::SI_SPILL_V32_SAVE:
     420             :   case AMDGPU::SI_SPILL_V32_RESTORE:
     421         602 :     return 1;
     422           0 :   default: llvm_unreachable("Invalid spill opcode");
     423             :   }
     424             : }
     425             : 
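                      : // Map an OFFEN (VGPR-addressed) MUBUF store opcode to its OFFSET
                      : // (immediate-addressed) form, or return -1 if no such form exists;
                      : // getOffsetMUBUFLoad below does the same for loads.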
     426             : static int getOffsetMUBUFStore(unsigned Opc) {
     427        3077 :   switch (Opc) {
     428             :   case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
     429             :     return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
     430             :   case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
     431             :     return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
     432             :   case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
     433             :     return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
     434             :   case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
     435             :     return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
     436             :   case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
     437             :     return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
     438             :   case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
     439             :     return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
     440             :   case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
     441             :     return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
     442             :   default:
     443             :     return -1;
     444             :   }
     445             : }
     446             : 
     447        1782 : static int getOffsetMUBUFLoad(unsigned Opc) {
     448        1782 :   switch (Opc) {
     449             :   case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
     450             :     return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
     451          82 :   case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
     452          82 :     return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
     453           8 :   case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
     454           8 :     return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
     455          24 :   case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
     456          24 :     return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
     457           2 :   case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
     458           2 :     return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
     459           2 :   case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
     460           2 :     return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
     461          16 :   case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
     462          16 :     return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
     463           2 :   case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
     464           2 :     return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
     465           2 :   case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
     466           2 :     return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
     467           2 :   case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
     468           2 :     return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
     469           2 :   case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
     470           2 :     return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
     471           3 :   case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
     472           3 :     return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
     473           4 :   case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
     474           4 :     return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
     475           0 :   default:
     476           0 :     return -1;
     477             :   }
     478             : }
     479             : 
     480             : // This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
     481             : // need to handle the case where an SGPR may need to be spilled while spilling.
     482        4859 : static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
     483             :                                       MachineFrameInfo &MFI,
     484             :                                       MachineBasicBlock::iterator MI,
     485             :                                       int Index,
     486             :                                       int64_t Offset) {
     487        4859 :   MachineBasicBlock *MBB = MI->getParent();
     488             :   const DebugLoc &DL = MI->getDebugLoc();
     489        4859 :   bool IsStore = MI->mayStore();
     490             : 
     491        4859 :   unsigned Opc = MI->getOpcode();
     492        6641 :   int LoadStoreOp = IsStore ?
     493             :     getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
     494        1782 :   if (LoadStoreOp == -1)
     495             :     return false;
     496             : 
     497        4859 :   const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
     498        9718 :   MachineInstrBuilder NewMI = BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
     499             :     .add(*Reg)
     500        4859 :     .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
     501        4859 :     .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
     502             :     .addImm(Offset)
     503             :     .addImm(0) // glc
     504             :     .addImm(0) // slc
     505             :     .addImm(0) // tfe
     506        4859 :     .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
     507             : 
     508             :   const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
     509        4859 :                                                        AMDGPU::OpName::vdata_in);
     510        4859 :   if (VDataIn)
     511             :     NewMI.add(*VDataIn);
     512             :   return true;
     513             : }
     514             : 
     515        2441 : void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
     516             :                                          unsigned LoadStoreOp,
     517             :                                          int Index,
     518             :                                          unsigned ValueReg,
     519             :                                          bool IsKill,
     520             :                                          unsigned ScratchRsrcReg,
     521             :                                          unsigned ScratchOffsetReg,
     522             :                                          int64_t InstOffset,
     523             :                                          MachineMemOperand *MMO,
     524             :                                          RegScavenger *RS) const {
     525        2441 :   MachineBasicBlock *MBB = MI->getParent();
     526        2441 :   MachineFunction *MF = MI->getParent()->getParent();
     527        2441 :   const SISubtarget &ST =  MF->getSubtarget<SISubtarget>();
     528             :   const SIInstrInfo *TII = ST.getInstrInfo();
     529        2441 :   const MachineFrameInfo &MFI = MF->getFrameInfo();
     530             : 
     531        2441 :   const MCInstrDesc &Desc = TII->get(LoadStoreOp);
     532             :   const DebugLoc &DL = MI->getDebugLoc();
     533        2441 :   bool IsStore = Desc.mayStore();
     534             : 
     535             :   bool RanOutOfSGPRs = false;
     536             :   bool Scavenged = false;
     537             :   unsigned SOffset = ScratchOffsetReg;
     538             : 
     539        2441 :   const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
     540        4882 :   unsigned NumSubRegs = AMDGPU::getRegBitWidth(RC->getID()) / 32;
     541        2441 :   unsigned Size = NumSubRegs * 4;
     542        2441 :   int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
     543             :   const int64_t OriginalImmOffset = Offset;
     544             : 
     545             :   unsigned Align = MFI.getObjectAlignment(Index);
     546             :   const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();
     547             : 
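                      :   // The MUBUF immediate offset field holds a 12-bit unsigned byte offset
                      :   // (0..4095). If the folded offset plus the size of the spilled value does
                      :   // not fit, materialize the offset in an SGPR instead; e.g., assuming an
                      :   // object offset of 4088 and a 128-bit (16-byte) spill, 4088 + 16 == 4104
                      :   // does not fit.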
     548        2441 :   if (!isUInt<12>(Offset + Size)) {
     549             :     SOffset = AMDGPU::NoRegister;
     550             : 
     551             :     // We don't have access to the register scavenger if this function is called
      552             :     // during PEI::scavengeFrameVirtualRegs().
     553         232 :     if (RS)
     554           0 :       SOffset = RS->FindUnusedReg(&AMDGPU::SGPR_32RegClass);
     555             : 
     556           0 :     if (SOffset == AMDGPU::NoRegister) {
      557             :       // There are no free SGPRs, and we are in the process of spilling
      558             :       // VGPRs too.  Since we need a VGPR in order to spill SGPRs (this is true
      559             :       // on SI/CI, and on VI it is true until we implement spilling using scalar
      560             :       // stores), we have no way to free up an SGPR.  Our solution here is to
      561             :       // add the offset directly to the ScratchOffset register, and then
      562             :       // subtract the offset after the spill to return ScratchOffset to its
      563             :       // original value.
     564             :       RanOutOfSGPRs = true;
     565             :       SOffset = ScratchOffsetReg;
     566             :     } else {
     567             :       Scavenged = true;
     568             :     }
     569             : 
     570         696 :     BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
     571         232 :       .addReg(ScratchOffsetReg)
     572             :       .addImm(Offset);
     573             : 
     574             :     Offset = 0;
     575             :   }
     576             : 
     577             :   const unsigned EltSize = 4;
     578             : 
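                      :   // Emit one 4-byte (dword) buffer operation per 32-bit subregister of
                      :   // ValueReg, stepping the immediate offset by EltSize each iteration.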
     579       15297 :   for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += EltSize) {
     580       11744 :     unsigned SubReg = NumSubRegs == 1 ?
     581        5316 :       ValueReg : getSubReg(ValueReg, getSubRegFromChannel(i));
     582             : 
     583             :     unsigned SOffsetRegState = 0;
     584             :     unsigned SrcDstRegState = getDefRegState(!IsStore);
     585        6428 :     if (i + 1 == e) {
     586             :       SOffsetRegState |= getKillRegState(Scavenged);
     587             :       // The last implicit use carries the "Kill" flag.
     588        2441 :       SrcDstRegState |= getKillRegState(IsKill);
     589             :     }
     590             : 
     591        6428 :     MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i);
     592             :     MachineMemOperand *NewMMO
     593       12856 :       = MF->getMachineMemOperand(PInfo, MMO->getFlags(),
     594        6428 :                                  EltSize, MinAlign(Align, EltSize * i));
     595             : 
     596       12856 :     auto MIB = BuildMI(*MBB, MI, DL, Desc)
     597        6428 :       .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill))
     598        6428 :       .addReg(ScratchRsrcReg)
     599        6428 :       .addReg(SOffset, SOffsetRegState)
     600             :       .addImm(Offset)
     601             :       .addImm(0) // glc
     602             :       .addImm(0) // slc
     603             :       .addImm(0) // tfe
     604        6428 :       .addMemOperand(NewMMO);
     605             : 
     606        6428 :     if (NumSubRegs > 1)
     607        5316 :       MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
     608             :   }
     609             : 
     610        2441 :   if (RanOutOfSGPRs) {
     611             :     // Subtract the offset we added to the ScratchOffset register.
     612         696 :     BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffsetReg)
     613         232 :       .addReg(ScratchOffsetReg)
     614             :       .addImm(OriginalImmOffset);
     615             :   }
     616        2441 : }
     617             : 
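                      : // Pick the widest scalar buffer operation whose element size evenly divides
                      : // the super-register size; e.g., assuming a 256-bit (32-byte) SGPR tuple,
                      : // this returns a 16-byte element paired with the DWORDX4 opcode.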
     618             : static std::pair<unsigned, unsigned> getSpillEltSize(unsigned SuperRegSize,
     619             :                                                      bool Store) {
     620          28 :   if (SuperRegSize % 16 == 0) {
     621             :     return { 16, Store ? AMDGPU::S_BUFFER_STORE_DWORDX4_SGPR :
     622             :                          AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR };
     623             :   }
     624             : 
     625          22 :   if (SuperRegSize % 8 == 0) {
     626             :     return { 8, Store ? AMDGPU::S_BUFFER_STORE_DWORDX2_SGPR :
     627             :                         AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR };
     628             :   }
     629             : 
     630             :   return { 4, Store ? AMDGPU::S_BUFFER_STORE_DWORD_SGPR :
     631             :                       AMDGPU::S_BUFFER_LOAD_DWORD_SGPR};
     632             : }
     633             : 
     634         723 : bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
     635             :                                int Index,
     636             :                                RegScavenger *RS,
     637             :                                bool OnlyToVGPR) const {
     638         723 :   MachineBasicBlock *MBB = MI->getParent();
     639         723 :   MachineFunction *MF = MBB->getParent();
     640         723 :   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
     641             :   DenseSet<unsigned> SGPRSpillVGPRDefinedSet;
     642             : 
     643             :   ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
     644         723 :     = MFI->getSGPRToVGPRSpills(Index);
     645         723 :   bool SpillToVGPR = !VGPRSpills.empty();
     646         723 :   if (OnlyToVGPR && !SpillToVGPR)
     647             :     return false;
     648             : 
     649         723 :   MachineRegisterInfo &MRI = MF->getRegInfo();
     650         723 :   const SISubtarget &ST =  MF->getSubtarget<SISubtarget>();
     651             :   const SIInstrInfo *TII = ST.getInstrInfo();
     652             : 
     653         723 :   unsigned SuperReg = MI->getOperand(0).getReg();
     654             :   bool IsKill = MI->getOperand(0).isKill();
     655             :   const DebugLoc &DL = MI->getDebugLoc();
     656             : 
     657         723 :   MachineFrameInfo &FrameInfo = MF->getFrameInfo();
     658             : 
     659         723 :   bool SpillToSMEM = spillSGPRToSMEM();
     660         723 :   if (SpillToSMEM && OnlyToVGPR)
     661             :     return false;
     662             : 
     663             :   assert(SpillToVGPR || (SuperReg != MFI->getStackPtrOffsetReg() &&
     664             :                          SuperReg != MFI->getFrameOffsetReg() &&
     665             :                          SuperReg != MFI->getScratchWaveOffsetReg()));
     666             : 
     667             :   assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
     668             : 
     669             :   unsigned OffsetReg = AMDGPU::M0;
     670             :   unsigned M0CopyReg = AMDGPU::NoRegister;
     671             : 
     672         723 :   if (SpillToSMEM) {
     673          14 :     if (RS->isRegUsed(AMDGPU::M0)) {
     674          14 :       M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
     675          42 :       BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
     676          14 :         .addReg(AMDGPU::M0);
     677             :     }
     678             :   }
     679             : 
     680             :   unsigned ScalarStoreOp;
     681         723 :   unsigned EltSize = 4;
     682         723 :   const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
     683         737 :   if (SpillToSMEM && isSGPRClass(RC)) {
     684             :     // XXX - if private_element_size is larger than 4 it might be useful to be
     685             :     // able to spill wider vmem spills.
     686             :     std::tie(EltSize, ScalarStoreOp) =
     687          14 :           getSpillEltSize(getRegSizeInBits(*RC) / 8, true);
     688             :   }
     689             : 
     690         723 :   ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
     691         723 :   unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
     692             : 
     693             :   // SubReg carries the "Kill" flag when SubReg == SuperReg.
     694         723 :   unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
     695        3313 :   for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
     696        2069 :     unsigned SubReg = NumSubRegs == 1 ?
     697        1548 :       SuperReg : getSubReg(SuperReg, SplitParts[i]);
     698             : 
     699        1295 :     if (SpillToSMEM) {
     700             :       int64_t FrOffset = FrameInfo.getObjectOffset(Index);
     701             : 
     702             :       // The allocated memory size is really the wavefront size * the frame
     703             :       // index size. The widest register class is 64 bytes, so a 4-byte scratch
     704             :       // allocation is enough to spill this in a single stack object.
     705             :       //
     706             :       // FIXME: Frame size/offsets are computed earlier than this, so the extra
     707             :       // space is still unnecessarily allocated.
     708             : 
     709             :       unsigned Align = FrameInfo.getObjectAlignment(Index);
     710             :       MachinePointerInfo PtrInfo
     711          15 :         = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
     712             :       MachineMemOperand *MMO
     713          30 :         = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
     714          15 :                                    EltSize, MinAlign(Align, EltSize * i));
     715             : 
     716             :       // SMEM instructions only support a single offset, so increment the wave
     717             :       // offset.
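                      :       // For example, assuming a wavefront size of 64 and a frame offset of 4,
                      :       // the first element is stored at byte offset 64 * 4 == 256, and element i
                      :       // at 256 + EltSize * i.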
     718             : 
     719          15 :       int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
     720          15 :       if (Offset != 0) {
     721          45 :         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
     722          15 :           .addReg(MFI->getFrameOffsetReg())
     723             :           .addImm(Offset);
     724             :       } else {
     725           0 :         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
     726           0 :           .addReg(MFI->getFrameOffsetReg());
     727             :       }
     728             : 
     729          45 :       BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp))
     730          15 :         .addReg(SubReg, getKillRegState(IsKill)) // sdata
     731          15 :         .addReg(MFI->getScratchRSrcReg())        // sbase
     732          15 :         .addReg(OffsetReg, RegState::Kill)       // soff
     733             :         .addImm(0)                               // glc
     734             :         .addMemOperand(MMO);
     735             : 
     736          15 :       continue;
     737             :     }
     738             : 
     739        1280 :     if (SpillToVGPR) {
     740        2396 :       SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
     741             : 
     742             :       // During SGPR spilling to VGPR, determine if the VGPR is defined. The
     743             :       // only circumstance in which we say it is undefined is when it is the
     744             :       // first spill to this VGPR in the first basic block.
     745             :       bool VGPRDefined = true;
     746        1198 :       if (MBB == &MF->front())
     747        1157 :         VGPRDefined = !SGPRSpillVGPRDefinedSet.insert(Spill.VGPR).second;
     748             : 
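                      :       // V_WRITELANE_B32 copies the scalar SubReg into lane Spill.Lane of
                      :       // Spill.VGPR, so each 32-bit piece of the SGPR tuple occupies one lane
                      :       // of the spill VGPR.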
     749             :       // Mark the "old value of vgpr" input undef only if this is the first sgpr
     750             :       // spill to this specific vgpr in the first basic block.
     751        2396 :       BuildMI(*MBB, MI, DL,
     752             :               TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
     753        1198 :               Spill.VGPR)
     754        1198 :         .addReg(SubReg, getKillRegState(IsKill))
     755        1198 :         .addImm(Spill.Lane)
     756        1198 :         .addReg(Spill.VGPR, VGPRDefined ? 0 : RegState::Undef);
     757             : 
     758             :       // FIXME: Since this spills to another register instead of an actual
     759             :       // frame index, we should delete the frame index when all references to
     760             :       // it are fixed.
     761             :     } else {
      762             :       // XXX - Can the spill to VGPR fail for some subregisters but not others?
     763          82 :       if (OnlyToVGPR)
     764           0 :         return false;
     765             : 
     766             :       // Spill SGPR to a frame index.
     767             :       // TODO: Should VI try to spill to VGPR and then spill to SMEM?
     768          82 :       unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
     770             : 
     771             :       MachineInstrBuilder Mov
     772         246 :         = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
     773          82 :         .addReg(SubReg, SubKillState);
     774             : 
     775             : 
     776             :       // There could be undef components of a spilled super register.
     777             :       // TODO: Can we detect this and skip the spill?
     778          82 :       if (NumSubRegs > 1) {
     779             :         // The last implicit use of the SuperReg carries the "Kill" flag.
     780             :         unsigned SuperKillState = 0;
     781          76 :         if (i + 1 == e)
     782             :           SuperKillState |= getKillRegState(IsKill);
     783          76 :         Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
     784             :       }
     785             : 
     786             :       unsigned Align = FrameInfo.getObjectAlignment(Index);
     787             :       MachinePointerInfo PtrInfo
     788          82 :         = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
     789             :       MachineMemOperand *MMO
     790         164 :         = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
     791          82 :                                    EltSize, MinAlign(Align, EltSize * i));
     792         246 :       BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
     793          82 :         .addReg(TmpReg, RegState::Kill)    // src
     794             :         .addFrameIndex(Index)              // vaddr
      795          82 :         .addReg(MFI->getScratchRSrcReg())  // srsrc
     796          82 :         .addReg(MFI->getFrameOffsetReg())  // soffset
     797          82 :         .addImm(i * 4)                     // offset
     798             :         .addMemOperand(MMO);
     799             :     }
     800             :   }
     801             : 
     802         723 :   if (M0CopyReg != AMDGPU::NoRegister) {
     803          42 :     BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
     804          14 :       .addReg(M0CopyReg, RegState::Kill);
     805             :   }
     806             : 
     807         723 :   MI->eraseFromParent();
     808             :   MFI->addToSpilledSGPRs(NumSubRegs);
     809         723 :   return true;
     810             : }
     811             : 
     812         710 : bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
     813             :                                  int Index,
     814             :                                  RegScavenger *RS,
     815             :                                  bool OnlyToVGPR) const {
     816         710 :   MachineFunction *MF = MI->getParent()->getParent();
     817         710 :   MachineRegisterInfo &MRI = MF->getRegInfo();
     818             :   MachineBasicBlock *MBB = MI->getParent();
     819         710 :   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
     820             : 
     821             :   ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
     822         710 :     = MFI->getSGPRToVGPRSpills(Index);
     823         710 :   bool SpillToVGPR = !VGPRSpills.empty();
     824         710 :   if (OnlyToVGPR && !SpillToVGPR)
     825             :     return false;
     826             : 
     827         710 :   MachineFrameInfo &FrameInfo = MF->getFrameInfo();
     828         710 :   const SISubtarget &ST =  MF->getSubtarget<SISubtarget>();
     829             :   const SIInstrInfo *TII = ST.getInstrInfo();
     830             :   const DebugLoc &DL = MI->getDebugLoc();
     831             : 
     832         710 :   unsigned SuperReg = MI->getOperand(0).getReg();
     833         710 :   bool SpillToSMEM = spillSGPRToSMEM();
     834         710 :   if (SpillToSMEM && OnlyToVGPR)
     835             :     return false;
     836             : 
     837             :   assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
     838             : 
     839             :   unsigned OffsetReg = AMDGPU::M0;
     840             :   unsigned M0CopyReg = AMDGPU::NoRegister;
     841             : 
     842         710 :   if (SpillToSMEM) {
     843          14 :     if (RS->isRegUsed(AMDGPU::M0)) {
     844          14 :       M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
     845          42 :       BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
     846          14 :         .addReg(AMDGPU::M0);
     847             :     }
     848             :   }
     849             : 
     850         710 :   unsigned EltSize = 4;
     851             :   unsigned ScalarLoadOp;
     852             : 
     853         710 :   const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
     854         724 :   if (SpillToSMEM && isSGPRClass(RC)) {
     855             :     // XXX - if private_element_size is larger than 4 it might be useful to be
     856             :     // able to spill wider vmem spills.
     857             :     std::tie(EltSize, ScalarLoadOp) =
     858          14 :           getSpillEltSize(getRegSizeInBits(*RC) / 8, false);
     859             :   }
     860             : 
     861         710 :   ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
     862         710 :   unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
     863             : 
     864             :   // SubReg carries the "Kill" flag when SubReg == SuperReg.
     865             :   int64_t FrOffset = FrameInfo.getObjectOffset(Index);
     866             : 
     867        3262 :   for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
     868        2038 :     unsigned SubReg = NumSubRegs == 1 ?
     869        1524 :       SuperReg : getSubReg(SuperReg, SplitParts[i]);
     870             : 
     871        1276 :     if (SpillToSMEM) {
      872             :       // FIXME: Size may be > 4, in which case the extra bytes are wasted.
     873             :       unsigned Align = FrameInfo.getObjectAlignment(Index);
     874             :       MachinePointerInfo PtrInfo
     875          15 :         = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
     876             :       MachineMemOperand *MMO
     877          30 :         = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
     878          15 :                                    EltSize, MinAlign(Align, EltSize * i));
     879             : 
      880             :       // Compute the byte offset: the frame offset scaled by the wavefront
      880             :       // size, plus i * EltSize.
     881          15 :       int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
     882          15 :       if (Offset != 0) {
     883          45 :         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
     884          15 :           .addReg(MFI->getFrameOffsetReg())
     885             :           .addImm(Offset);
     886             :       } else {
     887           0 :         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
     888           0 :           .addReg(MFI->getFrameOffsetReg());
     889             :       }
     890             : 
     891             :       auto MIB =
     892          45 :         BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg)
     893          15 :         .addReg(MFI->getScratchRSrcReg()) // sbase
     894          15 :         .addReg(OffsetReg, RegState::Kill)                // soff
     895             :         .addImm(0)                        // glc
     896          15 :         .addMemOperand(MMO);
     897             : 
     898          15 :       if (NumSubRegs > 1)
     899           2 :         MIB.addReg(SuperReg, RegState::ImplicitDefine);
     900             : 
     901          15 :       continue;
     902             :     }
     903             : 
     904        1261 :     if (SpillToVGPR) {
     905        2358 :       SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
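                      :       // V_READLANE_B32 reads lane Spill.Lane of Spill.VGPR back into the
                      :       // scalar SubReg, reversing the V_WRITELANE spill above.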
     906             :       auto MIB =
     907        2358 :         BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
     908             :                 SubReg)
     909        1179 :         .addReg(Spill.VGPR)
     910        2358 :         .addImm(Spill.Lane);
     911             : 
     912        1179 :       if (NumSubRegs > 1)
     913         684 :         MIB.addReg(SuperReg, RegState::ImplicitDefine);
     914             :     } else {
     915          82 :       if (OnlyToVGPR)
     916           0 :         return false;
     917             : 
     918             :       // Restore SGPR from a stack slot.
     919             :       // FIXME: We should use S_LOAD_DWORD here for VI.
     920          82 :       unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
     921             :       unsigned Align = FrameInfo.getObjectAlignment(Index);
     922             : 
     923             :       MachinePointerInfo PtrInfo
     924          82 :         = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
     925             : 
     926         164 :       MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo,
     927             :         MachineMemOperand::MOLoad, EltSize,
     928          82 :         MinAlign(Align, EltSize * i));
     929             : 
     930         246 :       BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
     931             :         .addFrameIndex(Index)              // vaddr
     932          82 :         .addReg(MFI->getScratchRSrcReg())  // srsrc
     933          82 :         .addReg(MFI->getFrameOffsetReg())  // soffset
     934          82 :         .addImm(i * 4)                     // offset
     935             :         .addMemOperand(MMO);
     936             : 
     937             :       auto MIB =
     938         246 :         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
     939          82 :         .addReg(TmpReg, RegState::Kill);
     940             : 
     941          82 :       if (NumSubRegs > 1)
     942          76 :         MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
     943             :     }
     944             :   }
     945             : 
     946         710 :   if (M0CopyReg != AMDGPU::NoRegister) {
     947          42 :     BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
     948          14 :       .addReg(M0CopyReg, RegState::Kill);
     949             :   }
     950             : 
     951         710 :   MI->eraseFromParent();
     952         710 :   return true;
     953             : }
     954             : 
     955             : /// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
     956             : /// a VGPR and the stack slot can be safely eliminated when all other users are
     957             : /// handled.
     958        1341 : bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
     959             :   MachineBasicBlock::iterator MI,
     960             :   int FI,
     961             :   RegScavenger *RS) const {
     962        1341 :   switch (MI->getOpcode()) {
     963         677 :   case AMDGPU::SI_SPILL_S512_SAVE:
     964             :   case AMDGPU::SI_SPILL_S256_SAVE:
     965             :   case AMDGPU::SI_SPILL_S128_SAVE:
     966             :   case AMDGPU::SI_SPILL_S64_SAVE:
     967             :   case AMDGPU::SI_SPILL_S32_SAVE:
     968         677 :     return spillSGPR(MI, FI, RS, true);
     969         664 :   case AMDGPU::SI_SPILL_S512_RESTORE:
     970             :   case AMDGPU::SI_SPILL_S256_RESTORE:
     971             :   case AMDGPU::SI_SPILL_S128_RESTORE:
     972             :   case AMDGPU::SI_SPILL_S64_RESTORE:
     973             :   case AMDGPU::SI_SPILL_S32_RESTORE:
     974         664 :     return restoreSGPR(MI, FI, RS, true);
     975           0 :   default:
     976           0 :     llvm_unreachable("not an SGPR spill instruction");
     977             :   }
     978             : }
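                        : 
                        : // Hypothetical caller sketch (names below are illustrative, not taken from
                        : // this file): try the VGPR-only elimination first; a 'true' return means the
                        : // SGPR spill slot needs no stack space and can be dropped by the caller.
                        : //
                        : //   MachineBasicBlock::iterator I = /* an SI_SPILL_S*_SAVE/RESTORE */;
                        : //   int FI = /* its frame index operand */;
                        : //   if (TRI->eliminateSGPRToVGPRSpillFrameIndex(I, FI, /*RS=*/nullptr))
                        : //     FrameInfo.RemoveStackObject(FI);  // fully handled in VGPR lanes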
     979             : 
     980        7752 : void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
     981             :                                         int SPAdj, unsigned FIOperandNum,
     982             :                                         RegScavenger *RS) const {
     983        7752 :   MachineFunction *MF = MI->getParent()->getParent();
     984        7752 :   MachineRegisterInfo &MRI = MF->getRegInfo();
     985             :   MachineBasicBlock *MBB = MI->getParent();
     986        7752 :   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
     987        7752 :   MachineFrameInfo &FrameInfo = MF->getFrameInfo();
      988        7752 :   const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
     989             :   const SIInstrInfo *TII = ST.getInstrInfo();
     990             :   DebugLoc DL = MI->getDebugLoc();
     991             : 
     992        7752 :   MachineOperand &FIOp = MI->getOperand(FIOperandNum);
     993        7752 :   int Index = MI->getOperand(FIOperandNum).getIndex();
     994             : 
     995       15504 :   switch (MI->getOpcode()) {
     996             :     // SGPR register spill
     997          46 :     case AMDGPU::SI_SPILL_S512_SAVE:
     998             :     case AMDGPU::SI_SPILL_S256_SAVE:
     999             :     case AMDGPU::SI_SPILL_S128_SAVE:
    1000             :     case AMDGPU::SI_SPILL_S64_SAVE:
    1001             :     case AMDGPU::SI_SPILL_S32_SAVE: {
    1002          46 :       spillSGPR(MI, Index, RS);
    1003          46 :       break;
    1004             :     }
    1005             : 
    1006             :     // SGPR register restore
    1007          46 :     case AMDGPU::SI_SPILL_S512_RESTORE:
    1008             :     case AMDGPU::SI_SPILL_S256_RESTORE:
    1009             :     case AMDGPU::SI_SPILL_S128_RESTORE:
    1010             :     case AMDGPU::SI_SPILL_S64_RESTORE:
    1011             :     case AMDGPU::SI_SPILL_S32_RESTORE: {
    1012          46 :       restoreSGPR(MI, Index, RS);
    1013          46 :       break;
    1014             :     }
    1015             : 
    1016             :     // VGPR register spill
    1017             :     case AMDGPU::SI_SPILL_V512_SAVE:
    1018             :     case AMDGPU::SI_SPILL_V256_SAVE:
    1019             :     case AMDGPU::SI_SPILL_V128_SAVE:
    1020             :     case AMDGPU::SI_SPILL_V96_SAVE:
    1021             :     case AMDGPU::SI_SPILL_V64_SAVE:
    1022             :     case AMDGPU::SI_SPILL_V32_SAVE: {
    1023             :       const MachineOperand *VData = TII->getNamedOperand(*MI,
    1024        1265 :                                                          AMDGPU::OpName::vdata);
    1025        2530 :       buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
    1026             :             Index,
    1027             :             VData->getReg(), VData->isKill(),
    1028             :             TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
    1029             :             TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
    1030             :             TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
    1031        1265 :             *MI->memoperands_begin(),
    1032             :             RS);
    1033        2530 :       MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
    1034        1265 :       MI->eraseFromParent();
    1035        1265 :       break;
    1036             :     }
    1037             :     case AMDGPU::SI_SPILL_V32_RESTORE:
    1038             :     case AMDGPU::SI_SPILL_V64_RESTORE:
    1039             :     case AMDGPU::SI_SPILL_V96_RESTORE:
    1040             :     case AMDGPU::SI_SPILL_V128_RESTORE:
    1041             :     case AMDGPU::SI_SPILL_V256_RESTORE:
    1042             :     case AMDGPU::SI_SPILL_V512_RESTORE: {
    1043             :       const MachineOperand *VData = TII->getNamedOperand(*MI,
    1044        1176 :                                                          AMDGPU::OpName::vdata);
    1045             : 
    1046        2352 :       buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
    1047             :             Index,
    1048             :             VData->getReg(), VData->isKill(),
    1049             :             TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
    1050             :             TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
    1051             :             TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
    1052        1176 :             *MI->memoperands_begin(),
    1053             :             RS);
    1054        1176 :       MI->eraseFromParent();
    1055        1176 :       break;
    1056             :     }
    1057             : 
    1058             :     default: {
    1059             :       const DebugLoc &DL = MI->getDebugLoc();
    1060             :       bool IsMUBUF = TII->isMUBUF(*MI);
    1061             : 
    1062        5563 :       if (!IsMUBUF &&
    1063         344 :           MFI->getFrameOffsetReg() != MFI->getScratchWaveOffsetReg()) {
    1064             :         // Convert to an absolute stack address by finding the offset from the
    1065             :         // scratch wave base and scaling by the wave size.
    1066             :         //
    1067             :         // In an entry function/kernel the stack address is already the
    1068             :         // absolute address relative to the scratch wave offset.
    1069             : 
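                        :         // Worked example with illustrative numbers: for a wavefront size of
                        :         // 64 the shift below is by log2(64) = 6, so a frame/scratch-wave
                        :         // register difference of 256 scales to 256 >> 6 = 4, and the object's
                        :         // own frame offset is then added on top by the add emitted below.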
    1070             :         unsigned DiffReg
    1071          36 :           = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
    1072             : 
    1073          36 :         bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32;
    1074          72 :         unsigned ResultReg = IsCopy ?
    1075          31 :           MI->getOperand(0).getReg() :
    1076          36 :           MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    1077             : 
    1078         108 :         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), DiffReg)
    1079          36 :           .addReg(MFI->getFrameOffsetReg())
    1080          36 :           .addReg(MFI->getScratchWaveOffsetReg());
    1081             : 
    1082             :         int64_t Offset = FrameInfo.getObjectOffset(Index);
    1083          36 :         if (Offset == 0) {
     1084             :           // XXX - This never happens because of the emergency scavenging slot at offset 0?
    1085           0 :           BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ResultReg)
    1086           0 :             .addImm(Log2_32(ST.getWavefrontSize()))
    1087           0 :             .addReg(DiffReg);
    1088             :         } else {
    1089             :           unsigned ScaledReg
    1090          36 :             = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    1091             : 
    1092         108 :           BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ScaledReg)
    1093          72 :             .addImm(Log2_32(ST.getWavefrontSize()))
    1094          36 :             .addReg(DiffReg, RegState::Kill);
    1095             : 
    1096             :           // TODO: Fold if use instruction is another add of a constant.
    1097          36 :           if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
    1098          64 :             TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
    1099             :               .addImm(Offset)
    1100          32 :               .addReg(ScaledReg, RegState::Kill);
    1101             :           } else {
    1102             :             unsigned ConstOffsetReg
    1103           4 :               = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
    1104             : 
    1105          12 :             BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
    1106             :               .addImm(Offset);
    1107           8 :             TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
    1108           4 :               .addReg(ConstOffsetReg, RegState::Kill)
    1109           4 :               .addReg(ScaledReg, RegState::Kill);
    1110             :           }
    1111             :         }
    1112             : 
    1113             :         // Don't introduce an extra copy if we're just materializing in a mov.
    1114          36 :         if (IsCopy)
    1115          31 :           MI->eraseFromParent();
    1116             :         else
    1117           5 :           FIOp.ChangeToRegister(ResultReg, false, false, true);
    1118             :         return;
    1119             :       }
    1120             : 
    1121        5183 :       if (IsMUBUF) {
    1122             :         // Disable offen so we don't need a 0 vgpr base.
    1123             :         assert(static_cast<int>(FIOperandNum) ==
    1124             :                AMDGPU::getNamedOperandIdx(MI->getOpcode(),
    1125             :                                           AMDGPU::OpName::vaddr));
    1126             : 
    1127             :         assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg()
    1128             :                == MFI->getFrameOffsetReg());
    1129             : 
    1130             :         int64_t Offset = FrameInfo.getObjectOffset(Index);
    1131             :         int64_t OldImm
    1132        4875 :           = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
    1133        4875 :         int64_t NewOffset = OldImm + Offset;
    1134             : 
    1135        9734 :         if (isUInt<12>(NewOffset) &&
    1136        4859 :             buildMUBUFOffsetLoadStore(TII, FrameInfo, MI, Index, NewOffset)) {
    1137        4859 :           MI->eraseFromParent();
    1138        4859 :           return;
    1139             :         }
    1140             :       }
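                        : 
                        :       // Illustrative numbers for the isUInt<12> guard above: the MUBUF
                        :       // offset field holds an unsigned 12-bit value (max 4095), so an
                        :       // existing immediate of 8 plus an object offset of 4088 gives 4096,
                        :       // which no longer fits and falls through to the code below.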
    1141             : 
    1142             :       // If the offset is simply too big, don't convert to a scratch wave offset
    1143             :       // relative index.
    1144             : 
    1145             :       int64_t Offset = FrameInfo.getObjectOffset(Index);
    1146         324 :       FIOp.ChangeToImmediate(Offset);
    1147         324 :       if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
    1148          16 :         unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    1149          48 :         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
    1150             :           .addImm(Offset);
    1151          16 :         FIOp.ChangeToRegister(TmpReg, false, false, true);
    1152             :       }
    1153             :     }
    1154             :   }
    1155             : }
    1156             : 
    1157     7608621 : StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const {
    1158             :   #define AMDGPU_REG_ASM_NAMES
    1159             :   #include "AMDGPURegAsmNames.inc.cpp"
    1160             : 
    1161             :   #define REG_RANGE(BeginReg, EndReg, RegTable)            \
    1162             :     if (Reg >= BeginReg && Reg <= EndReg) {                \
    1163             :       unsigned Index = Reg - BeginReg;                     \
    1164             :       assert(Index < array_lengthof(RegTable));            \
    1165             :       return RegTable[Index];                              \
    1166             :     }
    1167             : 
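                        :   // Illustrative expansion of the first use below (assuming the generated
                        :   // VGPR32RegNames table holds "v0".."v255"; the assert is omitted here):
                        :   //
                        :   //   if (Reg >= AMDGPU::VGPR0 && Reg <= AMDGPU::VGPR255) {
                        :   //     unsigned Index = Reg - AMDGPU::VGPR0;   // e.g. VGPR17 -> 17
                        :   //     return VGPR32RegNames[Index];           // -> "v17"
                        :   //   }
                        : 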
    1168     8559623 :   REG_RANGE(AMDGPU::VGPR0, AMDGPU::VGPR255, VGPR32RegNames);
    1169     7504587 :   REG_RANGE(AMDGPU::SGPR0, AMDGPU::SGPR103, SGPR32RegNames);
    1170     6374321 :   REG_RANGE(AMDGPU::VGPR0_VGPR1, AMDGPU::VGPR254_VGPR255, VGPR64RegNames);
    1171     5475897 :   REG_RANGE(AMDGPU::SGPR0_SGPR1, AMDGPU::SGPR102_SGPR103, SGPR64RegNames);
    1172     5018065 :   REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2, AMDGPU::VGPR253_VGPR254_VGPR255,
    1173             :             VGPR96RegNames);
    1174             : 
    1175     5293102 :   REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3,
    1176             :             AMDGPU::VGPR252_VGPR253_VGPR254_VGPR255,
    1177             :             VGPR128RegNames);
    1178     4799454 :   REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3,
    1179             :             AMDGPU::SGPR100_SGPR101_SGPR102_SGPR103,
    1180             :             SGPR128RegNames);
    1181             : 
    1182     4956271 :   REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7,
    1183             :             AMDGPU::VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
    1184             :             VGPR256RegNames);
    1185             : 
    1186     4676974 :   REG_RANGE(
    1187             :     AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7_VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15,
    1188             :     AMDGPU::VGPR240_VGPR241_VGPR242_VGPR243_VGPR244_VGPR245_VGPR246_VGPR247_VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
    1189             :     VGPR512RegNames);
    1190             : 
    1191     4210948 :   REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7,
    1192             :             AMDGPU::SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
    1193             :             SGPR256RegNames);
    1194             : 
    1195     4152388 :   REG_RANGE(
    1196             :     AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7_SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15,
    1197             :     AMDGPU::SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95_SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
    1198             :     SGPR512RegNames
    1199             :   );
    1200             : 
    1201             : #undef REG_RANGE
    1202             : 
    1203             :   // FIXME: Rename flat_scr so we don't need to special case this.
    1204     4053284 :   switch (Reg) {
    1205             :   case AMDGPU::FLAT_SCR:
    1206        3295 :     return "flat_scratch";
    1207             :   case AMDGPU::FLAT_SCR_LO:
    1208        6885 :     return "flat_scratch_lo";
    1209             :   case AMDGPU::FLAT_SCR_HI:
    1210        6885 :     return "flat_scratch_hi";
    1211     4036219 :   default:
    1212             :     // For the special named registers the default is fine.
    1213     4036219 :     return TargetRegisterInfo::getRegAsmName(Reg);
    1214             :   }
    1215             : }
    1216             : 
    1217             : // FIXME: This is very slow. It might be worth creating a map from physreg to
    1218             : // register class.
    1219     5187015 : const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
    1220             :   assert(!TargetRegisterInfo::isVirtualRegister(Reg));
    1221             : 
    1222             :   static const TargetRegisterClass *const BaseClasses[] = {
    1223             :     &AMDGPU::VGPR_32RegClass,
    1224             :     &AMDGPU::SReg_32RegClass,
    1225             :     &AMDGPU::VReg_64RegClass,
    1226             :     &AMDGPU::SReg_64RegClass,
    1227             :     &AMDGPU::VReg_96RegClass,
    1228             :     &AMDGPU::VReg_128RegClass,
    1229             :     &AMDGPU::SReg_128RegClass,
    1230             :     &AMDGPU::VReg_256RegClass,
    1231             :     &AMDGPU::SReg_256RegClass,
    1232             :     &AMDGPU::VReg_512RegClass,
    1233             :     &AMDGPU::SReg_512RegClass,
    1234             :     &AMDGPU::SCC_CLASSRegClass,
    1235             :     &AMDGPU::R600_Reg32RegClass,
    1236             :     &AMDGPU::R600_PredicateRegClass,
    1237             :     &AMDGPU::Pseudo_SReg_32RegClass,
    1238             :     &AMDGPU::Pseudo_SReg_128RegClass,
    1239             :   };
    1240             : 
    1241    27708323 :   for (const TargetRegisterClass *BaseClass : BaseClasses) {
    1242    29956220 :     if (BaseClass->contains(Reg)) {
    1243             :       return BaseClass;
    1244             :     }
    1245             :   }
    1246             :   return nullptr;
    1247             : }
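                        : 
                        : // One possible shape for the cache the FIXME above suggests (a sketch with
                        : // hypothetical names, ignoring thread-safety; not code from this backend):
                        : //
                        : //   static DenseMap<unsigned, const TargetRegisterClass *> PhysRegClassMap;
                        : //   if (PhysRegClassMap.empty())
                        : //     for (const TargetRegisterClass *BaseClass : BaseClasses)
                        : //       for (unsigned R : *BaseClass)
                        : //         PhysRegClassMap.try_emplace(R, BaseClass); // first match wins
                        : //   return PhysRegClassMap.lookup(Reg);             // nullptr if unknown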
    1248             : 
    1249             : // TODO: It might be helpful to have some target specific flags in
    1250             : // TargetRegisterClass to mark which classes are VGPRs to make this trivial.
    1251    11039307 : bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
    1252             :   unsigned Size = getRegSizeInBits(*RC);
    1253    11039307 :   if (Size < 32)
    1254             :     return false;
    1255    11034019 :   switch (Size) {
    1256     5764109 :   case 32:
    1257     5764109 :     return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr;
    1258     3895115 :   case 64:
    1259     3895115 :     return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr;
    1260        1283 :   case 96:
    1261        1283 :     return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr;
    1262     1199701 :   case 128:
    1263     1199701 :     return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr;
    1264      129450 :   case 256:
    1265      129450 :     return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr;
    1266       44361 :   case 512:
    1267       44361 :     return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr;
    1268           0 :   default:
    1269           0 :     llvm_unreachable("Invalid register class size");
    1270             :   }
    1271             : }
    1272             : 
    1273      139736 : const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
    1274             :                                          const TargetRegisterClass *SRC) const {
    1275      139736 :   switch (getRegSizeInBits(*SRC)) {
    1276             :   case 32:
    1277             :     return &AMDGPU::VGPR_32RegClass;
    1278       15462 :   case 64:
    1279       15462 :     return &AMDGPU::VReg_64RegClass;
    1280           0 :   case 96:
    1281           0 :     return &AMDGPU::VReg_96RegClass;
    1282        5619 :   case 128:
    1283        5619 :     return &AMDGPU::VReg_128RegClass;
    1284          61 :   case 256:
    1285          61 :     return &AMDGPU::VReg_256RegClass;
    1286          51 :   case 512:
    1287          51 :     return &AMDGPU::VReg_512RegClass;
    1288           0 :   default:
    1289           0 :     llvm_unreachable("Invalid register class size");
    1290             :   }
    1291             : }
    1292             : 
    1293        1735 : const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass(
    1294             :                                          const TargetRegisterClass *VRC) const {
    1295        1735 :   switch (getRegSizeInBits(*VRC)) {
    1296             :   case 32:
    1297             :     return &AMDGPU::SGPR_32RegClass;
    1298         241 :   case 64:
    1299         241 :     return &AMDGPU::SReg_64RegClass;
    1300          12 :   case 128:
    1301          12 :     return &AMDGPU::SReg_128RegClass;
    1302          14 :   case 256:
    1303          14 :     return &AMDGPU::SReg_256RegClass;
    1304           0 :   case 512:
    1305           0 :     return &AMDGPU::SReg_512RegClass;
    1306           0 :   default:
    1307           0 :     llvm_unreachable("Invalid register class size");
    1308             :   }
    1309             : }
    1310             : 
    1311      367578 : const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
    1312             :                          const TargetRegisterClass *RC, unsigned SubIdx) const {
    1313      367578 :   if (SubIdx == AMDGPU::NoSubRegister)
    1314             :     return RC;
    1315             : 
    1316             :   // We can assume that each lane corresponds to one 32-bit register.
    1317       60841 :   unsigned Count = getSubRegIndexLaneMask(SubIdx).getNumLanes();
    1318       60841 :   if (isSGPRClass(RC)) {
    1319       29954 :     switch (Count) {
    1320             :     case 1:
    1321             :       return &AMDGPU::SGPR_32RegClass;
    1322           0 :     case 2:
    1323           0 :       return &AMDGPU::SReg_64RegClass;
    1324           0 :     case 4:
    1325           0 :       return &AMDGPU::SReg_128RegClass;
    1326           0 :     case 8:
    1327           0 :       return &AMDGPU::SReg_256RegClass;
    1328           0 :     case 16: /* fall-through */
    1329             :     default:
    1330           0 :       llvm_unreachable("Invalid sub-register class size");
    1331             :     }
    1332             :   } else {
    1333       30887 :     switch (Count) {
    1334             :     case 1:
    1335             :       return &AMDGPU::VGPR_32RegClass;
    1336          58 :     case 2:
    1337          58 :       return &AMDGPU::VReg_64RegClass;
    1338           0 :     case 3:
    1339           0 :       return &AMDGPU::VReg_96RegClass;
    1340           0 :     case 4:
    1341           0 :       return &AMDGPU::VReg_128RegClass;
    1342           0 :     case 8:
    1343           0 :       return &AMDGPU::VReg_256RegClass;
    1344           0 :     case 16: /* fall-through */
    1345             :     default:
    1346           0 :       llvm_unreachable("Invalid sub-register class size");
    1347             :     }
    1348             :   }
    1349             : }
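                        : 
                        : // Illustrative queries (register classes and sub-register indices as used
                        : // elsewhere in this file):
                        : //
                        : //   getSubRegClass(&AMDGPU::VReg_128RegClass, AMDGPU::sub0_sub1)
                        : //       -> &AMDGPU::VReg_64RegClass   (two lanes)
                        : //   getSubRegClass(&AMDGPU::SReg_64RegClass, AMDGPU::sub0)
                        : //       -> &AMDGPU::SGPR_32RegClass   (one lane, SGPR class)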
    1350             : 
    1351      392403 : bool SIRegisterInfo::shouldRewriteCopySrc(
    1352             :   const TargetRegisterClass *DefRC,
    1353             :   unsigned DefSubReg,
    1354             :   const TargetRegisterClass *SrcRC,
    1355             :   unsigned SrcSubReg) const {
    1356             :   // We want to prefer the smallest register class possible, so we don't want to
    1357             :   // stop and rewrite on anything that looks like a subregister
    1358             :   // extract. Operations mostly don't care about the super register class, so we
    1359             :   // only want to stop on the most basic of copies between the same register
    1360             :   // class.
    1361             :   //
    1362             :   // e.g. if we have something like
    1363             :   // %0 = ...
    1364             :   // %1 = ...
     1365             :   // %2 = REG_SEQUENCE %0, sub0, %1, sub1
     1366             :   // %3 = COPY %2.sub0
    1367             :   //
    1368             :   // We want to look through the COPY to find:
    1369             :   //  => %3 = COPY %0
    1370             : 
    1371             :   // Plain copy.
    1372      392403 :   return getCommonSubClass(DefRC, SrcRC) != nullptr;
    1373             : }
    1374             : 
    1375             : /// Returns a register that is not used at any point in the function.
     1376             : /// If all registers are used, then this function will return
     1377             : /// AMDGPU::NoRegister.
    1378             : unsigned
    1379         140 : SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
    1380             :                                    const TargetRegisterClass *RC,
    1381             :                                    const MachineFunction &MF) const {
    1382             : 
    1383        8554 :   for (unsigned Reg : *RC)
    1384        4273 :     if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
    1385             :       return Reg;
    1386             :   return AMDGPU::NoRegister;
    1387             : }
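                        : 
                        : // Minimal usage sketch (variable names are hypothetical): pick a scratch
                        : // SGPR the function never touches, and handle exhaustion explicitly.
                        : //
                        : //   unsigned UnusedSGPR =
                        : //       TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_32RegClass, MF);
                        : //   if (UnusedSGPR == AMDGPU::NoRegister)
                        : //     /* no free register in the class; the caller must fall back */;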
    1388             : 
    1389        8625 : ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
    1390             :                                                    unsigned EltSize) const {
    1391        8625 :   if (EltSize == 4) {
    1392             :     static const int16_t Sub0_15[] = {
    1393             :       AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    1394             :       AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    1395             :       AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
    1396             :       AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
    1397             :     };
    1398             : 
    1399             :     static const int16_t Sub0_7[] = {
    1400             :       AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    1401             :       AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    1402             :     };
    1403             : 
    1404             :     static const int16_t Sub0_3[] = {
    1405             :       AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    1406             :     };
    1407             : 
    1408             :     static const int16_t Sub0_2[] = {
    1409             :       AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2,
    1410             :     };
    1411             : 
    1412             :     static const int16_t Sub0_1[] = {
    1413             :       AMDGPU::sub0, AMDGPU::sub1,
    1414             :     };
    1415             : 
    1416        8472 :     switch (AMDGPU::getRegBitWidth(*RC->MC)) {
    1417        1015 :     case 32:
    1418        1015 :       return {};
    1419             :     case 64:
    1420             :       return makeArrayRef(Sub0_1);
    1421             :     case 96:
    1422             :       return makeArrayRef(Sub0_2);
    1423             :     case 128:
    1424             :       return makeArrayRef(Sub0_3);
    1425             :     case 256:
    1426             :       return makeArrayRef(Sub0_7);
    1427             :     case 512:
    1428             :       return makeArrayRef(Sub0_15);
    1429           0 :     default:
    1430           0 :       llvm_unreachable("unhandled register size");
    1431             :     }
    1432             :   }
    1433             : 
    1434         153 :   if (EltSize == 8) {
    1435             :     static const int16_t Sub0_15_64[] = {
    1436             :       AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
    1437             :       AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
    1438             :       AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
    1439             :       AMDGPU::sub12_sub13, AMDGPU::sub14_sub15
    1440             :     };
    1441             : 
    1442             :     static const int16_t Sub0_7_64[] = {
    1443             :       AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
    1444             :       AMDGPU::sub4_sub5, AMDGPU::sub6_sub7
    1445             :     };
    1446             : 
    1447             : 
    1448             :     static const int16_t Sub0_3_64[] = {
    1449             :       AMDGPU::sub0_sub1, AMDGPU::sub2_sub3
    1450             :     };
    1451             : 
    1452         147 :     switch (AMDGPU::getRegBitWidth(*RC->MC)) {
    1453          16 :     case 64:
    1454          16 :       return {};
    1455             :     case 128:
    1456             :       return makeArrayRef(Sub0_3_64);
    1457             :     case 256:
    1458             :       return makeArrayRef(Sub0_7_64);
    1459             :     case 512:
    1460             :       return makeArrayRef(Sub0_15_64);
    1461           0 :     default:
    1462           0 :       llvm_unreachable("unhandled register size");
    1463             :     }
    1464             :   }
    1465             : 
    1466             :   assert(EltSize == 16 && "unhandled register spill split size");
    1467             : 
    1468             :   static const int16_t Sub0_15_128[] = {
    1469             :     AMDGPU::sub0_sub1_sub2_sub3,
    1470             :     AMDGPU::sub4_sub5_sub6_sub7,
    1471             :     AMDGPU::sub8_sub9_sub10_sub11,
    1472             :     AMDGPU::sub12_sub13_sub14_sub15
    1473             :   };
    1474             : 
    1475             :   static const int16_t Sub0_7_128[] = {
    1476             :     AMDGPU::sub0_sub1_sub2_sub3,
    1477             :     AMDGPU::sub4_sub5_sub6_sub7
    1478             :   };
    1479             : 
    1480           6 :   switch (AMDGPU::getRegBitWidth(*RC->MC)) {
    1481           4 :   case 128:
    1482           4 :     return {};
    1483             :   case 256:
    1484             :     return makeArrayRef(Sub0_7_128);
    1485             :   case 512:
    1486             :     return makeArrayRef(Sub0_15_128);
    1487           0 :   default:
    1488           0 :     llvm_unreachable("unhandled register size");
    1489             :   }
    1490             : }
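                        : 
                        : // Illustrative queries: splitting a 128-bit class into 4-byte elements
                        : // yields the four dword indices, while a 32-bit class needs no split.
                        : //
                        : //   getRegSplitParts(&AMDGPU::VReg_128RegClass, /*EltSize=*/4)
                        : //       -> { sub0, sub1, sub2, sub3 }
                        : //   getRegSplitParts(&AMDGPU::VGPR_32RegClass, /*EltSize=*/4) -> { }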
    1491             : 
    1492             : const TargetRegisterClass*
    1493     4359764 : SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
    1494             :                                   unsigned Reg) const {
    1495     4359764 :   if (TargetRegisterInfo::isVirtualRegister(Reg))
     1496      409215 :     return MRI.getRegClass(Reg);
    1497             : 
    1498     3950549 :   return getPhysRegClass(Reg);
    1499             : }
    1500             : 
    1501     4160351 : bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
    1502             :                             unsigned Reg) const {
     1503     4160351 :   const TargetRegisterClass *RC = getRegClassForReg(MRI, Reg);
    1504             :   assert(RC && "Register class for the reg not found");
    1505     4160351 :   return hasVGPRs(RC);
    1506             : }
    1507             : 
    1508      157297 : bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
    1509             :                                     const TargetRegisterClass *SrcRC,
    1510             :                                     unsigned SubReg,
    1511             :                                     const TargetRegisterClass *DstRC,
    1512             :                                     unsigned DstSubReg,
    1513             :                                     const TargetRegisterClass *NewRC,
    1514             :                                     LiveIntervals &LIS) const {
    1515             :   unsigned SrcSize = getRegSizeInBits(*SrcRC);
    1516             :   unsigned DstSize = getRegSizeInBits(*DstRC);
    1517             :   unsigned NewSize = getRegSizeInBits(*NewRC);
    1518             : 
     1519             :   // Do not increase the size of registers beyond a dword; we would need to
     1520             :   // allocate adjacent registers and constrain regalloc more than needed.
    1521             : 
    1522             :   // Always allow dword coalescing.
    1523      157297 :   if (SrcSize <= 32 || DstSize <= 32)
    1524             :     return true;
    1525             : 
    1526       43364 :   return NewSize <= DstSize || NewSize <= SrcSize;
    1527             : }
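                        : 
                        : // Worked example of the policy above (illustrative sizes): if either side is
                        : // already <= 32 bits the coalesce is always allowed; for a 64-bit source and
                        : // a 64-bit destination, a 64-bit NewRC is accepted (64 <= 64), but a 128-bit
                        : // NewRC is rejected, since it would be wider than both inputs.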
    1528             : 
    1529      112204 : unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
    1530             :                                              MachineFunction &MF) const {
    1531             : 
    1532      112204 :   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
    1533      112204 :   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
    1534             : 
    1535      112204 :   unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
    1536      112204 :                                                        MF.getFunction());
    1537      224408 :   switch (RC->getID()) {
    1538             :   default:
    1539             :     return AMDGPURegisterInfo::getRegPressureLimit(RC, MF);
    1540       56102 :   case AMDGPU::VGPR_32RegClassID:
    1541      168306 :     return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF));
    1542       56102 :   case AMDGPU::SGPR_32RegClassID:
    1543      168306 :     return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF));
    1544             :   }
    1545             : }
    1546             : 
    1547     1043579 : unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
    1548             :                                                 unsigned Idx) const {
    1549     1043579 :   if (Idx == getVGPRPressureSet())
    1550             :     return getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
    1551       56102 :                                const_cast<MachineFunction &>(MF));
    1552             : 
    1553      987477 :   if (Idx == getSGPRPressureSet())
    1554             :     return getRegPressureLimit(&AMDGPU::SGPR_32RegClass,
    1555       56102 :                                const_cast<MachineFunction &>(MF));
    1556             : 
    1557      931375 :   return AMDGPURegisterInfo::getRegPressureSetLimit(MF, Idx);
    1558             : }
    1559             : 
    1560     4616197 : const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
    1561             :   static const int Empty[] = { -1 };
    1562             : 
    1563     4616197 :   if (hasRegUnit(AMDGPU::M0, RegUnit))
    1564             :     return Empty;
    1565     4613956 :   return AMDGPURegisterInfo::getRegUnitPressureSets(RegUnit);
    1566             : }
    1567             : 
    1568        3277 : unsigned SIRegisterInfo::getReturnAddressReg(const MachineFunction &MF) const {
    1569             :   // Not a callee saved register.
    1570        3277 :   return AMDGPU::SGPR30_SGPR31;
    1571             : }
    1572             : 
    1573             : const TargetRegisterClass *
    1574          43 : SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
    1575             :                                          const MachineRegisterInfo &MRI) const {
    1576          43 :   unsigned Size = getRegSizeInBits(MO.getReg(), MRI);
    1577          43 :   const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
    1578          19 :   if (!RB)
    1579             :     return nullptr;
    1580             : 
    1581          19 :   switch (Size) {
    1582          17 :   case 32:
    1583          17 :     return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
    1584             :                                                   &AMDGPU::SReg_32_XM0RegClass;
    1585           2 :   case 64:
    1586           2 :     return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_64RegClass :
    1587             :                                                    &AMDGPU::SReg_64_XEXECRegClass;
    1588           0 :   case 96:
    1589           0 :     return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_96RegClass :
    1590             :                                                   nullptr;
    1591           0 :   case 128:
    1592           0 :     return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_128RegClass :
    1593             :                                                   &AMDGPU::SReg_128RegClass;
    1594           0 :   default:
    1595           0 :     llvm_unreachable("not implemented");
    1596             :   }
    1597      303507 : }

Generated by: LCOV version 1.13