LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - SIRegisterInfo.cpp
Test: llvm-toolchain.info
Date: 2018-02-23 15:42:53
Coverage: Lines: 514 / 598 (86.0 %)   Functions: 39 / 43 (90.7 %)

          Line data    Source code
       1             : //===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : /// \file
      11             : /// \brief SI implementation of the TargetRegisterInfo class.
      12             : //
      13             : //===----------------------------------------------------------------------===//
      14             : 
      15             : #include "SIRegisterInfo.h"
      16             : #include "AMDGPUSubtarget.h"
      17             : #include "SIInstrInfo.h"
      18             : #include "SIMachineFunctionInfo.h"
      19             : #include "llvm/CodeGen/MachineFrameInfo.h"
      20             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      21             : #include "llvm/CodeGen/RegisterScavenging.h"
      22             : #include "llvm/IR/Function.h"
      23             : #include "llvm/IR/LLVMContext.h"
      24             : 
      25             : using namespace llvm;
      26             : 
      27             : static bool hasPressureSet(const int *PSets, unsigned PSetID) {
      28      617288 :   for (unsigned i = 0; PSets[i] != -1; ++i) {
      29      263676 :     if (PSets[i] == (int)PSetID)
      30             :       return true;
      31             :   }
      32             :   return false;
      33             : }
      34             : 
      35      110376 : void SIRegisterInfo::classifyPressureSet(unsigned PSetID, unsigned Reg,
      36             :                                          BitVector &PressureSets) const {
      37      210532 :   for (MCRegUnitIterator U(Reg, this); U.isValid(); ++U) {
      38      110376 :     const int *PSets = getRegUnitPressureSets(*U);
      39      110376 :     if (hasPressureSet(PSets, PSetID)) {
      40             :       PressureSets.set(PSetID);
      41             :       break;
      42             :     }
      43             :   }
      44      110376 : }
      45             : 
      46       81686 : static cl::opt<bool> EnableSpillSGPRToSMEM(
      47             :   "amdgpu-spill-sgpr-to-smem",
      48       81686 :   cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"),
      49      245058 :   cl::init(false));
      50             : 
      51       81686 : static cl::opt<bool> EnableSpillSGPRToVGPR(
      52             :   "amdgpu-spill-sgpr-to-vgpr",
      53       81686 :   cl::desc("Enable spilling SGPRs to VGPRs"),
      54             :   cl::ReallyHidden,
      55      245058 :   cl::init(true));
      56             : 
      57        2044 : SIRegisterInfo::SIRegisterInfo(const SISubtarget &ST) :
      58             :   AMDGPURegisterInfo(),
      59             :   SGPRPressureSets(getNumRegPressureSets()),
      60             :   VGPRPressureSets(getNumRegPressureSets()),
      61             :   SpillSGPRToVGPR(false),
      62        2044 :   SpillSGPRToSMEM(false) {
      63        2044 :   if (EnableSpillSGPRToSMEM && ST.hasScalarStores())
      64           5 :     SpillSGPRToSMEM = true;
      65        2039 :   else if (EnableSpillSGPRToVGPR)
      66        2035 :     SpillSGPRToVGPR = true;
      67             : 
      68        2044 :   unsigned NumRegPressureSets = getNumRegPressureSets();
      69             : 
      70        2044 :   SGPRSetID = NumRegPressureSets;
      71        2044 :   VGPRSetID = NumRegPressureSets;
      72             : 
      73      112420 :   for (unsigned i = 0; i < NumRegPressureSets; ++i) {
      74       55188 :     classifyPressureSet(i, AMDGPU::SGPR0, SGPRPressureSets);
      75       55188 :     classifyPressureSet(i, AMDGPU::VGPR0, VGPRPressureSets);
      76             :   }
      77             : 
      78             :   // Determine the number of reg units for each pressure set.
      79        2044 :   std::vector<unsigned> PressureSetRegUnits(NumRegPressureSets, 0);
      80     3632188 :   for (unsigned i = 0, e = getNumRegUnits(); i != e; ++i) {
      81     3630144 :     const int *PSets = getRegUnitPressureSets(i);
      82    19961704 :     for (unsigned j = 0; PSets[j] != -1; ++j) {
      83    16331560 :       ++PressureSetRegUnits[PSets[j]];
      84             :     }
      85             :   }
      86             : 
      87             :   unsigned VGPRMax = 0, SGPRMax = 0;
      88      112420 :   for (unsigned i = 0; i < NumRegPressureSets; ++i) {
      89        6132 :     if (isVGPRPressureSet(i) && PressureSetRegUnits[i] > VGPRMax) {
      90        2044 :       VGPRSetID = i;
      91        2044 :       VGPRMax = PressureSetRegUnits[i];
      92        2044 :       continue;
      93             :     }
      94       16352 :     if (isSGPRPressureSet(i) && PressureSetRegUnits[i] > SGPRMax) {
      95        8176 :       SGPRSetID = i;
      96        8176 :       SGPRMax = PressureSetRegUnits[i];
      97             :     }
      98             :   }
      99             : 
     100             :   assert(SGPRSetID < NumRegPressureSets &&
     101             :          VGPRSetID < NumRegPressureSets);
     102        2044 : }
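
// An illustrative sketch of the selection above (unit counts are assumed,
// not taken from a real build): if the pressure sets containing SGPR0 had
// 104, 52, and 26 register units, the second loop would leave SGPRSetID
// pointing at the 104-unit set; that is, the broadest pressure set
// containing SGPR0 wins, and likewise for VGPRSetID with VGPR0.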
     103             : 
     104       15673 : unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
     105             :   const MachineFunction &MF) const {
     106             : 
     107       15673 :   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
     108       31346 :   unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4;
     109       15673 :   unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
     110       31346 :   return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
     111             : }
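
// A worked instance of the math above, assuming ST.getMaxNumSGPRs(MF) == 102
// (a typical VI/GFX9 limit; the exact value is subtarget-dependent):
//   BaseIdx = alignDown(102, 4) - 4 = 96
// so BaseReg is SGPR96 and the returned 128-bit resource descriptor is the
// tuple SGPR96_SGPR97_SGPR98_SGPR99.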
     112             : 
     113             : static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount) {
     114             :   unsigned Reg;
     115             : 
     116             :   // Try to place it in a hole after PrivateSegmentBufferReg.
     117       15639 :   if (RegCount & 3) {
     118             :     // We cannot put the segment buffer in (Idx - 4) ... (Idx - 1) due to
      119             :     // alignment constraints, so we have a hole where we can put the wave offset.
     120       15461 :     Reg = RegCount - 1;
     121             :   } else {
     122             :     // We can put the segment buffer in (Idx - 4) ... (Idx - 1) and put the
     123             :     // wave offset before it.
     124         178 :     Reg = RegCount - 5;
     125             :   }
     126             : 
     127             :   return Reg;
     128             : }
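
// Two concrete cases of the hole-finding logic above (RegCount values are
// illustrative): RegCount == 102 gives 102 & 3 == 2, so the wave offset goes
// in the hole at index 101, above the 4-aligned segment buffer at 96..99;
// RegCount == 104 is already 4-aligned, so the buffer occupies 100..103 and
// the wave offset drops to index 99.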
     129             : 
     130       15639 : unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
     131             :   const MachineFunction &MF) const {
     132       15639 :   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
     133       15639 :   unsigned Reg = findPrivateSegmentWaveByteOffsetRegIndex(ST.getMaxNumSGPRs(MF));
     134       31278 :   return AMDGPU::SGPR_32RegClass.getRegister(Reg);
     135             : }
     136             : 
     137        1502 : unsigned SIRegisterInfo::reservedStackPtrOffsetReg(
     138             :   const MachineFunction &MF) const {
     139        1502 :   return AMDGPU::SGPR32;
     140             : }
     141             : 
     142       33603 : BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
     143       33603 :   BitVector Reserved(getNumRegs());
     144             : 
      145             :   // EXEC_LO and EXEC_HI could be allocated and used as regular registers, but
     146             :   // this seems likely to result in bugs, so I'm marking them as reserved.
     147       33603 :   reserveRegisterTuples(Reserved, AMDGPU::EXEC);
     148       33603 :   reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
     149             : 
      150             :   // M0 has to be reserved so that llvm accepts it as a live-in to a block.
     151       33603 :   reserveRegisterTuples(Reserved, AMDGPU::M0);
     152             : 
     153             :   // Reserve the memory aperture registers.
     154       33603 :   reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
     155       33603 :   reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
     156       33603 :   reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
     157       33603 :   reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
     158             : 
     159             :   // Reserve xnack_mask registers - support is not implemented in Codegen.
     160       33603 :   reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);
     161             : 
     162             :   // Reserve Trap Handler registers - support is not implemented in Codegen.
     163       33603 :   reserveRegisterTuples(Reserved, AMDGPU::TBA);
     164       33603 :   reserveRegisterTuples(Reserved, AMDGPU::TMA);
     165       33603 :   reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
     166       33603 :   reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
     167       33603 :   reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
     168       33603 :   reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
     169       33603 :   reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
     170       33603 :   reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
     171       33603 :   reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
     172       33603 :   reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);
     173             : 
     174       33603 :   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
     175             : 
     176       33603 :   unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
     177       33603 :   unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
     178      314279 :   for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
     179      140338 :     unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
     180      140338 :     reserveRegisterTuples(Reserved, Reg);
     181             :   }
     182             : 
     183       33603 :   unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
     184       33603 :   unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
     185       47411 :   for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
     186        6904 :     unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
     187        6904 :     reserveRegisterTuples(Reserved, Reg);
     188             :   }
     189             : 
     190             :   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
     191             : 
     192       33603 :   unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
     193       33603 :   if (ScratchWaveOffsetReg != AMDGPU::NoRegister) {
     194             :     // Reserve 1 SGPR for scratch wave offset in case we need to spill.
     195       33603 :     reserveRegisterTuples(Reserved, ScratchWaveOffsetReg);
     196             :   }
     197             : 
     198       33603 :   unsigned ScratchRSrcReg = MFI->getScratchRSrcReg();
     199       33603 :   if (ScratchRSrcReg != AMDGPU::NoRegister) {
     200             :     // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need
     201             :     // to spill.
     202             :     // TODO: May need to reserve a VGPR if doing LDS spilling.
     203       33603 :     reserveRegisterTuples(Reserved, ScratchRSrcReg);
     204             :     assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg));
     205             :   }
     206             : 
     207             :   // We have to assume the SP is needed in case there are calls in the function,
     208             :   // which is detected after the function is lowered. If we aren't really going
     209             :   // to need SP, don't bother reserving it.
     210       33603 :   unsigned StackPtrReg = MFI->getStackPtrOffsetReg();
     211             : 
     212       33603 :   if (StackPtrReg != AMDGPU::NoRegister) {
     213       33603 :     reserveRegisterTuples(Reserved, StackPtrReg);
     214             :     assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
     215             :   }
     216             : 
     217       33603 :   unsigned FrameReg = MFI->getFrameOffsetReg();
     218       33603 :   if (FrameReg != AMDGPU::NoRegister) {
     219       33603 :     reserveRegisterTuples(Reserved, FrameReg);
     220             :     assert(!isSubRegister(ScratchRSrcReg, FrameReg));
     221             :   }
     222             : 
     223       33603 :   return Reserved;
     224             : }
     225             : 
     226       33156 : bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
     227             :   const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>();
     228       33156 :   if (Info->isEntryFunction()) {
     229       30810 :     const MachineFrameInfo &MFI = Fn.getFrameInfo();
     230       30810 :     return MFI.hasStackObjects() || MFI.hasCalls();
     231             :   }
     232             : 
     233             :   // May need scavenger for dealing with callee saved registers.
     234             :   return true;
     235             : }
     236             : 
     237       16578 : bool SIRegisterInfo::requiresFrameIndexScavenging(
     238             :   const MachineFunction &MF) const {
     239       16578 :   const MachineFrameInfo &MFI = MF.getFrameInfo();
     240       16578 :   if (MFI.hasStackObjects())
     241             :     return true;
     242             : 
     243             :   // May need to deal with callee saved registers.
     244             :   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
     245       15955 :   return !Info->isEntryFunction();
     246             : }
     247             : 
     248       16262 : bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
     249             :   const MachineFunction &MF) const {
     250             :   // m0 is needed for the scalar store offset. m0 is unallocatable, so we can't
     251             :   // create a virtual register for it during frame index elimination, so the
     252             :   // scavenger is directly needed.
     253       16885 :   return MF.getFrameInfo().hasStackObjects() &&
     254       16588 :          MF.getSubtarget<SISubtarget>().hasScalarStores() &&
     255       16588 :          MF.getInfo<SIMachineFunctionInfo>()->hasSpilledSGPRs();
     256             : }
     257             : 
     258       16569 : bool SIRegisterInfo::requiresVirtualBaseRegisters(
     259             :   const MachineFunction &) const {
     260             :   // There are no special dedicated stack or frame pointers.
     261       16569 :   return true;
     262             : }
     263             : 
     264       33180 : bool SIRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
     265             :   // This helps catch bugs as verifier errors.
     266       33180 :   return true;
     267             : }
     268             : 
     269        4559 : int64_t SIRegisterInfo::getMUBUFInstrOffset(const MachineInstr *MI) const {
     270             :   assert(SIInstrInfo::isMUBUF(*MI));
     271             : 
     272        9118 :   int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
     273             :                                           AMDGPU::OpName::offset);
     274        9118 :   return MI->getOperand(OffIdx).getImm();
     275             : }
     276             : 
     277           4 : int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
     278             :                                                  int Idx) const {
     279           4 :   if (!SIInstrInfo::isMUBUF(*MI))
     280             :     return 0;
     281             : 
     282             :   assert(Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
     283             :                                            AMDGPU::OpName::vaddr) &&
     284             :          "Should never see frame index on non-address operand");
     285             : 
     286           4 :   return getMUBUFInstrOffset(MI);
     287             : }
     288             : 
     289        4923 : bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
     290        4923 :   if (!MI->mayLoadOrStore())
     291             :     return false;
     292             : 
     293        4555 :   int64_t FullOffset = Offset + getMUBUFInstrOffset(MI);
     294             : 
     295        4555 :   return !isUInt<12>(FullOffset);
     296             : }
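
// The MUBUF immediate offset field is an unsigned 12-bit value, so the
// isUInt<12> check above fails once the combined offset reaches 4096.
// For example (illustrative numbers), an instruction offset of 8 on top of
// a frame offset of 4092 yields 4100, which no longer fits and therefore
// needs a materialized frame base register.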
     297             : 
     298           0 : void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
     299             :                                                   unsigned BaseReg,
     300             :                                                   int FrameIdx,
     301             :                                                   int64_t Offset) const {
     302             :   MachineBasicBlock::iterator Ins = MBB->begin();
     303           0 :   DebugLoc DL; // Defaults to "unknown"
     304             : 
     305           0 :   if (Ins != MBB->end())
     306             :     DL = Ins->getDebugLoc();
     307             : 
     308           0 :   MachineFunction *MF = MBB->getParent();
     309           0 :   const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
     310             :   const SIInstrInfo *TII = Subtarget.getInstrInfo();
     311             : 
     312           0 :   if (Offset == 0) {
     313           0 :     BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), BaseReg)
     314             :       .addFrameIndex(FrameIdx);
     315             :     return;
     316             :   }
     317             : 
     318           0 :   MachineRegisterInfo &MRI = MF->getRegInfo();
     319           0 :   unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
     320             : 
     321           0 :   unsigned FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
     322             : 
     323           0 :   BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
     324             :     .addImm(Offset);
     325           0 :   BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), FIReg)
     326             :     .addFrameIndex(FrameIdx);
     327             : 
     328           0 :   TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
     329           0 :     .addReg(OffsetReg, RegState::Kill)
     330           0 :     .addReg(FIReg);
     331             : }
     332             : 
     333           0 : void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
     334             :                                        int64_t Offset) const {
     335             : 
     336           0 :   MachineBasicBlock *MBB = MI.getParent();
     337           0 :   MachineFunction *MF = MBB->getParent();
     338           0 :   const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
     339             :   const SIInstrInfo *TII = Subtarget.getInstrInfo();
     340             : 
     341             : #ifndef NDEBUG
     342             :   // FIXME: Is it possible to be storing a frame index to itself?
     343             :   bool SeenFI = false;
     344             :   for (const MachineOperand &MO: MI.operands()) {
     345             :     if (MO.isFI()) {
     346             :       if (SeenFI)
     347             :         llvm_unreachable("should not see multiple frame indices");
     348             : 
     349             :       SeenFI = true;
     350             :     }
     351             :   }
     352             : #endif
     353             : 
     354           0 :   MachineOperand *FIOp = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
     355             :   assert(FIOp && FIOp->isFI() && "frame index must be address operand");
     356             :   assert(TII->isMUBUF(MI));
     357             :   assert(TII->getNamedOperand(MI, AMDGPU::OpName::soffset)->getReg() ==
     358             :          MF->getInfo<SIMachineFunctionInfo>()->getFrameOffsetReg() &&
     359             :          "should only be seeing frame offset relative FrameIndex");
     360             : 
     361             : 
     362           0 :   MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
     363           0 :   int64_t NewOffset = OffsetOp->getImm() + Offset;
     364             :   assert(isUInt<12>(NewOffset) && "offset should be legal");
     365             : 
     366           0 :   FIOp->ChangeToRegister(BaseReg, false);
     367             :   OffsetOp->setImm(NewOffset);
     368           0 : }
     369             : 
     370           0 : bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
     371             :                                         unsigned BaseReg,
     372             :                                         int64_t Offset) const {
     373           0 :   if (!SIInstrInfo::isMUBUF(*MI))
     374             :     return false;
     375             : 
     376           0 :   int64_t NewOffset = Offset + getMUBUFInstrOffset(MI);
     377             : 
     378           0 :   return isUInt<12>(NewOffset);
     379             : }
     380             : 
     381           0 : const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
     382             :   const MachineFunction &MF, unsigned Kind) const {
     383             :   // This is inaccurate. It depends on the instruction and address space. The
     384             :   // only place where we should hit this is for dealing with frame indexes /
     385             :   // private accesses, so this is correct in that case.
     386           0 :   return &AMDGPU::VGPR_32RegClass;
     387             : }
     388             : 
     389        1260 : static unsigned getNumSubRegsForSpillOp(unsigned Op) {
     390             : 
     391        1260 :   switch (Op) {
     392             :   case AMDGPU::SI_SPILL_S512_SAVE:
     393             :   case AMDGPU::SI_SPILL_S512_RESTORE:
     394             :   case AMDGPU::SI_SPILL_V512_SAVE:
     395             :   case AMDGPU::SI_SPILL_V512_RESTORE:
     396             :     return 16;
     397           0 :   case AMDGPU::SI_SPILL_S256_SAVE:
     398             :   case AMDGPU::SI_SPILL_S256_RESTORE:
     399             :   case AMDGPU::SI_SPILL_V256_SAVE:
     400             :   case AMDGPU::SI_SPILL_V256_RESTORE:
     401           0 :     return 8;
     402         663 :   case AMDGPU::SI_SPILL_S128_SAVE:
     403             :   case AMDGPU::SI_SPILL_S128_RESTORE:
     404             :   case AMDGPU::SI_SPILL_V128_SAVE:
     405             :   case AMDGPU::SI_SPILL_V128_RESTORE:
     406         663 :     return 4;
     407           0 :   case AMDGPU::SI_SPILL_V96_SAVE:
     408             :   case AMDGPU::SI_SPILL_V96_RESTORE:
     409           0 :     return 3;
     410           0 :   case AMDGPU::SI_SPILL_S64_SAVE:
     411             :   case AMDGPU::SI_SPILL_S64_RESTORE:
     412             :   case AMDGPU::SI_SPILL_V64_SAVE:
     413             :   case AMDGPU::SI_SPILL_V64_RESTORE:
     414           0 :     return 2;
     415         597 :   case AMDGPU::SI_SPILL_S32_SAVE:
     416             :   case AMDGPU::SI_SPILL_S32_RESTORE:
     417             :   case AMDGPU::SI_SPILL_V32_SAVE:
     418             :   case AMDGPU::SI_SPILL_V32_RESTORE:
     419         597 :     return 1;
     420           0 :   default: llvm_unreachable("Invalid spill opcode");
     421             :   }
     422             : }
     423             : 
     424             : static int getOffsetMUBUFStore(unsigned Opc) {
     425        3159 :   switch (Opc) {
     426             :   case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
     427             :     return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
     428             :   case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
     429             :     return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
     430             :   case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
     431             :     return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
     432             :   case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
     433             :     return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
     434             :   case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
     435             :     return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
     436             :   case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
     437             :     return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
     438             :   case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
     439             :     return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
     440             :   default:
     441             :     return -1;
     442             :   }
     443             : }
     444             : 
     445        1803 : static int getOffsetMUBUFLoad(unsigned Opc) {
     446        1803 :   switch (Opc) {
     447             :   case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
     448             :     return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
     449          98 :   case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
     450          98 :     return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
     451           6 :   case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
     452           6 :     return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
     453          26 :   case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
     454          26 :     return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
     455           2 :   case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
     456           2 :     return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
     457           5 :   case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
     458           5 :     return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
     459          16 :   case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
     460          16 :     return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
     461           2 :   case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
     462           2 :     return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
     463           2 :   case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
     464           2 :     return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
     465           2 :   case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
     466           2 :     return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
     467           2 :   case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
     468           2 :     return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
     469           3 :   case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
     470           3 :     return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
     471           4 :   case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
     472           4 :     return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
     473           0 :   default:
     474           0 :     return -1;
     475             :   }
     476             : }
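
// Both tables above map the register-offset (OFFEN) form of a MUBUF opcode
// to its immediate-offset (OFFSET) form. A -1 result tells
// buildMUBUFOffsetLoadStore below to bail out, so the instruction falls
// through to the general spill path instead.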
     477             : 
     478             : // This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
     479             : // need to handle the case where an SGPR may need to be spilled while spilling.
     480        4962 : static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
     481             :                                       MachineFrameInfo &MFI,
     482             :                                       MachineBasicBlock::iterator MI,
     483             :                                       int Index,
     484             :                                       int64_t Offset) {
     485        4962 :   MachineBasicBlock *MBB = MI->getParent();
     486             :   const DebugLoc &DL = MI->getDebugLoc();
     487        4962 :   bool IsStore = MI->mayStore();
     488             : 
     489        4962 :   unsigned Opc = MI->getOpcode();
     490        6765 :   int LoadStoreOp = IsStore ?
     491             :     getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
     492        1803 :   if (LoadStoreOp == -1)
     493             :     return false;
     494             : 
     495        4962 :   const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
     496        9924 :   MachineInstrBuilder NewMI = BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
     497             :     .add(*Reg)
     498        4962 :     .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
     499        4962 :     .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
     500             :     .addImm(Offset)
     501             :     .addImm(0) // glc
     502             :     .addImm(0) // slc
     503             :     .addImm(0) // tfe
     504        4962 :     .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
     505             : 
     506             :   const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
     507        4962 :                                                        AMDGPU::OpName::vdata_in);
     508        4962 :   if (VDataIn)
     509             :     NewMI.add(*VDataIn);
     510             :   return true;
     511             : }
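
// A sketch of the rewrite this performs, in illustrative (not verbatim)
// assembly, assuming the frame index folds to a 256-byte offset:
//   buffer_store_dword v0, v1, s[8:11], s4 offen offset:8
// becomes
//   buffer_store_dword v0, off, s[8:11], s4 offset:264
// with the vaddr operand dropped because the address is now a constant.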
     512             : 
     513        2432 : void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
     514             :                                          unsigned LoadStoreOp,
     515             :                                          int Index,
     516             :                                          unsigned ValueReg,
     517             :                                          bool IsKill,
     518             :                                          unsigned ScratchRsrcReg,
     519             :                                          unsigned ScratchOffsetReg,
     520             :                                          int64_t InstOffset,
     521             :                                          MachineMemOperand *MMO,
     522             :                                          RegScavenger *RS) const {
     523        2432 :   MachineBasicBlock *MBB = MI->getParent();
     524        2432 :   MachineFunction *MF = MI->getParent()->getParent();
     525        2432 :   const SISubtarget &ST =  MF->getSubtarget<SISubtarget>();
     526             :   const SIInstrInfo *TII = ST.getInstrInfo();
     527        2432 :   const MachineFrameInfo &MFI = MF->getFrameInfo();
     528             : 
     529        2432 :   const MCInstrDesc &Desc = TII->get(LoadStoreOp);
     530             :   const DebugLoc &DL = MI->getDebugLoc();
     531        2432 :   bool IsStore = Desc.mayStore();
     532             : 
     533             :   bool RanOutOfSGPRs = false;
     534             :   bool Scavenged = false;
     535             :   unsigned SOffset = ScratchOffsetReg;
     536             : 
     537        2432 :   const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
     538        4864 :   unsigned NumSubRegs = AMDGPU::getRegBitWidth(RC->getID()) / 32;
     539        2432 :   unsigned Size = NumSubRegs * 4;
     540        2432 :   int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
     541             :   const int64_t OriginalImmOffset = Offset;
     542             : 
     543             :   unsigned Align = MFI.getObjectAlignment(Index);
     544             :   const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();
     545             : 
     546        2432 :   if (!isUInt<12>(Offset + Size)) {
     547             :     SOffset = AMDGPU::NoRegister;
     548             : 
     549             :     // We don't have access to the register scavenger if this function is called
     550             :     // during  PEI::scavengeFrameVirtualRegs().
     551         232 :     if (RS)
     552           0 :       SOffset = RS->FindUnusedReg(&AMDGPU::SGPR_32RegClass);
     553             : 
     554           0 :     if (SOffset == AMDGPU::NoRegister) {
     555             :       // There are no free SGPRs, and since we are in the process of spilling
     556             :       // VGPRs too.  Since we need a VGPR in order to spill SGPRs (this is true
     557             :       // on SI/CI and on VI it is true until we implement spilling using scalar
     558             :       // stores), we have no way to free up an SGPR.  Our solution here is to
     559             :       // add the offset directly to the ScratchOffset register, and then
     560             :       // subtract the offset after the spill to return ScratchOffset to it's
     561             :       // original value.
     562             :       RanOutOfSGPRs = true;
     563             :       SOffset = ScratchOffsetReg;
     564             :     } else {
     565             :       Scavenged = true;
     566             :     }
     567             : 
     568         696 :     BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
     569         232 :       .addReg(ScratchOffsetReg)
     570             :       .addImm(Offset);
     571             : 
     572             :     Offset = 0;
     573             :   }
     574             : 
     575             :   const unsigned EltSize = 4;
     576             : 
     577       15270 :   for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += EltSize) {
     578       11735 :     unsigned SubReg = NumSubRegs == 1 ?
     579        5316 :       ValueReg : getSubReg(ValueReg, getSubRegFromChannel(i));
     580             : 
     581             :     unsigned SOffsetRegState = 0;
     582             :     unsigned SrcDstRegState = getDefRegState(!IsStore);
     583        6419 :     if (i + 1 == e) {
     584             :       SOffsetRegState |= getKillRegState(Scavenged);
     585             :       // The last implicit use carries the "Kill" flag.
     586        2432 :       SrcDstRegState |= getKillRegState(IsKill);
     587             :     }
     588             : 
     589        6419 :     MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i);
     590             :     MachineMemOperand *NewMMO
     591       12838 :       = MF->getMachineMemOperand(PInfo, MMO->getFlags(),
     592        6419 :                                  EltSize, MinAlign(Align, EltSize * i));
     593             : 
     594       12838 :     auto MIB = BuildMI(*MBB, MI, DL, Desc)
     595        6419 :       .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill))
     596        6419 :       .addReg(ScratchRsrcReg)
     597        6419 :       .addReg(SOffset, SOffsetRegState)
     598             :       .addImm(Offset)
     599             :       .addImm(0) // glc
     600             :       .addImm(0) // slc
     601             :       .addImm(0) // tfe
     602        6419 :       .addMemOperand(NewMMO);
     603             : 
     604        6419 :     if (NumSubRegs > 1)
     605        5316 :       MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
     606             :   }
     607             : 
     608        2432 :   if (RanOutOfSGPRs) {
     609             :     // Subtract the offset we added to the ScratchOffset register.
     610         696 :     BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffsetReg)
     611         232 :       .addReg(ScratchOffsetReg)
     612             :       .addImm(OriginalImmOffset);
     613             :   }
     614        2432 : }
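
// When no SGPR can be scavenged for an out-of-range offset, the code above
// brackets the access with an add/subtract pair, roughly (illustrative
// assembly sketch; register names and the offset are assumed):
//   s_add_u32  s4, s4, 4096                    ; fold the offset into soffset
//   buffer_store_dword v0, off, s[8:11], s4    ; offset:0
//   s_sub_u32  s4, s4, 4096                    ; restore the original value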
     615             : 
     616             : static std::pair<unsigned, unsigned> getSpillEltSize(unsigned SuperRegSize,
     617             :                                                      bool Store) {
     618          28 :   if (SuperRegSize % 16 == 0) {
     619             :     return { 16, Store ? AMDGPU::S_BUFFER_STORE_DWORDX4_SGPR :
     620             :                          AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR };
     621             :   }
     622             : 
     623          22 :   if (SuperRegSize % 8 == 0) {
     624             :     return { 8, Store ? AMDGPU::S_BUFFER_STORE_DWORDX2_SGPR :
     625             :                         AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR };
     626             :   }
     627             : 
     628             :   return { 4, Store ? AMDGPU::S_BUFFER_STORE_DWORD_SGPR :
     629             :                       AMDGPU::S_BUFFER_LOAD_DWORD_SGPR};
     630             : }
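
// Example (sketch): spilling a 512-bit SGPR tuple (64 bytes), 64 % 16 == 0,
// so getSpillEltSize(64, true) == {16, AMDGPU::S_BUFFER_STORE_DWORDX4_SGPR},
// i.e. four 16-byte scalar stores rather than sixteen 4-byte ones.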
     631             : 
     632         612 : bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
     633             :                                int Index,
     634             :                                RegScavenger *RS,
     635             :                                bool OnlyToVGPR) const {
     636         612 :   MachineBasicBlock *MBB = MI->getParent();
     637         612 :   MachineFunction *MF = MBB->getParent();
     638         612 :   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
     639             : 
     640             :   ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
     641         612 :     = MFI->getSGPRToVGPRSpills(Index);
     642         612 :   bool SpillToVGPR = !VGPRSpills.empty();
     643         612 :   if (OnlyToVGPR && !SpillToVGPR)
     644             :     return false;
     645             : 
     646         612 :   MachineRegisterInfo &MRI = MF->getRegInfo();
     647         612 :   const SISubtarget &ST =  MF->getSubtarget<SISubtarget>();
     648             :   const SIInstrInfo *TII = ST.getInstrInfo();
     649             : 
     650         612 :   unsigned SuperReg = MI->getOperand(0).getReg();
     651             :   bool IsKill = MI->getOperand(0).isKill();
     652             :   const DebugLoc &DL = MI->getDebugLoc();
     653             : 
     654         612 :   MachineFrameInfo &FrameInfo = MF->getFrameInfo();
     655             : 
     656         612 :   bool SpillToSMEM = spillSGPRToSMEM();
     657         612 :   if (SpillToSMEM && OnlyToVGPR)
     658             :     return false;
     659             : 
     660             :   assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
     661             : 
     662             :   unsigned OffsetReg = AMDGPU::M0;
     663             :   unsigned M0CopyReg = AMDGPU::NoRegister;
     664             : 
     665         612 :   if (SpillToSMEM) {
     666          14 :     if (RS->isRegUsed(AMDGPU::M0)) {
     667          14 :       M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
     668          42 :       BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
     669          14 :         .addReg(AMDGPU::M0);
     670             :     }
     671             :   }
     672             : 
     673             :   unsigned ScalarStoreOp;
     674         612 :   unsigned EltSize = 4;
     675         612 :   const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
     676         626 :   if (SpillToSMEM && isSGPRClass(RC)) {
     677             :     // XXX - if private_element_size is larger than 4 it might be useful to be
     678             :     // able to spill wider vmem spills.
     679             :     std::tie(EltSize, ScalarStoreOp) =
     680          14 :           getSpillEltSize(getRegSizeInBits(*RC) / 8, true);
     681             :   }
     682             : 
     683         612 :   ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
     684         612 :   unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
     685             : 
     686             :   // SubReg carries the "Kill" flag when SubReg == SuperReg.
     687         612 :   unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
     688        2974 :   for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
     689        1951 :     unsigned SubReg = NumSubRegs == 1 ?
     690        1540 :       SuperReg : getSubReg(SuperReg, SplitParts[i]);
     691             : 
     692        1181 :     if (SpillToSMEM) {
     693             :       int64_t FrOffset = FrameInfo.getObjectOffset(Index);
     694             : 
     695             :       // The allocated memory size is really the wavefront size * the frame
     696             :       // index size. The widest register class is 64 bytes, so a 4-byte scratch
     697             :       // allocation is enough to spill this in a single stack object.
     698             :       //
     699             :       // FIXME: Frame size/offsets are computed earlier than this, so the extra
     700             :       // space is still unnecessarily allocated.
     701             : 
     702             :       unsigned Align = FrameInfo.getObjectAlignment(Index);
     703             :       MachinePointerInfo PtrInfo
     704          15 :         = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
     705             :       MachineMemOperand *MMO
     706          30 :         = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
     707          15 :                                    EltSize, MinAlign(Align, EltSize * i));
     708             : 
     709             :       // SMEM instructions only support a single offset, so increment the wave
     710             :       // offset.
     711             : 
     712          15 :       int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
     713          15 :       if (Offset != 0) {
     714          45 :         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
     715          15 :           .addReg(MFI->getFrameOffsetReg())
     716             :           .addImm(Offset);
     717             :       } else {
     718           0 :         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
     719           0 :           .addReg(MFI->getFrameOffsetReg());
     720             :       }
     721             : 
     722          45 :       BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp))
     723          15 :         .addReg(SubReg, getKillRegState(IsKill)) // sdata
     724          15 :         .addReg(MFI->getScratchRSrcReg())        // sbase
     725          15 :         .addReg(OffsetReg, RegState::Kill)       // soff
     726             :         .addImm(0)                               // glc
     727             :         .addMemOperand(MMO);
     728             : 
     729          15 :       continue;
     730             :     }
     731             : 
     732        1166 :     if (SpillToVGPR) {
     733        2164 :       SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
     734             : 
     735        2164 :       BuildMI(*MBB, MI, DL,
     736             :               TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
     737             :               Spill.VGPR)
     738        1082 :         .addReg(SubReg, getKillRegState(IsKill))
     739        1082 :         .addImm(Spill.Lane);
     740             : 
     741             :       // FIXME: Since this spills to another register instead of an actual
     742             :       // frame index, we should delete the frame index when all references to
     743             :       // it are fixed.
     744             :     } else {
      745             :       // XXX - Can the spill to VGPR fail for some subregisters but not others?
     746          84 :       if (OnlyToVGPR)
     747           0 :         return false;
     748             : 
     749             :       // Spill SGPR to a frame index.
     750             :       // TODO: Should VI try to spill to VGPR and then spill to SMEM?
     751          84 :       unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
     753             : 
     754             :       MachineInstrBuilder Mov
     755         252 :         = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
     756          84 :         .addReg(SubReg, SubKillState);
     757             : 
     758             : 
     759             :       // There could be undef components of a spilled super register.
     760             :       // TODO: Can we detect this and skip the spill?
     761          84 :       if (NumSubRegs > 1) {
     762             :         // The last implicit use of the SuperReg carries the "Kill" flag.
     763             :         unsigned SuperKillState = 0;
     764          76 :         if (i + 1 == e)
     765             :           SuperKillState |= getKillRegState(IsKill);
     766          76 :         Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
     767             :       }
     768             : 
     769             :       unsigned Align = FrameInfo.getObjectAlignment(Index);
     770             :       MachinePointerInfo PtrInfo
     771          84 :         = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
     772             :       MachineMemOperand *MMO
     773         168 :         = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
     774          84 :                                    EltSize, MinAlign(Align, EltSize * i));
     775         252 :       BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
     776          84 :         .addReg(TmpReg, RegState::Kill)    // src
     777             :         .addFrameIndex(Index)              // vaddr
     778          84 :         .addReg(MFI->getScratchRSrcReg())  // srrsrc
     779          84 :         .addReg(MFI->getFrameOffsetReg())  // soffset
     780          84 :         .addImm(i * 4)                     // offset
     781             :         .addMemOperand(MMO);
     782             :     }
     783             :   }
     784             : 
     785         612 :   if (M0CopyReg != AMDGPU::NoRegister) {
     786          42 :     BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
     787          14 :       .addReg(M0CopyReg, RegState::Kill);
     788             :   }
     789             : 
     790         612 :   MI->eraseFromParent();
     791             :   MFI->addToSpilledSGPRs(NumSubRegs);
     792         612 :   return true;
     793             : }
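
// A worked instance of the SMEM offset computation above (numbers are
// illustrative): with a 64-lane wavefront, FrOffset == 4 and EltSize == 16,
// subregister i == 1 lands at 64 * 4 + 16 * 1 == 272 bytes from the scratch
// base, which is the value added into m0 before the scalar store.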
     794             : 
     795         597 : bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
     796             :                                  int Index,
     797             :                                  RegScavenger *RS,
     798             :                                  bool OnlyToVGPR) const {
     799         597 :   MachineFunction *MF = MI->getParent()->getParent();
     800         597 :   MachineRegisterInfo &MRI = MF->getRegInfo();
     801             :   MachineBasicBlock *MBB = MI->getParent();
     802         597 :   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
     803             : 
     804             :   ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
     805         597 :     = MFI->getSGPRToVGPRSpills(Index);
     806         597 :   bool SpillToVGPR = !VGPRSpills.empty();
     807         597 :   if (OnlyToVGPR && !SpillToVGPR)
     808             :     return false;
     809             : 
     810         597 :   MachineFrameInfo &FrameInfo = MF->getFrameInfo();
     811         597 :   const SISubtarget &ST =  MF->getSubtarget<SISubtarget>();
     812             :   const SIInstrInfo *TII = ST.getInstrInfo();
     813             :   const DebugLoc &DL = MI->getDebugLoc();
     814             : 
     815         597 :   unsigned SuperReg = MI->getOperand(0).getReg();
     816         597 :   bool SpillToSMEM = spillSGPRToSMEM();
     817         597 :   if (SpillToSMEM && OnlyToVGPR)
     818             :     return false;
     819             : 
     820             :   assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
     821             : 
     822             :   unsigned OffsetReg = AMDGPU::M0;
     823             :   unsigned M0CopyReg = AMDGPU::NoRegister;
     824             : 
     825         597 :   if (SpillToSMEM) {
     826          14 :     if (RS->isRegUsed(AMDGPU::M0)) {
     827          14 :       M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
     828          42 :       BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
     829          14 :         .addReg(AMDGPU::M0);
     830             :     }
     831             :   }
     832             : 
     833         597 :   unsigned EltSize = 4;
     834             :   unsigned ScalarLoadOp;
     835             : 
     836         597 :   const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
     837         611 :   if (SpillToSMEM && isSGPRClass(RC)) {
     838             :     // XXX - if private_element_size is larger than 4 it might be useful to be
     839             :     // able to spill wider vmem spills.
     840             :     std::tie(EltSize, ScalarLoadOp) =
     841          14 :           getSpillEltSize(getRegSizeInBits(*RC) / 8, false);
     842             :   }
     843             : 
     844         597 :   ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
     845         597 :   unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
     846             : 
     848             :   int64_t FrOffset = FrameInfo.getObjectOffset(Index);
     849             : 
     850        2917 :   for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
     851        1918 :     unsigned SubReg = NumSubRegs == 1 ?
     852        1516 :       SuperReg : getSubReg(SuperReg, SplitParts[i]);
     853             : 
     854        1160 :     if (SpillToSMEM) {
     855             :       // FIXME: Size may be > 4 but extra bytes wasted.
     856             :       unsigned Align = FrameInfo.getObjectAlignment(Index);
     857             :       MachinePointerInfo PtrInfo
     858          15 :         = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
     859             :       MachineMemOperand *MMO
     860          30 :         = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
     861          15 :                                    EltSize, MinAlign(Align, EltSize * i));
     862             : 
      863             :       // Add the i * EltSize byte offset to the wave offset.
     864          15 :       int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
     865          15 :       if (Offset != 0) {
     866          45 :         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
     867          15 :           .addReg(MFI->getFrameOffsetReg())
     868             :           .addImm(Offset);
     869             :       } else {
     870           0 :         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
     871           0 :           .addReg(MFI->getFrameOffsetReg());
     872             :       }
     873             : 
     874             :       auto MIB =
     875          45 :         BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg)
     876          15 :         .addReg(MFI->getScratchRSrcReg()) // sbase
     877          15 :         .addReg(OffsetReg, RegState::Kill)                // soff
     878             :         .addImm(0)                        // glc
     879          15 :         .addMemOperand(MMO);
     880             : 
     881          15 :       if (NumSubRegs > 1)
     882           2 :         MIB.addReg(SuperReg, RegState::ImplicitDefine);
     883             : 
     884          15 :       continue;
     885             :     }
     886             : 
     887        1145 :     if (SpillToVGPR) {
     888        2124 :       SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
     889             :       auto MIB =
     890        2124 :         BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
     891             :                 SubReg)
     892        1062 :         .addReg(Spill.VGPR)
     893        2124 :         .addImm(Spill.Lane);
     894             : 
     895        1062 :       if (NumSubRegs > 1)
     896         680 :         MIB.addReg(SuperReg, RegState::ImplicitDefine);
     897             :     } else {
     898          83 :       if (OnlyToVGPR)
     899           0 :         return false;
     900             : 
     901             :       // Restore SGPR from a stack slot.
     902             :       // FIXME: We should use S_LOAD_DWORD here for VI.
     903          83 :       unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
     904             :       unsigned Align = FrameInfo.getObjectAlignment(Index);
     905             : 
     906             :       MachinePointerInfo PtrInfo
     907          83 :         = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
     908             : 
     909         166 :       MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo,
     910             :         MachineMemOperand::MOLoad, EltSize,
     911          83 :         MinAlign(Align, EltSize * i));
     912             : 
     913         249 :       BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
     914             :         .addFrameIndex(Index)              // vaddr
     915          83 :         .addReg(MFI->getScratchRSrcReg())  // srsrc
     916          83 :         .addReg(MFI->getFrameOffsetReg())  // soffset
     917          83 :         .addImm(i * 4)                     // offset
     918             :         .addMemOperand(MMO);
     919             : 
     920             :       auto MIB =
     921         249 :         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
     922          83 :         .addReg(TmpReg, RegState::Kill);
     923             : 
     924          83 :       if (NumSubRegs > 1)
     925          76 :         MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
     926             :     }
     927             :   }
     928             : 
     929         597 :   if (M0CopyReg != AMDGPU::NoRegister) {
     930          42 :     BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
     931          14 :       .addReg(M0CopyReg, RegState::Kill);
     932             :   }
     933             : 
     934         597 :   MI->eraseFromParent();
     935         597 :   return true;
     936             : }
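
// The restore path mirrors the spill path: values written with
// v_writelane_b32 during the spill come back via v_readlane_b32 from the
// holding VGPR, and SMEM spills reload through the matching s_buffer_load
// opcode. A sketch of the pairing (lane and registers assumed):
//   v_writelane_b32 v0, s4, 0   <->   v_readlane_b32 s4, v0, 0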
     937             : 
     938             : /// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
     939             : /// a VGPR and the stack slot can be safely eliminated when all other users are
     940             : /// handled.
     941        1114 : bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
     942             :   MachineBasicBlock::iterator MI,
     943             :   int FI,
     944             :   RegScavenger *RS) const {
     945        1114 :   switch (MI->getOpcode()) {
     946         564 :   case AMDGPU::SI_SPILL_S512_SAVE:
     947             :   case AMDGPU::SI_SPILL_S256_SAVE:
     948             :   case AMDGPU::SI_SPILL_S128_SAVE:
     949             :   case AMDGPU::SI_SPILL_S64_SAVE:
     950             :   case AMDGPU::SI_SPILL_S32_SAVE:
     951         564 :     return spillSGPR(MI, FI, RS, true);
     952         550 :   case AMDGPU::SI_SPILL_S512_RESTORE:
     953             :   case AMDGPU::SI_SPILL_S256_RESTORE:
     954             :   case AMDGPU::SI_SPILL_S128_RESTORE:
     955             :   case AMDGPU::SI_SPILL_S64_RESTORE:
     956             :   case AMDGPU::SI_SPILL_S32_RESTORE:
     957         550 :     return restoreSGPR(MI, FI, RS, true);
     958           0 :   default:
     959           0 :     llvm_unreachable("not an SGPR spill instruction");
     960             :   }
     961             : }
     962             : 
      963        7873 : void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
      964             :                                          int SPAdj, unsigned FIOperandNum,
      965             :                                          RegScavenger *RS) const {
     966        7873 :   MachineFunction *MF = MI->getParent()->getParent();
     967        7873 :   MachineRegisterInfo &MRI = MF->getRegInfo();
     968             :   MachineBasicBlock *MBB = MI->getParent();
     969        7873 :   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
     970        7873 :   MachineFrameInfo &FrameInfo = MF->getFrameInfo();
      971        7873 :   const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
     972             :   const SIInstrInfo *TII = ST.getInstrInfo();
     973             :   DebugLoc DL = MI->getDebugLoc();
     974             : 
     975        7873 :   MachineOperand &FIOp = MI->getOperand(FIOperandNum);
     976        7873 :   int Index = MI->getOperand(FIOperandNum).getIndex();
     977             : 
     978       15746 :   switch (MI->getOpcode()) {
     979             :     // SGPR register spill
     980          48 :     case AMDGPU::SI_SPILL_S512_SAVE:
     981             :     case AMDGPU::SI_SPILL_S256_SAVE:
     982             :     case AMDGPU::SI_SPILL_S128_SAVE:
     983             :     case AMDGPU::SI_SPILL_S64_SAVE:
     984             :     case AMDGPU::SI_SPILL_S32_SAVE: {
     985          48 :       spillSGPR(MI, Index, RS);
     986          48 :       break;
     987             :     }
     988             : 
     989             :     // SGPR register restore
     990          47 :     case AMDGPU::SI_SPILL_S512_RESTORE:
     991             :     case AMDGPU::SI_SPILL_S256_RESTORE:
     992             :     case AMDGPU::SI_SPILL_S128_RESTORE:
     993             :     case AMDGPU::SI_SPILL_S64_RESTORE:
     994             :     case AMDGPU::SI_SPILL_S32_RESTORE: {
     995          47 :       restoreSGPR(MI, Index, RS);
     996          47 :       break;
     997             :     }
     998             : 
     999             :     // VGPR register spill
    1000             :     case AMDGPU::SI_SPILL_V512_SAVE:
    1001             :     case AMDGPU::SI_SPILL_V256_SAVE:
    1002             :     case AMDGPU::SI_SPILL_V128_SAVE:
    1003             :     case AMDGPU::SI_SPILL_V96_SAVE:
    1004             :     case AMDGPU::SI_SPILL_V64_SAVE:
    1005             :     case AMDGPU::SI_SPILL_V32_SAVE: {
    1006             :       const MachineOperand *VData = TII->getNamedOperand(*MI,
    1007        1260 :                                                          AMDGPU::OpName::vdata);
    1008        2520 :       buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
    1009             :             Index,
    1010             :             VData->getReg(), VData->isKill(),
    1011             :             TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
    1012             :             TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
    1013             :             TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
    1014        1260 :             *MI->memoperands_begin(),
    1015             :             RS);
    1016        2520 :       MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
    1017        1260 :       MI->eraseFromParent();
    1018        1260 :       break;
    1019             :     }
    1020             :     case AMDGPU::SI_SPILL_V32_RESTORE:
    1021             :     case AMDGPU::SI_SPILL_V64_RESTORE:
    1022             :     case AMDGPU::SI_SPILL_V96_RESTORE:
    1023             :     case AMDGPU::SI_SPILL_V128_RESTORE:
    1024             :     case AMDGPU::SI_SPILL_V256_RESTORE:
    1025             :     case AMDGPU::SI_SPILL_V512_RESTORE: {
    1026             :       const MachineOperand *VData = TII->getNamedOperand(*MI,
    1027        1172 :                                                          AMDGPU::OpName::vdata);
    1028             : 
    1029        2344 :       buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
    1030             :             Index,
    1031             :             VData->getReg(), VData->isKill(),
    1032             :             TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
    1033             :             TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
    1034             :             TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
    1035        1172 :             *MI->memoperands_begin(),
    1036             :             RS);
    1037        1172 :       MI->eraseFromParent();
    1038        1172 :       break;
    1039             :     }
    1040             : 
    1041             :     default: {
    1042             :       const DebugLoc &DL = MI->getDebugLoc();
    1043             :       bool IsMUBUF = TII->isMUBUF(*MI);
    1044             : 
    1045        5714 :       if (!IsMUBUF &&
    1046         368 :           MFI->getFrameOffsetReg() != MFI->getScratchWaveOffsetReg()) {
    1047             :         // Convert to an absolute stack address by finding the offset from the
    1048             :         // scratch wave base and scaling by the wave size.
    1049             :         //
    1050             :         // In an entry function/kernel the stack address is already the
    1051             :         // absolute address relative to the scratch wave offset.
    1052             : 
    1053             :         unsigned DiffReg
    1054          32 :           = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
    1055             : 
    1056          32 :         bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32;
    1057          63 :         unsigned ResultReg = IsCopy ?
    1058          31 :           MI->getOperand(0).getReg() :
    1059             :           MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    1060             : 
    1061          96 :         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), DiffReg)
    1062          32 :           .addReg(MFI->getFrameOffsetReg())
    1063          32 :           .addReg(MFI->getScratchWaveOffsetReg());
    1064             : 
    1065             :         int64_t Offset = FrameInfo.getObjectOffset(Index);
    1066          32 :         if (Offset == 0) {
    1067             :           // XXX - This never happens because of emergency scavenging slot at 0?
    1068           0 :           BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ResultReg)
    1069           0 :             .addImm(Log2_32(ST.getWavefrontSize()))
    1070           0 :             .addReg(DiffReg);
    1071             :         } else {
    1072             :           unsigned ScaledReg
    1073          32 :             = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    1074             : 
    1075          96 :           BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ScaledReg)
    1076          64 :             .addImm(Log2_32(ST.getWavefrontSize()))
    1077          32 :             .addReg(DiffReg, RegState::Kill);
    1078             : 
     1079             :           // TODO: Fold if the use instruction is another add of a constant.
    1080          32 :           if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
    1081          56 :             TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
    1082             :               .addImm(Offset)
    1083          28 :               .addReg(ScaledReg, RegState::Kill);
    1084             :           } else {
    1085             :             unsigned ConstOffsetReg
    1086           4 :               = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
    1087             : 
    1088          12 :             BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
    1089             :               .addImm(Offset);
    1090           8 :             TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
    1091           4 :               .addReg(ConstOffsetReg, RegState::Kill)
    1092           4 :               .addReg(ScaledReg, RegState::Kill);
    1093             :           }
    1094             :         }
    1095             : 
    1096             :         // Don't introduce an extra copy if we're just materializing in a mov.
    1097          32 :         if (IsCopy)
    1098          31 :           MI->eraseFromParent();
    1099             :         else
    1100           1 :           FIOp.ChangeToRegister(ResultReg, false, false, true);
    1101             :         return;
    1102             :       }
    1103             : 
    1104        5314 :       if (IsMUBUF) {
    1105             :         // Disable offen so we don't need a 0 vgpr base.
    1106             :         assert(static_cast<int>(FIOperandNum) ==
    1107             :                AMDGPU::getNamedOperandIdx(MI->getOpcode(),
    1108             :                                           AMDGPU::OpName::vaddr));
    1109             : 
    1110             :         assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg()
    1111             :                == MFI->getFrameOffsetReg());
    1112             : 
    1113             :         int64_t Offset = FrameInfo.getObjectOffset(Index);
    1114             :         int64_t OldImm
    1115        4978 :           = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
    1116        4978 :         int64_t NewOffset = OldImm + Offset;
    1117             : 
    1118        9940 :         if (isUInt<12>(NewOffset) &&
    1119        4962 :             buildMUBUFOffsetLoadStore(TII, FrameInfo, MI, Index, NewOffset)) {
    1120        4962 :           MI->eraseFromParent();
    1121        4962 :           return;
    1122             :         }
    1123             :       }
    1124             : 
     1125             :       // If the offset is simply too big, don't convert it to a
     1126             :       // scratch-wave-offset-relative index.
    1127             : 
    1128             :       int64_t Offset = FrameInfo.getObjectOffset(Index);
    1129         352 :       FIOp.ChangeToImmediate(Offset);
    1130         352 :       if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
    1131          16 :         unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    1132          48 :         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
    1133             :           .addImm(Offset);
    1134          16 :         FIOp.ChangeToRegister(TmpReg, false, false, true);
    1135             :       }
    1136             :     }
    1137             :   }
    1138             : }
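                      : 
                      : // A worked sketch of the address materialization in the default path above,
                      : // assuming a wavefront size of 64 (shift amount Log2_32(64) == 6):
                      : //
                      : //   DiffReg   = FrameOffsetReg - ScratchWaveOffsetReg        // S_SUB_U32
                      : //   ScaledReg = DiffReg >> 6                                  // V_LSHRREV_B32_e64
                      : //   ResultReg = ScaledReg + FrameInfo.getObjectOffset(Index)  // add, no carry
                      : //
                      : // When the object offset is not an inlinable literal, it is first moved
                      : // into an SGPR with S_MOV_B32 before the add, as the code shows.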
    1139             : 
    1140     6804411 : StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const {
    1141             :   #define AMDGPU_REG_ASM_NAMES
    1142             :   #include "AMDGPURegAsmNames.inc.cpp"
    1143             : 
    1144             :   #define REG_RANGE(BeginReg, EndReg, RegTable)            \
    1145             :     if (Reg >= BeginReg && Reg <= EndReg) {                \
    1146             :       unsigned Index = Reg - BeginReg;                     \
    1147             :       assert(Index < array_lengthof(RegTable));            \
    1148             :       return RegTable[Index];                              \
    1149             :     }
    1150             : 
    1151     7656597 :   REG_RANGE(AMDGPU::VGPR0, AMDGPU::VGPR255, VGPR32RegNames);
    1152     6705801 :   REG_RANGE(AMDGPU::SGPR0, AMDGPU::SGPR103, SGPR32RegNames);
    1153     5697039 :   REG_RANGE(AMDGPU::VGPR0_VGPR1, AMDGPU::VGPR254_VGPR255, VGPR64RegNames);
    1154     4902551 :   REG_RANGE(AMDGPU::SGPR0_SGPR1, AMDGPU::SGPR102_SGPR103, SGPR64RegNames);
    1155     4497967 :   REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2, AMDGPU::VGPR253_VGPR254_VGPR255,
    1156             :             VGPR96RegNames);
    1157             : 
    1158     4740620 :   REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3,
    1159             :             AMDGPU::VGPR252_VGPR253_VGPR254_VGPR255,
    1160             :             VGPR128RegNames);
    1161     4305084 :   REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3,
    1162             :             AMDGPU::SGPR100_SGPR101_SGPR102_SGPR103,
    1163             :             SGPR128RegNames);
    1164             : 
    1165     4443341 :   REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7,
    1166             :             AMDGPU::VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
    1167             :             VGPR256RegNames);
    1168             : 
    1169     4196940 :   REG_RANGE(
    1170             :     AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7_VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15,
    1171             :     AMDGPU::VGPR240_VGPR241_VGPR242_VGPR243_VGPR244_VGPR245_VGPR246_VGPR247_VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
    1172             :     VGPR512RegNames);
    1173             : 
    1174     3786210 :   REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7,
    1175             :             AMDGPU::SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
    1176             :             SGPR256RegNames);
    1177             : 
    1178     3734562 :   REG_RANGE(
    1179             :     AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7_SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15,
    1180             :     AMDGPU::SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95_SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
    1181             :     SGPR512RegNames
    1182             :   );
    1183             : 
    1184             : #undef REG_RANGE
    1185             : 
     1186             :   // FIXME: Rename flat_scr so we don't need to special-case this.
    1187     3647234 :   switch (Reg) {
    1188             :   case AMDGPU::FLAT_SCR:
    1189        2911 :     return "flat_scratch";
    1190             :   case AMDGPU::FLAT_SCR_LO:
    1191        6115 :     return "flat_scratch_lo";
    1192             :   case AMDGPU::FLAT_SCR_HI:
    1193        6115 :     return "flat_scratch_hi";
    1194     3632093 :   default:
    1195             :     // For the special named registers the default is fine.
    1196     3632093 :     return TargetRegisterInfo::getRegAsmName(Reg);
    1197             :   }
    1198             : }
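                      : 
                      : // For reference, the first REG_RANGE use above expands to the following
                      : // (modulo whitespace):
                      : //
                      : //   if (Reg >= AMDGPU::VGPR0 && Reg <= AMDGPU::VGPR255) {
                      : //     unsigned Index = Reg - AMDGPU::VGPR0;
                      : //     assert(Index < array_lengthof(VGPR32RegNames));
                      : //     return VGPR32RegNames[Index];
                      : //   }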
    1199             : 
    1200             : // FIXME: This is very slow. It might be worth creating a map from physreg to
    1201             : // register class.
    1202     4895811 : const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
    1203             :   assert(!TargetRegisterInfo::isVirtualRegister(Reg));
    1204             : 
    1205             :   static const TargetRegisterClass *const BaseClasses[] = {
    1206             :     &AMDGPU::VGPR_32RegClass,
    1207             :     &AMDGPU::SReg_32RegClass,
    1208             :     &AMDGPU::VReg_64RegClass,
    1209             :     &AMDGPU::SReg_64RegClass,
    1210             :     &AMDGPU::VReg_96RegClass,
    1211             :     &AMDGPU::VReg_128RegClass,
    1212             :     &AMDGPU::SReg_128RegClass,
    1213             :     &AMDGPU::VReg_256RegClass,
    1214             :     &AMDGPU::SReg_256RegClass,
    1215             :     &AMDGPU::VReg_512RegClass,
    1216             :     &AMDGPU::SReg_512RegClass,
    1217             :     &AMDGPU::SCC_CLASSRegClass,
    1218             :   };
    1219             : 
    1220    26338129 :   for (const TargetRegisterClass *BaseClass : BaseClasses) {
    1221    28426745 :     if (BaseClass->contains(Reg)) {
    1222             :       return BaseClass;
    1223             :     }
    1224             :   }
    1225             :   return nullptr;
    1226             : }
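                      : 
                      : // One possible answer to the FIXME above (an untested sketch, not part of
                      : // this file): build a physreg-to-class map once, then query it in O(1).
                      : // PhysRegClassMap is a hypothetical cache; BaseClasses is the array above.
                      : //
                      : //   DenseMap<unsigned, const TargetRegisterClass *> PhysRegClassMap;
                      : //   for (const TargetRegisterClass *BaseClass : BaseClasses)
                      : //     for (MCPhysReg R : *BaseClass)
                      : //       PhysRegClassMap.try_emplace(R, BaseClass); // first match wins
                      : //
                      : //   auto It = PhysRegClassMap.find(Reg);
                      : //   return It == PhysRegClassMap.end() ? nullptr : It->second;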
    1227             : 
     1228             : // TODO: It might be helpful to have some target-specific flags in
    1229             : // TargetRegisterClass to mark which classes are VGPRs to make this trivial.
    1230    10061128 : bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
    1231             :   unsigned Size = getRegSizeInBits(*RC);
    1232    10061128 :   if (Size < 32)
    1233             :     return false;
    1234    10056597 :   switch (Size) {
    1235     5292757 :   case 32:
    1236     5292757 :     return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr;
    1237     3490691 :   case 64:
    1238     3490691 :     return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr;
    1239        1247 :   case 96:
    1240        1247 :     return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr;
    1241     1146422 :   case 128:
    1242     1146422 :     return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr;
    1243       83788 :   case 256:
    1244       83788 :     return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr;
    1245       41692 :   case 512:
    1246       41692 :     return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr;
    1247           0 :   default:
    1248           0 :     llvm_unreachable("Invalid register class size");
    1249             :   }
    1250             : }
    1251             : 
    1252      127570 : const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
    1253             :                                          const TargetRegisterClass *SRC) const {
    1254      127570 :   switch (getRegSizeInBits(*SRC)) {
    1255             :   case 32:
    1256             :     return &AMDGPU::VGPR_32RegClass;
    1257       14614 :   case 64:
    1258       14614 :     return &AMDGPU::VReg_64RegClass;
    1259           0 :   case 96:
    1260           0 :     return &AMDGPU::VReg_96RegClass;
    1261        4813 :   case 128:
    1262        4813 :     return &AMDGPU::VReg_128RegClass;
    1263          53 :   case 256:
    1264          53 :     return &AMDGPU::VReg_256RegClass;
    1265          51 :   case 512:
    1266          51 :     return &AMDGPU::VReg_512RegClass;
    1267           0 :   default:
    1268           0 :     llvm_unreachable("Invalid register class size");
    1269             :   }
    1270             : }
    1271             : 
    1272        1674 : const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass(
    1273             :                                          const TargetRegisterClass *VRC) const {
    1274        1674 :   switch (getRegSizeInBits(*VRC)) {
    1275             :   case 32:
    1276             :     return &AMDGPU::SGPR_32RegClass;
    1277         237 :   case 64:
    1278         237 :     return &AMDGPU::SReg_64RegClass;
    1279          12 :   case 128:
    1280          12 :     return &AMDGPU::SReg_128RegClass;
    1281           2 :   case 256:
    1282           2 :     return &AMDGPU::SReg_256RegClass;
    1283           0 :   case 512:
    1284           0 :     return &AMDGPU::SReg_512RegClass;
    1285           0 :   default:
    1286           0 :     llvm_unreachable("Invalid register class size");
    1287             :   }
    1288             : }
    1289             : 
    1290      344870 : const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
    1291             :                          const TargetRegisterClass *RC, unsigned SubIdx) const {
    1292      344870 :   if (SubIdx == AMDGPU::NoSubRegister)
    1293             :     return RC;
    1294             : 
    1295             :   // We can assume that each lane corresponds to one 32-bit register.
    1296       54887 :   unsigned Count = getSubRegIndexLaneMask(SubIdx).getNumLanes();
    1297       54887 :   if (isSGPRClass(RC)) {
    1298       28780 :     switch (Count) {
    1299             :     case 1:
    1300             :       return &AMDGPU::SGPR_32RegClass;
    1301           0 :     case 2:
    1302           0 :       return &AMDGPU::SReg_64RegClass;
    1303           0 :     case 4:
    1304           0 :       return &AMDGPU::SReg_128RegClass;
    1305           0 :     case 8:
    1306           0 :       return &AMDGPU::SReg_256RegClass;
    1307           0 :     case 16: /* fall-through */
    1308             :     default:
    1309           0 :       llvm_unreachable("Invalid sub-register class size");
    1310             :     }
    1311             :   } else {
    1312       26107 :     switch (Count) {
    1313             :     case 1:
    1314             :       return &AMDGPU::VGPR_32RegClass;
    1315          58 :     case 2:
    1316          58 :       return &AMDGPU::VReg_64RegClass;
    1317           0 :     case 3:
    1318           0 :       return &AMDGPU::VReg_96RegClass;
    1319           0 :     case 4:
    1320           0 :       return &AMDGPU::VReg_128RegClass;
    1321           0 :     case 8:
    1322           0 :       return &AMDGPU::VReg_256RegClass;
    1323           0 :     case 16: /* fall-through */
    1324             :     default:
    1325           0 :       llvm_unreachable("Invalid sub-register class size");
    1326             :     }
    1327             :   }
    1328             : }
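                      : 
                      : // A worked example with illustrative inputs: for RC == &AMDGPU::VReg_128RegClass
                      : // and SubIdx == AMDGPU::sub0_sub1, the lane mask covers two 32-bit lanes,
                      : // so Count == 2 and the function returns &AMDGPU::VReg_64RegClass.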
    1329             : 
    1330      369474 : bool SIRegisterInfo::shouldRewriteCopySrc(
    1331             :   const TargetRegisterClass *DefRC,
    1332             :   unsigned DefSubReg,
    1333             :   const TargetRegisterClass *SrcRC,
    1334             :   unsigned SrcSubReg) const {
    1335             :   // We want to prefer the smallest register class possible, so we don't want to
    1336             :   // stop and rewrite on anything that looks like a subregister
    1337             :   // extract. Operations mostly don't care about the super register class, so we
    1338             :   // only want to stop on the most basic of copies between the same register
    1339             :   // class.
    1340             :   //
    1341             :   // e.g. if we have something like
    1342             :   // %0 = ...
    1343             :   // %1 = ...
     1344             :   // %2 = REG_SEQUENCE %0, sub0, %1, sub1
     1345             :   // %3 = COPY %2.sub0
    1346             :   //
    1347             :   // We want to look through the COPY to find:
    1348             :   //  => %3 = COPY %0
    1349             : 
    1350             :   // Plain copy.
    1351      369474 :   return getCommonSubClass(DefRC, SrcRC) != nullptr;
    1352             : }
    1353             : 
    1354             : /// \brief Returns a register that is not used at any point in the function.
    1355             : ///        If all registers are used, then this function will return
     1356             : ///        AMDGPU::NoRegister.
    1357             : unsigned
    1358         135 : SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
    1359             :                                    const TargetRegisterClass *RC,
    1360             :                                    const MachineFunction &MF) const {
    1361             : 
    1362        8386 :   for (unsigned Reg : *RC)
    1363        4189 :     if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
    1364             :       return Reg;
    1365             :   return AMDGPU::NoRegister;
    1366             : }
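                      : 
                      : // A hypothetical use (sketch only): find a VGPR that is free across the
                      : // whole function and fall back gracefully when the class is exhausted.
                      : //
                      : //   unsigned UnusedVGPR =
                      : //       TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
                      : //   if (UnusedVGPR == AMDGPU::NoRegister)
                      : //     return false; // every register in the class is used; must spill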
    1367             : 
    1368        8058 : ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
    1369             :                                                    unsigned EltSize) const {
    1370        8058 :   if (EltSize == 4) {
    1371             :     static const int16_t Sub0_15[] = {
    1372             :       AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    1373             :       AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    1374             :       AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
    1375             :       AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
    1376             :     };
    1377             : 
    1378             :     static const int16_t Sub0_7[] = {
    1379             :       AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    1380             :       AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    1381             :     };
    1382             : 
    1383             :     static const int16_t Sub0_3[] = {
    1384             :       AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    1385             :     };
    1386             : 
    1387             :     static const int16_t Sub0_2[] = {
    1388             :       AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2,
    1389             :     };
    1390             : 
    1391             :     static const int16_t Sub0_1[] = {
    1392             :       AMDGPU::sub0, AMDGPU::sub1,
    1393             :     };
    1394             : 
    1395        7905 :     switch (AMDGPU::getRegBitWidth(*RC->MC)) {
    1396         793 :     case 32:
    1397         793 :       return {};
    1398             :     case 64:
    1399             :       return makeArrayRef(Sub0_1);
    1400             :     case 96:
    1401             :       return makeArrayRef(Sub0_2);
    1402             :     case 128:
    1403             :       return makeArrayRef(Sub0_3);
    1404             :     case 256:
    1405             :       return makeArrayRef(Sub0_7);
    1406             :     case 512:
    1407             :       return makeArrayRef(Sub0_15);
    1408           0 :     default:
    1409           0 :       llvm_unreachable("unhandled register size");
    1410             :     }
    1411             :   }
    1412             : 
    1413         153 :   if (EltSize == 8) {
    1414             :     static const int16_t Sub0_15_64[] = {
    1415             :       AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
    1416             :       AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
    1417             :       AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
    1418             :       AMDGPU::sub12_sub13, AMDGPU::sub14_sub15
    1419             :     };
    1420             : 
    1421             :     static const int16_t Sub0_7_64[] = {
    1422             :       AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
    1423             :       AMDGPU::sub4_sub5, AMDGPU::sub6_sub7
    1424             :     };
    1425             : 
     1426             : 
    1428             :       AMDGPU::sub0_sub1, AMDGPU::sub2_sub3
    1429             :     };
    1430             : 
    1431         147 :     switch (AMDGPU::getRegBitWidth(*RC->MC)) {
    1432          16 :     case 64:
    1433          16 :       return {};
    1434             :     case 128:
    1435             :       return makeArrayRef(Sub0_3_64);
    1436             :     case 256:
    1437             :       return makeArrayRef(Sub0_7_64);
    1438             :     case 512:
    1439             :       return makeArrayRef(Sub0_15_64);
    1440           0 :     default:
    1441           0 :       llvm_unreachable("unhandled register size");
    1442             :     }
    1443             :   }
    1444             : 
    1445             :   assert(EltSize == 16 && "unhandled register spill split size");
    1446             : 
    1447             :   static const int16_t Sub0_15_128[] = {
    1448             :     AMDGPU::sub0_sub1_sub2_sub3,
    1449             :     AMDGPU::sub4_sub5_sub6_sub7,
    1450             :     AMDGPU::sub8_sub9_sub10_sub11,
    1451             :     AMDGPU::sub12_sub13_sub14_sub15
    1452             :   };
    1453             : 
    1454             :   static const int16_t Sub0_7_128[] = {
    1455             :     AMDGPU::sub0_sub1_sub2_sub3,
    1456             :     AMDGPU::sub4_sub5_sub6_sub7
    1457             :   };
    1458             : 
    1459           6 :   switch (AMDGPU::getRegBitWidth(*RC->MC)) {
    1460           4 :   case 128:
    1461           4 :     return {};
    1462             :   case 256:
    1463             :     return makeArrayRef(Sub0_7_128);
    1464             :   case 512:
    1465             :     return makeArrayRef(Sub0_15_128);
    1466           0 :   default:
    1467           0 :     llvm_unreachable("unhandled register size");
    1468             :   }
    1469             : }
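                      : 
                      : // A worked example: splitting a 256-bit register into 64-bit spill pieces
                      : // (EltSize == 8) selects the Sub0_7_64 table above, i.e.
                      : //
                      : //   getRegSplitParts(&AMDGPU::SReg_256RegClass, 8)
                      : //     == { sub0_sub1, sub2_sub3, sub4_sub5, sub6_sub7 }
                      : //
                      : // while EltSize == 4 for the same class would return Sub0_7.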
    1470             : 
    1471             : const TargetRegisterClass*
    1472     3828445 : SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
    1473             :                                   unsigned Reg) const {
    1474     3828445 :   if (TargetRegisterInfo::isVirtualRegister(Reg))
    1475      107541 :     return  MRI.getRegClass(Reg);
    1476             : 
    1477     3720904 :   return getPhysRegClass(Reg);
    1478             : }
    1479             : 
    1480     3664689 : bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
    1481             :                             unsigned Reg) const {
    1482     3664689 :   return hasVGPRs(getRegClassForReg(MRI, Reg));
    1483             : }
    1484             : 
    1485      148762 : bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
    1486             :                                     const TargetRegisterClass *SrcRC,
    1487             :                                     unsigned SubReg,
    1488             :                                     const TargetRegisterClass *DstRC,
    1489             :                                     unsigned DstSubReg,
    1490             :                                     const TargetRegisterClass *NewRC,
    1491             :                                     LiveIntervals &LIS) const {
    1492             :   unsigned SrcSize = getRegSizeInBits(*SrcRC);
    1493             :   unsigned DstSize = getRegSizeInBits(*DstRC);
    1494             :   unsigned NewSize = getRegSizeInBits(*NewRC);
    1495             : 
     1496             :   // Do not increase the size of registers beyond a dword; we would need to
     1497             :   // allocate adjacent registers and constrain regalloc more than needed.
    1498             : 
    1499             :   // Always allow dword coalescing.
    1500      148762 :   if (SrcSize <= 32 || DstSize <= 32)
    1501             :     return true;
    1502             : 
    1503       42524 :   return NewSize <= DstSize || NewSize <= SrcSize;
    1504             : }
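                      : 
                      : // Worked examples of the policy above, with illustrative sizes in bits:
                      : //   SrcSize 32, DstSize 64,  NewSize 64  -> true  (dword coalescing)
                      : //   SrcSize 64, DstSize 64,  NewSize 64  -> true  (no growth)
                      : //   SrcSize 64, DstSize 64,  NewSize 128 -> false (would widen the register)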
    1505             : 
    1506      104088 : unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
    1507             :                                              MachineFunction &MF) const {
    1508             : 
    1509      104088 :   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
    1510      104088 :   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
    1511             : 
    1512      104088 :   unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
    1513      104088 :                                                        MF.getFunction());
    1514      208176 :   switch (RC->getID()) {
    1515             :   default:
    1516             :     return AMDGPURegisterInfo::getRegPressureLimit(RC, MF);
    1517       52044 :   case AMDGPU::VGPR_32RegClassID:
    1518      156132 :     return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF));
    1519       52044 :   case AMDGPU::SGPR_32RegClassID:
    1520      156132 :     return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF));
    1521             :   }
    1522             : }
    1523             : 
    1524      968988 : unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
    1525             :                                                 unsigned Idx) const {
    1526      968988 :   if (Idx == getVGPRPressureSet())
    1527             :     return getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
    1528       52044 :                                const_cast<MachineFunction &>(MF));
    1529             : 
    1530      916944 :   if (Idx == getSGPRPressureSet())
    1531             :     return getRegPressureLimit(&AMDGPU::SGPR_32RegClass,
    1532       52044 :                                const_cast<MachineFunction &>(MF));
    1533             : 
    1534      864900 :   return AMDGPURegisterInfo::getRegPressureSetLimit(MF, Idx);
    1535             : }
    1536             : 
    1537     4190670 : const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
    1538             :   static const int Empty[] = { -1 };
    1539             : 
    1540     4190670 :   if (hasRegUnit(AMDGPU::M0, RegUnit))
    1541             :     return Empty;
    1542     4188626 :   return AMDGPURegisterInfo::getRegUnitPressureSets(RegUnit);
    1543      245058 : }

Generated by: LCOV version 1.13