LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - SIRegisterInfo.cpp (source / functions)
Test: llvm-toolchain.info
Date: 2018-10-20 13:21:21
Coverage: Lines: 507 of 606 hit (83.7 %) - Functions: 37 of 42 hit (88.1 %)

          Line data    Source code
       1             : //===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : /// \file
      11             : /// SI implementation of the TargetRegisterInfo class.
      12             : //
      13             : //===----------------------------------------------------------------------===//
      14             : 
      15             : #include "SIRegisterInfo.h"
      16             : #include "AMDGPURegisterBankInfo.h"
      17             : #include "AMDGPUSubtarget.h"
      18             : #include "SIInstrInfo.h"
      19             : #include "SIMachineFunctionInfo.h"
      20             : #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
      21             : #include "llvm/CodeGen/MachineFrameInfo.h"
      22             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      23             : #include "llvm/CodeGen/RegisterScavenging.h"
      24             : #include "llvm/IR/Function.h"
      25             : #include "llvm/IR/LLVMContext.h"
      26             : 
      27             : using namespace llvm;
      28             : 
      29             : static bool hasPressureSet(const int *PSets, unsigned PSetID) {
      30       59808 :   for (unsigned i = 0; PSets[i] != -1; ++i) {
      31       47348 :     if (PSets[i] == (int)PSetID)
      32             :       return true;
      33             :   }
      34             :   return false;
      35             : }
      36             : 
      37       24920 : void SIRegisterInfo::classifyPressureSet(unsigned PSetID, unsigned Reg,
      38             :                                          BitVector &PressureSets) const {
      39       37380 :   for (MCRegUnitIterator U(Reg, this); U.isValid(); ++U) {
      40       24920 :     const int *PSets = getRegUnitPressureSets(*U);
      41       24920 :     if (hasPressureSet(PSets, PSetID)) {
      42             :       PressureSets.set(PSetID);
      43             :       break;
      44             :     }
      45             :   }
      46       24920 : }
      47             : 
      48             : static cl::opt<bool> EnableSpillSGPRToSMEM(
      49             :   "amdgpu-spill-sgpr-to-smem",
      50             :   cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"),
      51             :   cl::init(false));
      52             : 
      53             : static cl::opt<bool> EnableSpillSGPRToVGPR(
      54             :   "amdgpu-spill-sgpr-to-vgpr",
      55             :   cl::desc("Enable spilling SGPRs to VGPRs"),
      56             :   cl::ReallyHidden,
      57             :   cl::init(true));
      58             : 
      59        2492 : SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) :
      60             :   AMDGPURegisterInfo(),
      61             :   SGPRPressureSets(getNumRegPressureSets()),
      62             :   VGPRPressureSets(getNumRegPressureSets()),
      63             :   SpillSGPRToVGPR(false),
      64        2492 :   SpillSGPRToSMEM(false) {
      65        2492 :   if (EnableSpillSGPRToSMEM && ST.hasScalarStores())
      66           5 :     SpillSGPRToSMEM = true;
      67        2487 :   else if (EnableSpillSGPRToVGPR)
      68        2483 :     SpillSGPRToVGPR = true;
      69             : 
      70        2492 :   unsigned NumRegPressureSets = getNumRegPressureSets();
      71             : 
      72        2492 :   SGPRSetID = NumRegPressureSets;
      73        2492 :   VGPRSetID = NumRegPressureSets;
      74             : 
      75       14952 :   for (unsigned i = 0; i < NumRegPressureSets; ++i) {
      76       12460 :     classifyPressureSet(i, AMDGPU::SGPR0, SGPRPressureSets);
      77       12460 :     classifyPressureSet(i, AMDGPU::VGPR0, VGPRPressureSets);
      78             :   }
      79             : 
      80             :   // Determine the number of reg units for each pressure set.
      81        2492 :   std::vector<unsigned> PressureSetRegUnits(NumRegPressureSets, 0);
      82     1084020 :   for (unsigned i = 0, e = getNumRegUnits(); i != e; ++i) {
      83     1081528 :     const int *PSets = getRegUnitPressureSets(i);
      84     2945544 :     for (unsigned j = 0; PSets[j] != -1; ++j) {
      85     3728032 :       ++PressureSetRegUnits[PSets[j]];
      86             :     }
      87             :   }
      88             : 
      89             :   unsigned VGPRMax = 0, SGPRMax = 0;
      90       14952 :   for (unsigned i = 0; i < NumRegPressureSets; ++i) {
      91        4984 :     if (isVGPRPressureSet(i) && PressureSetRegUnits[i] > VGPRMax) {
      92        2492 :       VGPRSetID = i;
      93        2492 :       VGPRMax = PressureSetRegUnits[i];
      94        2492 :       continue;
      95             :     }
      96       19936 :     if (isSGPRPressureSet(i) && PressureSetRegUnits[i] > SGPRMax) {
      97        9968 :       SGPRSetID = i;
      98        9968 :       SGPRMax = PressureSetRegUnits[i];
      99             :     }
     100             :   }
     101             : 
     102             :   assert(SGPRSetID < NumRegPressureSets &&
     103             :          VGPRSetID < NumRegPressureSets);
     104        2492 : }
     105             : 
     106       18275 : unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
     107             :   const MachineFunction &MF) const {
     108             : 
     109       18275 :   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
     110       18275 :   unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4;
     111       18275 :   unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
     112       18275 :   return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
     113             : }
     114             : 
     115             : static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount) {
     116             :   unsigned Reg;
     117             : 
     118             :   // Try to place it in a hole after PrivateSegmentBufferReg.
     119       18230 :   if (RegCount & 3) {
     120             :     // We cannot put the segment buffer in (Idx - 4) ... (Idx - 1) due to
      121             :     // alignment constraints, so we have a hole where we can put the wave offset.
     122       18044 :     Reg = RegCount - 1;
     123             :   } else {
     124             :     // We can put the segment buffer in (Idx - 4) ... (Idx - 1) and put the
     125             :     // wave offset before it.
     126         186 :     Reg = RegCount - 5;
     127             :   }
     128             : 
     129             :   return Reg;
     130             : }
     131             : 
     132       18230 : unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
     133             :   const MachineFunction &MF) const {
     134       18230 :   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
     135       18230 :   unsigned Reg = findPrivateSegmentWaveByteOffsetRegIndex(ST.getMaxNumSGPRs(MF));
     136       36460 :   return AMDGPU::SGPR_32RegClass.getRegister(Reg);
     137             : }
     138             : 
     139        2193 : unsigned SIRegisterInfo::reservedStackPtrOffsetReg(
     140             :   const MachineFunction &MF) const {
     141        2193 :   return AMDGPU::SGPR32;
     142             : }
     143             : 
     144       41111 : BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
     145       41111 :   BitVector Reserved(getNumRegs());
     146             : 
      147             :   // EXEC_LO and EXEC_HI could be allocated and used as regular registers, but
     148             :   // this seems likely to result in bugs, so I'm marking them as reserved.
     149       41111 :   reserveRegisterTuples(Reserved, AMDGPU::EXEC);
     150       41111 :   reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
     151             : 
     152             :   // M0 has to be reserved so that llvm accepts it as a live-in into a block.
     153       41111 :   reserveRegisterTuples(Reserved, AMDGPU::M0);
     154             : 
     155             :   // Reserve the memory aperture registers.
     156       41111 :   reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
     157       41111 :   reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
     158       41111 :   reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
     159       41111 :   reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
     160             : 
     161             :   // Reserve xnack_mask registers - support is not implemented in Codegen.
     162       41111 :   reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);
     163             : 
     164             :   // Reserve Trap Handler registers - support is not implemented in Codegen.
     165       41111 :   reserveRegisterTuples(Reserved, AMDGPU::TBA);
     166       41111 :   reserveRegisterTuples(Reserved, AMDGPU::TMA);
     167       41111 :   reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
     168       41111 :   reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
     169       41111 :   reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
     170       41111 :   reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
     171       41111 :   reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
     172       41111 :   reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
     173       41111 :   reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
     174       41111 :   reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);
     175             : 
     176       41111 :   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
     177             : 
     178       41111 :   unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
     179       41111 :   unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
     180      201677 :   for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
     181      160566 :     unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
     182      160566 :     reserveRegisterTuples(Reserved, Reg);
     183             :   }
     184             : 
     185       41111 :   unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
     186       41111 :   unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
     187       47975 :   for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
     188        6864 :     unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
     189        6864 :     reserveRegisterTuples(Reserved, Reg);
     190             :   }
     191             : 
     192             :   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
     193             : 
     194       41111 :   unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
     195       41111 :   if (ScratchWaveOffsetReg != AMDGPU::NoRegister) {
     196             :     // Reserve 1 SGPR for scratch wave offset in case we need to spill.
     197       41111 :     reserveRegisterTuples(Reserved, ScratchWaveOffsetReg);
     198             :   }
     199             : 
     200       41111 :   unsigned ScratchRSrcReg = MFI->getScratchRSrcReg();
     201       41111 :   if (ScratchRSrcReg != AMDGPU::NoRegister) {
     202             :     // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need
     203             :     // to spill.
     204             :     // TODO: May need to reserve a VGPR if doing LDS spilling.
     205       41111 :     reserveRegisterTuples(Reserved, ScratchRSrcReg);
     206             :     assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg));
     207             :   }
     208             : 
     209             :   // We have to assume the SP is needed in case there are calls in the function,
     210             :   // which is detected after the function is lowered. If we aren't really going
     211             :   // to need SP, don't bother reserving it.
     212       41111 :   unsigned StackPtrReg = MFI->getStackPtrOffsetReg();
     213             : 
     214       41111 :   if (StackPtrReg != AMDGPU::NoRegister) {
     215       41111 :     reserveRegisterTuples(Reserved, StackPtrReg);
     216             :     assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
     217             :   }
     218             : 
     219       41111 :   unsigned FrameReg = MFI->getFrameOffsetReg();
     220       41111 :   if (FrameReg != AMDGPU::NoRegister) {
     221       41111 :     reserveRegisterTuples(Reserved, FrameReg);
     222             :     assert(!isSubRegister(ScratchRSrcReg, FrameReg));
     223             :   }
     224             : 
     225       41111 :   return Reserved;
     226             : }
     227             : 
     228       39456 : bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
     229             :   const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>();
     230       39456 :   if (Info->isEntryFunction()) {
     231       35922 :     const MachineFrameInfo &MFI = Fn.getFrameInfo();
     232       35922 :     return MFI.hasStackObjects() || MFI.hasCalls();
     233             :   }
     234             : 
     235             :   // May need scavenger for dealing with callee saved registers.
     236             :   return true;
     237             : }
     238             : 
     239       19728 : bool SIRegisterInfo::requiresFrameIndexScavenging(
     240             :   const MachineFunction &MF) const {
     241       19728 :   const MachineFrameInfo &MFI = MF.getFrameInfo();
     242       19728 :   if (MFI.hasStackObjects())
     243             :     return true;
     244             : 
     245             :   // May need to deal with callee saved registers.
     246             :   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
     247       19021 :   return !Info->isEntryFunction();
     248             : }
     249             : 
     250       19331 : bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
     251             :   const MachineFunction &MF) const {
     252             :   // m0 is needed for the scalar store offset. m0 is unallocatable, so we can't
     253             :   // create a virtual register for it during frame index elimination, so the
     254             :   // scavenger is directly needed.
     255       19331 :   return MF.getFrameInfo().hasStackObjects() &&
     256       19331 :          MF.getSubtarget<GCNSubtarget>().hasScalarStores() &&
     257         401 :          MF.getInfo<SIMachineFunctionInfo>()->hasSpilledSGPRs();
     258             : }
     259             : 
     260       19715 : bool SIRegisterInfo::requiresVirtualBaseRegisters(
     261             :   const MachineFunction &) const {
     262             :   // There are no special dedicated stack or frame pointers.
     263       19715 :   return true;
     264             : }
     265             : 
     266       39494 : bool SIRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
     267             :   // This helps catch bugs as verifier errors.
     268       39494 :   return true;
     269             : }
     270             : 
     271        4400 : int64_t SIRegisterInfo::getMUBUFInstrOffset(const MachineInstr *MI) const {
     272             :   assert(SIInstrInfo::isMUBUF(*MI));
     273             : 
     274        8800 :   int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
     275             :                                           AMDGPU::OpName::offset);
     276        8800 :   return MI->getOperand(OffIdx).getImm();
     277             : }
     278             : 
     279           4 : int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
     280             :                                                  int Idx) const {
     281           4 :   if (!SIInstrInfo::isMUBUF(*MI))
     282             :     return 0;
     283             : 
     284             :   assert(Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
     285             :                                            AMDGPU::OpName::vaddr) &&
     286             :          "Should never see frame index on non-address operand");
     287             : 
     288           4 :   return getMUBUFInstrOffset(MI);
     289             : }
     290             : 
     291        4739 : bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
     292        4739 :   if (!MI->mayLoadOrStore())
     293             :     return false;
     294             : 
     295        4396 :   int64_t FullOffset = Offset + getMUBUFInstrOffset(MI);
     296             : 
     297        4396 :   return !isUInt<12>(FullOffset);
     298             : }
     299             : 
     300           0 : void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
     301             :                                                   unsigned BaseReg,
     302             :                                                   int FrameIdx,
     303             :                                                   int64_t Offset) const {
     304             :   MachineBasicBlock::iterator Ins = MBB->begin();
     305           0 :   DebugLoc DL; // Defaults to "unknown"
     306             : 
     307           0 :   if (Ins != MBB->end())
     308             :     DL = Ins->getDebugLoc();
     309             : 
     310           0 :   MachineFunction *MF = MBB->getParent();
     311           0 :   const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
     312           0 :   const SIInstrInfo *TII = Subtarget.getInstrInfo();
     313             : 
     314           0 :   if (Offset == 0) {
     315           0 :     BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), BaseReg)
     316             :       .addFrameIndex(FrameIdx);
     317             :     return;
     318             :   }
     319             : 
     320           0 :   MachineRegisterInfo &MRI = MF->getRegInfo();
     321           0 :   unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
     322             : 
     323           0 :   unsigned FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
     324             : 
     325           0 :   BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
     326             :     .addImm(Offset);
     327           0 :   BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), FIReg)
     328             :     .addFrameIndex(FrameIdx);
     329             : 
     330           0 :   TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
     331           0 :     .addReg(OffsetReg, RegState::Kill)
     332           0 :     .addReg(FIReg);
     333             : }
     334             : 
     335           0 : void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
     336             :                                        int64_t Offset) const {
     337             : 
     338           0 :   MachineBasicBlock *MBB = MI.getParent();
     339           0 :   MachineFunction *MF = MBB->getParent();
     340           0 :   const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
     341           0 :   const SIInstrInfo *TII = Subtarget.getInstrInfo();
     342             : 
     343             : #ifndef NDEBUG
     344             :   // FIXME: Is it possible to be storing a frame index to itself?
     345             :   bool SeenFI = false;
     346             :   for (const MachineOperand &MO: MI.operands()) {
     347             :     if (MO.isFI()) {
     348             :       if (SeenFI)
     349             :         llvm_unreachable("should not see multiple frame indices");
     350             : 
     351             :       SeenFI = true;
     352             :     }
     353             :   }
     354             : #endif
     355             : 
     356           0 :   MachineOperand *FIOp = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
     357             :   assert(FIOp && FIOp->isFI() && "frame index must be address operand");
     358             :   assert(TII->isMUBUF(MI));
     359             :   assert(TII->getNamedOperand(MI, AMDGPU::OpName::soffset)->getReg() ==
     360             :          MF->getInfo<SIMachineFunctionInfo>()->getFrameOffsetReg() &&
     361             :          "should only be seeing frame offset relative FrameIndex");
     362             : 
     363             : 
     364           0 :   MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
     365           0 :   int64_t NewOffset = OffsetOp->getImm() + Offset;
     366             :   assert(isUInt<12>(NewOffset) && "offset should be legal");
     367             : 
     368           0 :   FIOp->ChangeToRegister(BaseReg, false);
     369             :   OffsetOp->setImm(NewOffset);
     370           0 : }
     371             : 
     372           0 : bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
     373             :                                         unsigned BaseReg,
     374             :                                         int64_t Offset) const {
     375           0 :   if (!SIInstrInfo::isMUBUF(*MI))
     376             :     return false;
     377             : 
     378           0 :   int64_t NewOffset = Offset + getMUBUFInstrOffset(MI);
     379             : 
     380           0 :   return isUInt<12>(NewOffset);
     381             : }
     382             : 
     383           0 : const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
     384             :   const MachineFunction &MF, unsigned Kind) const {
     385             :   // This is inaccurate. It depends on the instruction and address space. The
     386             :   // only place where we should hit this is for dealing with frame indexes /
     387             :   // private accesses, so this is correct in that case.
     388           0 :   return &AMDGPU::VGPR_32RegClass;
     389             : }
     390             : 
     391             : static unsigned getNumSubRegsForSpillOp(unsigned Op) {
     392             : 
     393             :   switch (Op) {
     394             :   case AMDGPU::SI_SPILL_S512_SAVE:
     395             :   case AMDGPU::SI_SPILL_S512_RESTORE:
     396             :   case AMDGPU::SI_SPILL_V512_SAVE:
     397             :   case AMDGPU::SI_SPILL_V512_RESTORE:
     398             :     return 16;
     399             :   case AMDGPU::SI_SPILL_S256_SAVE:
     400             :   case AMDGPU::SI_SPILL_S256_RESTORE:
     401             :   case AMDGPU::SI_SPILL_V256_SAVE:
     402             :   case AMDGPU::SI_SPILL_V256_RESTORE:
     403             :     return 8;
     404             :   case AMDGPU::SI_SPILL_S128_SAVE:
     405             :   case AMDGPU::SI_SPILL_S128_RESTORE:
     406             :   case AMDGPU::SI_SPILL_V128_SAVE:
     407             :   case AMDGPU::SI_SPILL_V128_RESTORE:
     408             :     return 4;
     409             :   case AMDGPU::SI_SPILL_V96_SAVE:
     410             :   case AMDGPU::SI_SPILL_V96_RESTORE:
     411             :     return 3;
     412             :   case AMDGPU::SI_SPILL_S64_SAVE:
     413             :   case AMDGPU::SI_SPILL_S64_RESTORE:
     414             :   case AMDGPU::SI_SPILL_V64_SAVE:
     415             :   case AMDGPU::SI_SPILL_V64_RESTORE:
     416             :     return 2;
     417             :   case AMDGPU::SI_SPILL_S32_SAVE:
     418             :   case AMDGPU::SI_SPILL_S32_RESTORE:
     419             :   case AMDGPU::SI_SPILL_V32_SAVE:
     420             :   case AMDGPU::SI_SPILL_V32_RESTORE:
     421             :     return 1;
     422           0 :   default: llvm_unreachable("Invalid spill opcode");
     423             :   }
     424             : }
     425             : 
     426             : static int getOffsetMUBUFStore(unsigned Opc) {
     427           0 :   switch (Opc) {
     428             :   case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
     429             :     return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
     430             :   case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
     431             :     return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
     432             :   case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
     433             :     return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
     434             :   case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
     435             :     return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
     436             :   case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
     437             :     return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
     438             :   case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
     439             :     return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
     440             :   case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
     441             :     return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
     442             :   default:
     443             :     return -1;
     444             :   }
     445             : }
     446             : 
     447        1798 : static int getOffsetMUBUFLoad(unsigned Opc) {
     448        1798 :   switch (Opc) {
     449             :   case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
     450             :     return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
     451          74 :   case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
     452          74 :     return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
     453           8 :   case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
     454           8 :     return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
     455          24 :   case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
     456          24 :     return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
     457           2 :   case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
     458           2 :     return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
     459           2 :   case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
     460           2 :     return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
     461          16 :   case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
     462          16 :     return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
     463           2 :   case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
     464           2 :     return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
     465           2 :   case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
     466           2 :     return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
     467           2 :   case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
     468           2 :     return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
     469           2 :   case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
     470           2 :     return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
     471           3 :   case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
     472           3 :     return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
     473           4 :   case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
     474           4 :     return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
     475           0 :   default:
     476           0 :     return -1;
     477             :   }
     478             : }
     479             : 
     480             : // This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
     481             : // need to handle the case where an SGPR may need to be spilled while spilling.
     482           0 : static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
     483             :                                       MachineFrameInfo &MFI,
     484             :                                       MachineBasicBlock::iterator MI,
     485             :                                       int Index,
     486             :                                       int64_t Offset) {
     487           0 :   MachineBasicBlock *MBB = MI->getParent();
     488             :   const DebugLoc &DL = MI->getDebugLoc();
     489           0 :   bool IsStore = MI->mayStore();
     490             : 
     491           0 :   unsigned Opc = MI->getOpcode();
     492           0 :   int LoadStoreOp = IsStore ?
     493           0 :     getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
     494           0 :   if (LoadStoreOp == -1)
     495           0 :     return false;
     496             : 
     497           0 :   const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
     498             :   MachineInstrBuilder NewMI =
     499           0 :       BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
     500             :           .add(*Reg)
     501           0 :           .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
     502           0 :           .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
     503             :           .addImm(Offset)
     504             :           .addImm(0) // glc
     505             :           .addImm(0) // slc
     506             :           .addImm(0) // tfe
     507             :           .cloneMemRefs(*MI);
     508             : 
     509           0 :   const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
     510             :                                                        AMDGPU::OpName::vdata_in);
     511           0 :   if (VDataIn)
     512             :     NewMI.add(*VDataIn);
     513             :   return true;
     514             : }
     515             : 
     516        2543 : void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
     517             :                                          unsigned LoadStoreOp,
     518             :                                          int Index,
     519             :                                          unsigned ValueReg,
     520             :                                          bool IsKill,
     521             :                                          unsigned ScratchRsrcReg,
     522             :                                          unsigned ScratchOffsetReg,
     523             :                                          int64_t InstOffset,
     524             :                                          MachineMemOperand *MMO,
     525             :                                          RegScavenger *RS) const {
     526        2543 :   MachineBasicBlock *MBB = MI->getParent();
     527        2543 :   MachineFunction *MF = MI->getParent()->getParent();
     528        2543 :   const GCNSubtarget &ST =  MF->getSubtarget<GCNSubtarget>();
     529        2543 :   const SIInstrInfo *TII = ST.getInstrInfo();
     530        2543 :   const MachineFrameInfo &MFI = MF->getFrameInfo();
     531             : 
     532        2543 :   const MCInstrDesc &Desc = TII->get(LoadStoreOp);
     533             :   const DebugLoc &DL = MI->getDebugLoc();
     534        2543 :   bool IsStore = Desc.mayStore();
     535             : 
     536             :   bool Scavenged = false;
     537             :   unsigned SOffset = ScratchOffsetReg;
     538             : 
     539             :   const unsigned EltSize = 4;
     540        2543 :   const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
     541        5086 :   unsigned NumSubRegs = AMDGPU::getRegBitWidth(RC->getID()) / (EltSize * CHAR_BIT);
     542        2543 :   unsigned Size = NumSubRegs * EltSize;
     543        2543 :   int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
     544             :   int64_t ScratchOffsetRegDelta = 0;
     545             : 
     546             :   unsigned Align = MFI.getObjectAlignment(Index);
     547             :   const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();
     548             : 
     549             :   assert((Offset % EltSize) == 0 && "unexpected VGPR spill offset");
     550             : 
     551        2543 :   if (!isUInt<12>(Offset + Size - EltSize)) {
     552             :     SOffset = AMDGPU::NoRegister;
     553             : 
     554             :     // We currently only support spilling VGPRs to EltSize boundaries, meaning
     555             :     // we can simplify the adjustment of Offset here to just scale with
     556             :     // WavefrontSize.
     557         240 :     Offset *= ST.getWavefrontSize();
     558             : 
     559             :     // We don't have access to the register scavenger if this function is called
      560             :     // during PEI::scavengeFrameVirtualRegs().
     561         240 :     if (RS)
     562           0 :       SOffset = RS->FindUnusedReg(&AMDGPU::SGPR_32RegClass);
     563             : 
     564           0 :     if (SOffset == AMDGPU::NoRegister) {
      565             :       // There are no free SGPRs, and we are in the process of spilling
      566             :       // VGPRs too.  Since we need a VGPR in order to spill SGPRs (this is true
      567             :       // on SI/CI, and on VI it is true until we implement spilling using scalar
      568             :       // stores), we have no way to free up an SGPR.  Our solution here is to
      569             :       // add the offset directly to the ScratchOffset register, and then
      570             :       // subtract the offset after the spill to return ScratchOffset to its
      571             :       // original value.
     572             :       SOffset = ScratchOffsetReg;
     573             :       ScratchOffsetRegDelta = Offset;
     574             :     } else {
     575             :       Scavenged = true;
     576             :     }
     577             : 
     578         720 :     BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
     579         240 :       .addReg(ScratchOffsetReg)
     580             :       .addImm(Offset);
     581             : 
     582             :     Offset = 0;
     583             :   }
     584             : 
     585        9119 :   for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += EltSize) {
     586        6576 :     unsigned SubReg = NumSubRegs == 1 ?
     587        5388 :       ValueReg : getSubReg(ValueReg, getSubRegFromChannel(i));
     588             : 
     589             :     unsigned SOffsetRegState = 0;
     590             :     unsigned SrcDstRegState = getDefRegState(!IsStore);
     591        6576 :     if (i + 1 == e) {
     592             :       SOffsetRegState |= getKillRegState(Scavenged);
     593             :       // The last implicit use carries the "Kill" flag.
     594        2543 :       SrcDstRegState |= getKillRegState(IsKill);
     595             :     }
     596             : 
     597        6576 :     MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i);
     598             :     MachineMemOperand *NewMMO
     599        6576 :       = MF->getMachineMemOperand(PInfo, MMO->getFlags(),
     600             :                                  EltSize, MinAlign(Align, EltSize * i));
     601             : 
     602        6576 :     auto MIB = BuildMI(*MBB, MI, DL, Desc)
     603        6576 :       .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill))
     604        6576 :       .addReg(ScratchRsrcReg)
     605        6576 :       .addReg(SOffset, SOffsetRegState)
     606             :       .addImm(Offset)
     607             :       .addImm(0) // glc
     608             :       .addImm(0) // slc
     609             :       .addImm(0) // tfe
     610        6576 :       .addMemOperand(NewMMO);
     611             : 
     612        6576 :     if (NumSubRegs > 1)
     613        5388 :       MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
     614             :   }
     615             : 
     616        2543 :   if (ScratchOffsetRegDelta != 0) {
     617             :     // Subtract the offset we added to the ScratchOffset register.
     618         720 :     BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffsetReg)
     619         240 :         .addReg(ScratchOffsetReg)
     620             :         .addImm(ScratchOffsetRegDelta);
     621             :   }
     622        2543 : }
     623             : 
     624             : static std::pair<unsigned, unsigned> getSpillEltSize(unsigned SuperRegSize,
     625             :                                                      bool Store) {
     626          28 :   if (SuperRegSize % 16 == 0) {
     627             :     return { 16, Store ? AMDGPU::S_BUFFER_STORE_DWORDX4_SGPR :
     628             :                          AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR };
     629             :   }
     630             : 
     631          22 :   if (SuperRegSize % 8 == 0) {
     632             :     return { 8, Store ? AMDGPU::S_BUFFER_STORE_DWORDX2_SGPR :
     633             :                         AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR };
     634             :   }
     635             : 
     636             :   return { 4, Store ? AMDGPU::S_BUFFER_STORE_DWORD_SGPR :
     637             :                       AMDGPU::S_BUFFER_LOAD_DWORD_SGPR};
     638             : }
     639             : 
     640         690 : bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
     641             :                                int Index,
     642             :                                RegScavenger *RS,
     643             :                                bool OnlyToVGPR) const {
     644         690 :   MachineBasicBlock *MBB = MI->getParent();
     645         690 :   MachineFunction *MF = MBB->getParent();
     646         690 :   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
     647             :   DenseSet<unsigned> SGPRSpillVGPRDefinedSet;
     648             : 
     649             :   ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
     650         690 :     = MFI->getSGPRToVGPRSpills(Index);
     651         690 :   bool SpillToVGPR = !VGPRSpills.empty();
     652         690 :   if (OnlyToVGPR && !SpillToVGPR)
     653             :     return false;
     654             : 
     655         690 :   MachineRegisterInfo &MRI = MF->getRegInfo();
     656         690 :   const GCNSubtarget &ST =  MF->getSubtarget<GCNSubtarget>();
     657         690 :   const SIInstrInfo *TII = ST.getInstrInfo();
     658             : 
     659         690 :   unsigned SuperReg = MI->getOperand(0).getReg();
     660             :   bool IsKill = MI->getOperand(0).isKill();
     661             :   const DebugLoc &DL = MI->getDebugLoc();
     662             : 
     663         690 :   MachineFrameInfo &FrameInfo = MF->getFrameInfo();
     664             : 
     665         690 :   bool SpillToSMEM = spillSGPRToSMEM();
     666         690 :   if (SpillToSMEM && OnlyToVGPR)
     667             :     return false;
     668             : 
     669             :   assert(SpillToVGPR || (SuperReg != MFI->getStackPtrOffsetReg() &&
     670             :                          SuperReg != MFI->getFrameOffsetReg() &&
     671             :                          SuperReg != MFI->getScratchWaveOffsetReg()));
     672             : 
     673             :   assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
     674             : 
     675             :   unsigned OffsetReg = AMDGPU::M0;
     676             :   unsigned M0CopyReg = AMDGPU::NoRegister;
     677             : 
     678         690 :   if (SpillToSMEM) {
     679          14 :     if (RS->isRegUsed(AMDGPU::M0)) {
     680          14 :       M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
     681          42 :       BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
     682          14 :         .addReg(AMDGPU::M0);
     683             :     }
     684             :   }
     685             : 
     686             :   unsigned ScalarStoreOp;
     687         690 :   unsigned EltSize = 4;
     688         690 :   const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
     689         704 :   if (SpillToSMEM && isSGPRClass(RC)) {
     690             :     // XXX - if private_element_size is larger than 4 it might be useful to be
     691             :     // able to spill wider vmem spills.
     692             :     std::tie(EltSize, ScalarStoreOp) =
     693          14 :           getSpillEltSize(getRegSizeInBits(*RC) / 8, true);
     694             :   }
     695             : 
     696         690 :   ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
     697         690 :   unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
     698             : 
     699             :   // SubReg carries the "Kill" flag when SubReg == SuperReg.
     700         690 :   unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
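                      :   // For each sub-register, spill either via SMEM, to a VGPR lane with
                      :   // V_WRITELANE_B32, or to a scratch stack slot through a temporary VGPR.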
     701        1981 :   for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
     702        1291 :     unsigned SubReg = NumSubRegs == 1 ?
     703        1580 :       SuperReg : getSubReg(SuperReg, SplitParts[i]);
     704             : 
     705        1291 :     if (SpillToSMEM) {
     706             :       int64_t FrOffset = FrameInfo.getObjectOffset(Index);
     707             : 
     708             :       // The allocated memory size is really the wavefront size * the frame
     709             :       // index size. The widest register class is 64 bytes, so a 4-byte scratch
     710             :       // allocation is enough to spill this in a single stack object.
     711             :       //
     712             :       // FIXME: Frame size/offsets are computed earlier than this, so the extra
     713             :       // space is still unnecessarily allocated.
     714             : 
     715             :       unsigned Align = FrameInfo.getObjectAlignment(Index);
     716             :       MachinePointerInfo PtrInfo
     717          15 :         = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
     718             :       MachineMemOperand *MMO
     719          15 :         = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
     720             :                                    EltSize, MinAlign(Align, EltSize * i));
     721             : 
     722             :       // SMEM instructions only support a single offset, so increment the wave
     723             :       // offset.
     724             : 
     725          15 :       int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
     726          15 :       if (Offset != 0) {
     727          45 :         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
     728          15 :           .addReg(MFI->getFrameOffsetReg())
     729             :           .addImm(Offset);
     730             :       } else {
     731           0 :         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
     732           0 :           .addReg(MFI->getFrameOffsetReg());
     733             :       }
     734             : 
     735          45 :       BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp))
     736          15 :         .addReg(SubReg, getKillRegState(IsKill)) // sdata
     737          15 :         .addReg(MFI->getScratchRSrcReg())        // sbase
     738          15 :         .addReg(OffsetReg, RegState::Kill)       // soff
     739             :         .addImm(0)                               // glc
     740             :         .addMemOperand(MMO);
     741             : 
     742             :       continue;
     743             :     }
     744             : 
     745        1276 :     if (SpillToVGPR) {
     746        2388 :       SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
     747             : 
     748             :       // During SGPR spilling to VGPR, determine if the VGPR is defined. The
     749             :       // only circumstance in which we say it is undefined is when it is the
     750             :       // first spill to this VGPR in the first basic block.
     751             :       bool VGPRDefined = true;
     752        1194 :       if (MBB == &MF->front())
     753        1146 :         VGPRDefined = !SGPRSpillVGPRDefinedSet.insert(Spill.VGPR).second;
     754             : 
     755             :       // Mark the "old value of vgpr" input undef only if this is the first sgpr
     756             :       // spill to this specific vgpr in the first basic block.
     757        1194 :       BuildMI(*MBB, MI, DL,
     758             :               TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
     759        1194 :               Spill.VGPR)
     760        1194 :         .addReg(SubReg, getKillRegState(IsKill))
     761        1194 :         .addImm(Spill.Lane)
     762        1813 :         .addReg(Spill.VGPR, VGPRDefined ? 0 : RegState::Undef);
     763             : 
     764             :       // FIXME: Since this spills to another register instead of an actual
     765             :       // frame index, we should delete the frame index when all references to
     766             :       // it are fixed.
     767             :     } else {
      768             :       // XXX - Can the spill to VGPR fail for some subregisters but not others?
     769          82 :       if (OnlyToVGPR)
     770           0 :         return false;
     771             : 
     772             :       // Spill SGPR to a frame index.
     773             :       // TODO: Should VI try to spill to VGPR and then spill to SMEM?
     774          82 :       unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
      775             : 
     776             : 
     777             :       MachineInstrBuilder Mov
     778         164 :         = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
     779          82 :         .addReg(SubReg, SubKillState);
     780             : 
     781             : 
     782             :       // There could be undef components of a spilled super register.
     783             :       // TODO: Can we detect this and skip the spill?
     784          82 :       if (NumSubRegs > 1) {
     785             :         // The last implicit use of the SuperReg carries the "Kill" flag.
     786             :         unsigned SuperKillState = 0;
     787          76 :         if (i + 1 == e)
     788             :           SuperKillState |= getKillRegState(IsKill);
     789          76 :         Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
     790             :       }
     791             : 
     792             :       unsigned Align = FrameInfo.getObjectAlignment(Index);
     793             :       MachinePointerInfo PtrInfo
     794          82 :         = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
     795             :       MachineMemOperand *MMO
     796          82 :         = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
     797             :                                    EltSize, MinAlign(Align, EltSize * i));
     798         246 :       BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
     799          82 :         .addReg(TmpReg, RegState::Kill)    // src
     800             :         .addFrameIndex(Index)              // vaddr
      801          82 :         .addReg(MFI->getScratchRSrcReg())  // srsrc
     802          82 :         .addReg(MFI->getFrameOffsetReg())  // soffset
     803          82 :         .addImm(i * 4)                     // offset
     804             :         .addMemOperand(MMO);
     805             :     }
     806             :   }
     807             : 
     808         690 :   if (M0CopyReg != AMDGPU::NoRegister) {
     809          42 :     BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
     810          14 :       .addReg(M0CopyReg, RegState::Kill);
     811             :   }
     812             : 
     813         690 :   MI->eraseFromParent();
     814             :   MFI->addToSpilledSGPRs(NumSubRegs);
     815         690 :   return true;
     816             : }
     817             : 
     818         680 : bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
     819             :                                  int Index,
     820             :                                  RegScavenger *RS,
     821             :                                  bool OnlyToVGPR) const {
     822         680 :   MachineFunction *MF = MI->getParent()->getParent();
     823         680 :   MachineRegisterInfo &MRI = MF->getRegInfo();
     824             :   MachineBasicBlock *MBB = MI->getParent();
     825         680 :   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
     826             : 
     827             :   ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
     828         680 :     = MFI->getSGPRToVGPRSpills(Index);
     829         680 :   bool SpillToVGPR = !VGPRSpills.empty();
     830         680 :   if (OnlyToVGPR && !SpillToVGPR)
     831             :     return false;
     832             : 
     833         680 :   MachineFrameInfo &FrameInfo = MF->getFrameInfo();
     834         680 :   const GCNSubtarget &ST =  MF->getSubtarget<GCNSubtarget>();
     835         680 :   const SIInstrInfo *TII = ST.getInstrInfo();
     836             :   const DebugLoc &DL = MI->getDebugLoc();
     837             : 
     838         680 :   unsigned SuperReg = MI->getOperand(0).getReg();
     839         680 :   bool SpillToSMEM = spillSGPRToSMEM();
     840         680 :   if (SpillToSMEM && OnlyToVGPR)
     841             :     return false;
     842             : 
     843             :   assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
     844             : 
     845             :   unsigned OffsetReg = AMDGPU::M0;
     846             :   unsigned M0CopyReg = AMDGPU::NoRegister;
     847             : 
     848         680 :   if (SpillToSMEM) {
     849          14 :     if (RS->isRegUsed(AMDGPU::M0)) {
     850          14 :       M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
     851          42 :       BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
     852          14 :         .addReg(AMDGPU::M0);
     853             :     }
     854             :   }
     855             : 
     856         680 :   unsigned EltSize = 4;
     857             :   unsigned ScalarLoadOp;
     858             : 
     859         680 :   const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
     860         694 :   if (SpillToSMEM && isSGPRClass(RC)) {
     861             :     // XXX - if private_element_size is larger than 4 it might be useful to be
     862             :     // able to spill wider vmem spills.
     863             :     std::tie(EltSize, ScalarLoadOp) =
     864          14 :           getSpillEltSize(getRegSizeInBits(*RC) / 8, false);
     865             :   }
     866             : 
     867         680 :   ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
     868         680 :   unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
     869             : 
     870             :   // SubReg carries the "Kill" flag when SubReg == SuperReg.
     871             :   int64_t FrOffset = FrameInfo.getObjectOffset(Index);
     872             : 
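                      :   // Mirror of the spill path: reload each sub-register from SMEM, from a VGPR
                      :   // lane with V_READLANE_B32, or from the stack slot via a temporary VGPR.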
     873        1956 :   for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
     874        1276 :     unsigned SubReg = NumSubRegs == 1 ?
     875        1560 :       SuperReg : getSubReg(SuperReg, SplitParts[i]);
     876             : 
     877        1276 :     if (SpillToSMEM) {
     878             :       // FIXME: Size may be > 4 but extra bytes wasted.
     879             :       unsigned Align = FrameInfo.getObjectAlignment(Index);
     880             :       MachinePointerInfo PtrInfo
     881          15 :         = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
     882             :       MachineMemOperand *MMO
     883          15 :         = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
     884             :                                    EltSize, MinAlign(Align, EltSize * i));
     885             : 
     886             :       // Add i * 4 offset
     887          15 :       int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
     888          15 :       if (Offset != 0) {
     889          45 :         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
     890          15 :           .addReg(MFI->getFrameOffsetReg())
     891             :           .addImm(Offset);
     892             :       } else {
     893           0 :         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
     894           0 :           .addReg(MFI->getFrameOffsetReg());
     895             :       }
     896             : 
     897             :       auto MIB =
     898          30 :         BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg)
     899          15 :         .addReg(MFI->getScratchRSrcReg()) // sbase
     900          15 :         .addReg(OffsetReg, RegState::Kill)                // soff
     901             :         .addImm(0)                        // glc
     902          15 :         .addMemOperand(MMO);
     903             : 
     904          15 :       if (NumSubRegs > 1)
     905           2 :         MIB.addReg(SuperReg, RegState::ImplicitDefine);
     906             : 
     907             :       continue;
     908             :     }
     909             : 
     910        1261 :     if (SpillToVGPR) {
     911        2358 :       SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
     912             :       auto MIB =
     913        1179 :         BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
     914        1179 :                 SubReg)
     915        1179 :         .addReg(Spill.VGPR)
     916        1179 :         .addImm(Spill.Lane);
     917             : 
     918        1179 :       if (NumSubRegs > 1)
     919         702 :         MIB.addReg(SuperReg, RegState::ImplicitDefine);
     920             :     } else {
     921          82 :       if (OnlyToVGPR)
     922           0 :         return false;
     923             : 
     924             :       // Restore SGPR from a stack slot.
     925             :       // FIXME: We should use S_LOAD_DWORD here for VI.
     926          82 :       unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
     927             :       unsigned Align = FrameInfo.getObjectAlignment(Index);
     928             : 
     929             :       MachinePointerInfo PtrInfo
     930          82 :         = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
     931             : 
     932          82 :       MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo,
     933             :         MachineMemOperand::MOLoad, EltSize,
     934             :         MinAlign(Align, EltSize * i));
     935             : 
     936         164 :       BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
     937             :         .addFrameIndex(Index)              // vaddr
     938          82 :         .addReg(MFI->getScratchRSrcReg())  // srsrc
     939          82 :         .addReg(MFI->getFrameOffsetReg())  // soffset
     940          82 :         .addImm(i * 4)                     // offset
     941             :         .addMemOperand(MMO);
     942             : 
     943             :       auto MIB =
     944         164 :         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
     945          82 :         .addReg(TmpReg, RegState::Kill);
     946             : 
     947          82 :       if (NumSubRegs > 1)
     948          76 :         MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
     949             :     }
     950             :   }
     951             : 
     952         680 :   if (M0CopyReg != AMDGPU::NoRegister) {
     953          42 :     BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
     954          14 :       .addReg(M0CopyReg, RegState::Kill);
     955             :   }
     956             : 
     957         680 :   MI->eraseFromParent();
     958         680 :   return true;
     959             : }
     960             : 
     961             : /// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
     962             : /// a VGPR and the stack slot can be safely eliminated when all other users are
     963             : /// handled.
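                      : // Illustrative note (not part of the original source): the calls below pass
                      : // OnlyToVGPR = true, so spillSGPR/restoreSGPR report failure (return false)
                      : // instead of falling back to a real memory spill when no VGPR lanes were
                      : // reserved for the SGPR, as the OnlyToVGPR early-return in the restore path
                      : // above shows.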
     964        1278 : bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
     965             :   MachineBasicBlock::iterator MI,
     966             :   int FI,
     967             :   RegScavenger *RS) const {
     968        1278 :   switch (MI->getOpcode()) {
     969         644 :   case AMDGPU::SI_SPILL_S512_SAVE:
     970             :   case AMDGPU::SI_SPILL_S256_SAVE:
     971             :   case AMDGPU::SI_SPILL_S128_SAVE:
     972             :   case AMDGPU::SI_SPILL_S64_SAVE:
     973             :   case AMDGPU::SI_SPILL_S32_SAVE:
     974         644 :     return spillSGPR(MI, FI, RS, true);
     975         634 :   case AMDGPU::SI_SPILL_S512_RESTORE:
     976             :   case AMDGPU::SI_SPILL_S256_RESTORE:
     977             :   case AMDGPU::SI_SPILL_S128_RESTORE:
     978             :   case AMDGPU::SI_SPILL_S64_RESTORE:
     979             :   case AMDGPU::SI_SPILL_S32_RESTORE:
     980         634 :     return restoreSGPR(MI, FI, RS, true);
     981           0 :   default:
     982           0 :     llvm_unreachable("not an SGPR spill instruction");
     983             :   }
     984             : }
     985             : 
     986        7834 : void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
     987             :                                         int SPAdj, unsigned FIOperandNum,
     988             :                                         RegScavenger *RS) const {
     989        7834 :   MachineFunction *MF = MI->getParent()->getParent();
     990        7834 :   MachineRegisterInfo &MRI = MF->getRegInfo();
     991             :   MachineBasicBlock *MBB = MI->getParent();
     992        7834 :   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
     993        7834 :   MachineFrameInfo &FrameInfo = MF->getFrameInfo();
      994        7834 :   const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
     995        7834 :   const SIInstrInfo *TII = ST.getInstrInfo();
     996             :   DebugLoc DL = MI->getDebugLoc();
     997             : 
     998        7834 :   MachineOperand &FIOp = MI->getOperand(FIOperandNum);
     999        7834 :   int Index = MI->getOperand(FIOperandNum).getIndex();
    1000             : 
    1001       15668 :   switch (MI->getOpcode()) {
    1002             :     // SGPR register spill
    1003          46 :     case AMDGPU::SI_SPILL_S512_SAVE:
    1004             :     case AMDGPU::SI_SPILL_S256_SAVE:
    1005             :     case AMDGPU::SI_SPILL_S128_SAVE:
    1006             :     case AMDGPU::SI_SPILL_S64_SAVE:
    1007             :     case AMDGPU::SI_SPILL_S32_SAVE: {
    1008          46 :       spillSGPR(MI, Index, RS);
    1009          46 :       break;
    1010             :     }
    1011             : 
    1012             :     // SGPR register restore
    1013          46 :     case AMDGPU::SI_SPILL_S512_RESTORE:
    1014             :     case AMDGPU::SI_SPILL_S256_RESTORE:
    1015             :     case AMDGPU::SI_SPILL_S128_RESTORE:
    1016             :     case AMDGPU::SI_SPILL_S64_RESTORE:
    1017             :     case AMDGPU::SI_SPILL_S32_RESTORE: {
    1018          46 :       restoreSGPR(MI, Index, RS);
    1019          46 :       break;
    1020             :     }
    1021             : 
    1022             :     // VGPR register spill
    1023             :     case AMDGPU::SI_SPILL_V512_SAVE:
    1024             :     case AMDGPU::SI_SPILL_V256_SAVE:
    1025             :     case AMDGPU::SI_SPILL_V128_SAVE:
    1026             :     case AMDGPU::SI_SPILL_V96_SAVE:
    1027             :     case AMDGPU::SI_SPILL_V64_SAVE:
    1028             :     case AMDGPU::SI_SPILL_V32_SAVE: {
    1029        1316 :       const MachineOperand *VData = TII->getNamedOperand(*MI,
    1030             :                                                          AMDGPU::OpName::vdata);
    1031        2632 :       buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
    1032             :             Index,
    1033             :             VData->getReg(), VData->isKill(),
    1034             :             TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
    1035             :             TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
    1036             :             TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
    1037             :             *MI->memoperands_begin(),
    1038             :             RS);
    1039        1316 :       MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
    1040        1316 :       MI->eraseFromParent();
    1041        1316 :       break;
    1042             :     }
    1043             :     case AMDGPU::SI_SPILL_V32_RESTORE:
    1044             :     case AMDGPU::SI_SPILL_V64_RESTORE:
    1045             :     case AMDGPU::SI_SPILL_V96_RESTORE:
    1046             :     case AMDGPU::SI_SPILL_V128_RESTORE:
    1047             :     case AMDGPU::SI_SPILL_V256_RESTORE:
    1048             :     case AMDGPU::SI_SPILL_V512_RESTORE: {
    1049        1227 :       const MachineOperand *VData = TII->getNamedOperand(*MI,
    1050             :                                                          AMDGPU::OpName::vdata);
    1051             : 
    1052        2454 :       buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
    1053             :             Index,
    1054             :             VData->getReg(), VData->isKill(),
    1055             :             TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
    1056             :             TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
    1057             :             TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
    1058             :             *MI->memoperands_begin(),
    1059             :             RS);
    1060        1227 :       MI->eraseFromParent();
    1061        1227 :       break;
    1062             :     }
    1063             : 
    1064             :     default: {
    1065             :       const DebugLoc &DL = MI->getDebugLoc();
    1066             :       bool IsMUBUF = TII->isMUBUF(*MI);
    1067             : 
    1068        5199 :       if (!IsMUBUF &&
    1069         343 :           MFI->getFrameOffsetReg() != MFI->getScratchWaveOffsetReg()) {
    1070             :         // Convert to an absolute stack address by finding the offset from the
    1071             :         // scratch wave base and scaling by the wave size.
    1072             :         //
    1073             :         // In an entry function/kernel the stack address is already the
    1074             :         // absolute address relative to the scratch wave offset.
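                      :         // Worked example (illustrative, not in the original source), assuming
                      :         // a wave64 subtarget: the expansion below computes roughly
                      :         //   Diff   = FrameOffsetReg - ScratchWaveOffsetReg        (s_sub_u32)
                      :         //   Result = (Diff >> log2(64)) + getObjectOffset(Index)  (v_lshrrev, add)
                      :         // i.e. the wave-relative byte offset is scaled down by the wavefront
                      :         // size before the object's frame offset is added.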
    1075             : 
    1076             :         unsigned DiffReg
    1077          36 :           = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
    1078             : 
    1079          36 :         bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32;
    1080          36 :         unsigned ResultReg = IsCopy ?
    1081          31 :           MI->getOperand(0).getReg() :
    1082           5 :           MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    1083             : 
    1084         108 :         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), DiffReg)
    1085          36 :           .addReg(MFI->getFrameOffsetReg())
    1086          36 :           .addReg(MFI->getScratchWaveOffsetReg());
    1087             : 
    1088             :         int64_t Offset = FrameInfo.getObjectOffset(Index);
    1089          36 :         if (Offset == 0) {
    1090             :           // XXX - This never happens because of emergency scavenging slot at 0?
    1091           0 :           BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ResultReg)
    1092           0 :             .addImm(Log2_32(ST.getWavefrontSize()))
    1093           0 :             .addReg(DiffReg);
    1094             :         } else {
    1095             :           unsigned ScaledReg
    1096          36 :             = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    1097             : 
    1098          72 :           BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ScaledReg)
    1099          72 :             .addImm(Log2_32(ST.getWavefrontSize()))
    1100          36 :             .addReg(DiffReg, RegState::Kill);
    1101             : 
    1102             :           // TODO: Fold if use instruction is another add of a constant.
    1103          36 :           if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
    1104          64 :             TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
    1105             :               .addImm(Offset)
    1106          32 :               .addReg(ScaledReg, RegState::Kill);
    1107             :           } else {
    1108             :             unsigned ConstOffsetReg
    1109           4 :               = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
    1110             : 
    1111           8 :             BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
    1112             :               .addImm(Offset);
    1113           8 :             TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
    1114           4 :               .addReg(ConstOffsetReg, RegState::Kill)
    1115           4 :               .addReg(ScaledReg, RegState::Kill);
    1116             :           }
    1117             :         }
    1118             : 
    1119             :         // Don't introduce an extra copy if we're just materializing in a mov.
    1120          36 :         if (IsCopy)
    1121          31 :           MI->eraseFromParent();
    1122             :         else
    1123           5 :           FIOp.ChangeToRegister(ResultReg, false, false, true);
    1124          36 :         return;
    1125             :       }
    1126             : 
    1127        5163 :       if (IsMUBUF) {
    1128             :         // Disable offen so we don't need a 0 vgpr base.
    1129             :         assert(static_cast<int>(FIOperandNum) ==
    1130             :                AMDGPU::getNamedOperandIdx(MI->getOpcode(),
    1131             :                                           AMDGPU::OpName::vaddr));
    1132             : 
    1133             :         assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg()
    1134             :                == MFI->getFrameOffsetReg());
    1135             : 
    1136             :         int64_t Offset = FrameInfo.getObjectOffset(Index);
    1137             :         int64_t OldImm
    1138        4856 :           = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
    1139        4856 :         int64_t NewOffset = OldImm + Offset;
    1140             : 
    1141        9696 :         if (isUInt<12>(NewOffset) &&
    1142        4840 :             buildMUBUFOffsetLoadStore(TII, FrameInfo, MI, Index, NewOffset)) {
    1143        4840 :           MI->eraseFromParent();
    1144        4840 :           return;
    1145             :         }
    1146             :       }
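                      :       // Illustrative note (not in the original source): the MUBUF offset field
                      :       // is a 12-bit unsigned immediate, so e.g. an existing offset of 8 on an
                      :       // object at frame offset 4088 would fold to 4096, fail the isUInt<12>
                      :       // check (maximum 4095), and fall through to the immediate / V_MOV_B32
                      :       // handling below.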
    1147             : 
    1148             :       // If the offset is simply too big, don't convert to a scratch wave offset
    1149             :       // relative index.
    1150             : 
    1151             :       int64_t Offset = FrameInfo.getObjectOffset(Index);
    1152         323 :       FIOp.ChangeToImmediate(Offset);
    1153         323 :       if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
    1154          16 :         unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    1155          32 :         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
    1156             :           .addImm(Offset);
    1157          16 :         FIOp.ChangeToRegister(TmpReg, false, false, true);
    1158             :       }
    1159             :     }
    1160             :   }
    1161             : }
    1162             : 
    1163     4304619 : StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const {
    1164             :   #define AMDGPU_REG_ASM_NAMES
    1165             :   #include "AMDGPURegAsmNames.inc.cpp"
    1166             : 
    1167             :   #define REG_RANGE(BeginReg, EndReg, RegTable)            \
    1168             :     if (Reg >= BeginReg && Reg <= EndReg) {                \
    1169             :       unsigned Index = Reg - BeginReg;                     \
    1170             :       assert(Index < array_lengthof(RegTable));            \
    1171             :       return RegTable[Index];                              \
    1172             :     }
    1173             : 
    1174     5354437 :   REG_RANGE(AMDGPU::VGPR0, AMDGPU::VGPR255, VGPR32RegNames);
    1175     4194963 :   REG_RANGE(AMDGPU::SGPR0, AMDGPU::SGPR103, SGPR32RegNames);
    1176     2943589 :   REG_RANGE(AMDGPU::VGPR0_VGPR1, AMDGPU::VGPR254_VGPR255, VGPR64RegNames);
    1177     1941229 :   REG_RANGE(AMDGPU::SGPR0_SGPR1, AMDGPU::SGPR102_SGPR103, SGPR64RegNames);
    1178     1430149 :   REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2, AMDGPU::VGPR253_VGPR254_VGPR255,
    1179             :             VGPR96RegNames);
    1180             : 
    1181     1737570 :   REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3,
    1182             :             AMDGPU::VGPR252_VGPR253_VGPR254_VGPR255,
    1183             :             VGPR128RegNames);
    1184     1185810 :   REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3,
    1185             :             AMDGPU::SGPR100_SGPR101_SGPR102_SGPR103,
    1186             :             SGPR128RegNames);
    1187             : 
    1188     1361187 :   REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7,
    1189             :             AMDGPU::VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
    1190             :             VGPR256RegNames);
    1191             : 
    1192     1048994 :   REG_RANGE(
    1193             :     AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7_VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15,
    1194             :     AMDGPU::VGPR240_VGPR241_VGPR242_VGPR243_VGPR244_VGPR245_VGPR246_VGPR247_VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
    1195             :     VGPR512RegNames);
    1196             : 
    1197      527672 :   REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7,
    1198             :             AMDGPU::SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
    1199             :             SGPR256RegNames);
    1200             : 
    1201      462200 :   REG_RANGE(
    1202             :     AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7_SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15,
    1203             :     AMDGPU::SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95_SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
    1204             :     SGPR512RegNames
    1205             :   );
    1206             : 
    1207             : #undef REG_RANGE
    1208             : 
    1209             :   // FIXME: Rename flat_scr so we don't need to special case this.
    1210      351320 :   switch (Reg) {
    1211             :   case AMDGPU::FLAT_SCR:
    1212        3679 :     return "flat_scratch";
    1213             :   case AMDGPU::FLAT_SCR_LO:
    1214        7653 :     return "flat_scratch_lo";
    1215             :   case AMDGPU::FLAT_SCR_HI:
    1216        7653 :     return "flat_scratch_hi";
    1217      332335 :   default:
    1218             :     // For the special named registers the default is fine.
    1219      332335 :     return TargetRegisterInfo::getRegAsmName(Reg);
    1220             :   }
    1221             : }
    1222             : 
    1223             : // FIXME: This is very slow. It might be worth creating a map from physreg to
    1224             : // register class.
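                      : // Hedged sketch of the map the FIXME suggests (assumed names, not part of
                      : // this file): build the physreg -> class table once and look it up instead
                      : // of scanning BaseClasses on every call, e.g.
                      : //   static DenseMap<unsigned, const TargetRegisterClass *> Cache;
                      : //   if (Cache.empty())
                      : //     for (const TargetRegisterClass *RC : BaseClasses)
                      : //       for (unsigned R : *RC)
                      : //         Cache.try_emplace(R, RC); // keeps the first match, like the loop below
                      : //   auto It = Cache.find(Reg);
                      : //   return It == Cache.end() ? nullptr : It->second;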
    1225     5681815 : const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
    1226             :   assert(!TargetRegisterInfo::isVirtualRegister(Reg));
    1227             : 
    1228             :   static const TargetRegisterClass *const BaseClasses[] = {
    1229             :     &AMDGPU::VGPR_32RegClass,
    1230             :     &AMDGPU::SReg_32RegClass,
    1231             :     &AMDGPU::VReg_64RegClass,
    1232             :     &AMDGPU::SReg_64RegClass,
    1233             :     &AMDGPU::VReg_96RegClass,
    1234             :     &AMDGPU::VReg_128RegClass,
    1235             :     &AMDGPU::SReg_128RegClass,
    1236             :     &AMDGPU::VReg_256RegClass,
    1237             :     &AMDGPU::SReg_256RegClass,
    1238             :     &AMDGPU::VReg_512RegClass,
    1239             :     &AMDGPU::SReg_512RegClass,
    1240             :     &AMDGPU::SCC_CLASSRegClass,
    1241             :     &AMDGPU::Pseudo_SReg_32RegClass,
    1242             :     &AMDGPU::Pseudo_SReg_128RegClass,
    1243             :   };
    1244             : 
    1245    17779884 :   for (const TargetRegisterClass *BaseClass : BaseClasses) {
    1246    17779884 :     if (BaseClass->contains(Reg)) {
    1247     5681815 :       return BaseClass;
    1248             :     }
    1249             :   }
    1250             :   return nullptr;
    1251             : }
    1252             : 
    1253             : // TODO: It might be helpful to have some target specific flags in
    1254             : // TargetRegisterClass to mark which classes are VGPRs to make this trivial.
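                      : // Hypothetical sketch of that TODO (no such flag exists in this snapshot):
                      : // with a per-class target flag the size switch below would reduce to a
                      : // single bit test, e.g.
                      : //   return RC->TSFlags & SIRegClassFlag::IsVGPR;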
    1255    12948201 : bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
    1256             :   unsigned Size = getRegSizeInBits(*RC);
    1257    12948201 :   if (Size < 32)
    1258             :     return false;
    1259    12939600 :   switch (Size) {
    1260     6908467 :   case 32:
    1261     6908467 :     return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr;
    1262     4120946 :   case 64:
    1263     4120946 :     return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr;
    1264        1293 :   case 96:
    1265        1293 :     return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr;
    1266     1671759 :   case 128:
    1267     1671759 :     return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr;
    1268      192506 :   case 256:
    1269      192506 :     return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr;
    1270       44629 :   case 512:
    1271       44629 :     return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr;
    1272           0 :   default:
    1273           0 :     llvm_unreachable("Invalid register class size");
    1274             :   }
    1275             : }
    1276             : 
    1277      152473 : const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
    1278             :                                          const TargetRegisterClass *SRC) const {
    1279      152473 :   switch (getRegSizeInBits(*SRC)) {
    1280             :   case 32:
    1281             :     return &AMDGPU::VGPR_32RegClass;
    1282       17178 :   case 64:
    1283       17178 :     return &AMDGPU::VReg_64RegClass;
    1284           0 :   case 96:
    1285           0 :     return &AMDGPU::VReg_96RegClass;
    1286        5977 :   case 128:
    1287        5977 :     return &AMDGPU::VReg_128RegClass;
    1288          98 :   case 256:
    1289          98 :     return &AMDGPU::VReg_256RegClass;
    1290          17 :   case 512:
    1291          17 :     return &AMDGPU::VReg_512RegClass;
    1292           0 :   default:
    1293           0 :     llvm_unreachable("Invalid register class size");
    1294             :   }
    1295             : }
    1296             : 
    1297        2383 : const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass(
    1298             :                                          const TargetRegisterClass *VRC) const {
    1299        2383 :   switch (getRegSizeInBits(*VRC)) {
    1300             :   case 32:
    1301             :     return &AMDGPU::SGPR_32RegClass;
    1302         224 :   case 64:
    1303         224 :     return &AMDGPU::SReg_64RegClass;
    1304          12 :   case 128:
    1305          12 :     return &AMDGPU::SReg_128RegClass;
    1306           2 :   case 256:
    1307           2 :     return &AMDGPU::SReg_256RegClass;
    1308           0 :   case 512:
    1309           0 :     return &AMDGPU::SReg_512RegClass;
    1310           0 :   default:
    1311           0 :     llvm_unreachable("Invalid register class size");
    1312             :   }
    1313             : }
    1314             : 
    1315      416677 : const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
    1316             :                          const TargetRegisterClass *RC, unsigned SubIdx) const {
    1317      416677 :   if (SubIdx == AMDGPU::NoSubRegister)
    1318             :     return RC;
    1319             : 
    1320             :   // We can assume that each lane corresponds to one 32-bit register.
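                      :   // Illustrative example (not in the original source): SubIdx = sub0_sub1
                      :   // covers two lanes, so an SGPR super-class maps to SReg_64 and a VGPR
                      :   // super-class maps to VReg_64 in the switches below.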
    1321       69895 :   unsigned Count = getSubRegIndexLaneMask(SubIdx).getNumLanes();
    1322       69895 :   if (isSGPRClass(RC)) {
    1323       32161 :     switch (Count) {
    1324             :     case 1:
    1325             :       return &AMDGPU::SGPR_32RegClass;
    1326          20 :     case 2:
    1327          20 :       return &AMDGPU::SReg_64RegClass;
    1328           0 :     case 4:
    1329           0 :       return &AMDGPU::SReg_128RegClass;
    1330           0 :     case 8:
    1331           0 :       return &AMDGPU::SReg_256RegClass;
    1332           0 :     case 16: /* fall-through */
    1333             :     default:
    1334           0 :       llvm_unreachable("Invalid sub-register class size");
    1335             :     }
    1336             :   } else {
    1337       37734 :     switch (Count) {
    1338             :     case 1:
    1339             :       return &AMDGPU::VGPR_32RegClass;
    1340          78 :     case 2:
    1341          78 :       return &AMDGPU::VReg_64RegClass;
    1342           0 :     case 3:
    1343           0 :       return &AMDGPU::VReg_96RegClass;
    1344           0 :     case 4:
    1345           0 :       return &AMDGPU::VReg_128RegClass;
    1346           0 :     case 8:
    1347           0 :       return &AMDGPU::VReg_256RegClass;
    1348           0 :     case 16: /* fall-through */
    1349             :     default:
    1350           0 :       llvm_unreachable("Invalid sub-register class size");
    1351             :     }
    1352             :   }
    1353             : }
    1354             : 
    1355      561380 : bool SIRegisterInfo::shouldRewriteCopySrc(
    1356             :   const TargetRegisterClass *DefRC,
    1357             :   unsigned DefSubReg,
    1358             :   const TargetRegisterClass *SrcRC,
    1359             :   unsigned SrcSubReg) const {
    1360             :   // We want to prefer the smallest register class possible, so we don't want to
    1361             :   // stop and rewrite on anything that looks like a subregister
    1362             :   // extract. Operations mostly don't care about the super register class, so we
    1363             :   // only want to stop on the most basic of copies between the same register
    1364             :   // class.
    1365             :   //
    1366             :   // e.g. if we have something like
    1367             :   // %0 = ...
    1368             :   // %1 = ...
    1369             :   // %2 = REG_SEQUENCE %0, sub0, %1, sub1, %2, sub2
    1370             :   // %3 = COPY %2, sub0
    1371             :   //
    1372             :   // We want to look through the COPY to find:
    1373             :   //  => %3 = COPY %0
    1374             : 
    1375             :   // Plain copy.
    1376      561380 :   return getCommonSubClass(DefRC, SrcRC) != nullptr;
    1377             : }
    1378             : 
     1379             : /// Returns a register that is not used at any point in the function.
     1380             : /// If all registers are used, then this function will return
     1381             : /// AMDGPU::NoRegister.
    1382             : unsigned
    1383         150 : SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
    1384             :                                    const TargetRegisterClass *RC,
    1385             :                                    const MachineFunction &MF) const {
    1386             : 
    1387        4609 :   for (unsigned Reg : *RC)
    1388        4605 :     if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
    1389         146 :       return Reg;
    1390             :   return AMDGPU::NoRegister;
    1391             : }
    1392             : 
    1393        6586 : ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
    1394             :                                                    unsigned EltSize) const {
    1395        6586 :   if (EltSize == 4) {
    1396             :     static const int16_t Sub0_15[] = {
    1397             :       AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    1398             :       AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    1399             :       AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
    1400             :       AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
    1401             :     };
    1402             : 
    1403             :     static const int16_t Sub0_7[] = {
    1404             :       AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    1405             :       AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    1406             :     };
    1407             : 
    1408             :     static const int16_t Sub0_3[] = {
    1409             :       AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    1410             :     };
    1411             : 
    1412             :     static const int16_t Sub0_2[] = {
    1413             :       AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2,
    1414             :     };
    1415             : 
    1416             :     static const int16_t Sub0_1[] = {
    1417             :       AMDGPU::sub0, AMDGPU::sub1,
    1418             :     };
    1419             : 
    1420        6372 :     switch (AMDGPU::getRegBitWidth(*RC->MC)) {
    1421         977 :     case 32:
    1422         977 :       return {};
    1423             :     case 64:
    1424             :       return makeArrayRef(Sub0_1);
    1425             :     case 96:
    1426             :       return makeArrayRef(Sub0_2);
    1427             :     case 128:
    1428             :       return makeArrayRef(Sub0_3);
    1429             :     case 256:
    1430             :       return makeArrayRef(Sub0_7);
    1431             :     case 512:
    1432             :       return makeArrayRef(Sub0_15);
    1433           0 :     default:
    1434           0 :       llvm_unreachable("unhandled register size");
    1435             :     }
    1436             :   }
    1437             : 
    1438         214 :   if (EltSize == 8) {
    1439             :     static const int16_t Sub0_15_64[] = {
    1440             :       AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
    1441             :       AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
    1442             :       AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
    1443             :       AMDGPU::sub12_sub13, AMDGPU::sub14_sub15
    1444             :     };
    1445             : 
    1446             :     static const int16_t Sub0_7_64[] = {
    1447             :       AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
    1448             :       AMDGPU::sub4_sub5, AMDGPU::sub6_sub7
    1449             :     };
    1450             : 
    1451             : 
    1452             :     static const int16_t Sub0_3_64[] = {
    1453             :       AMDGPU::sub0_sub1, AMDGPU::sub2_sub3
    1454             :     };
    1455             : 
    1456         208 :     switch (AMDGPU::getRegBitWidth(*RC->MC)) {
    1457          16 :     case 64:
    1458          16 :       return {};
    1459             :     case 128:
    1460             :       return makeArrayRef(Sub0_3_64);
    1461             :     case 256:
    1462             :       return makeArrayRef(Sub0_7_64);
    1463             :     case 512:
    1464             :       return makeArrayRef(Sub0_15_64);
    1465           0 :     default:
    1466           0 :       llvm_unreachable("unhandled register size");
    1467             :     }
    1468             :   }
    1469             : 
    1470             :   assert(EltSize == 16 && "unhandled register spill split size");
    1471             : 
    1472             :   static const int16_t Sub0_15_128[] = {
    1473             :     AMDGPU::sub0_sub1_sub2_sub3,
    1474             :     AMDGPU::sub4_sub5_sub6_sub7,
    1475             :     AMDGPU::sub8_sub9_sub10_sub11,
    1476             :     AMDGPU::sub12_sub13_sub14_sub15
    1477             :   };
    1478             : 
    1479             :   static const int16_t Sub0_7_128[] = {
    1480             :     AMDGPU::sub0_sub1_sub2_sub3,
    1481             :     AMDGPU::sub4_sub5_sub6_sub7
    1482             :   };
    1483             : 
    1484           6 :   switch (AMDGPU::getRegBitWidth(*RC->MC)) {
    1485           4 :   case 128:
    1486           4 :     return {};
    1487             :   case 256:
    1488             :     return makeArrayRef(Sub0_7_128);
    1489             :   case 512:
    1490             :     return makeArrayRef(Sub0_15_128);
    1491           0 :   default:
    1492           0 :     llvm_unreachable("unhandled register size");
    1493             :   }
    1494             : }
    1495             : 
    1496             : const TargetRegisterClass*
    1497     5027337 : SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
    1498             :                                   unsigned Reg) const {
    1499     5027337 :   if (TargetRegisterInfo::isVirtualRegister(Reg))
    1500      738565 :     return  MRI.getRegClass(Reg);
    1501             : 
    1502     4288772 :   return getPhysRegClass(Reg);
    1503             : }
    1504             : 
    1505     4825149 : bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
    1506             :                             unsigned Reg) const {
    1507     4825149 :   const TargetRegisterClass * RC = getRegClassForReg(MRI, Reg);
    1508             :   assert(RC && "Register class for the reg not found");
    1509     4825149 :   return hasVGPRs(RC);
    1510             : }
    1511             : 
    1512      191998 : bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
    1513             :                                     const TargetRegisterClass *SrcRC,
    1514             :                                     unsigned SubReg,
    1515             :                                     const TargetRegisterClass *DstRC,
    1516             :                                     unsigned DstSubReg,
    1517             :                                     const TargetRegisterClass *NewRC,
    1518             :                                     LiveIntervals &LIS) const {
    1519             :   unsigned SrcSize = getRegSizeInBits(*SrcRC);
    1520             :   unsigned DstSize = getRegSizeInBits(*DstRC);
    1521             :   unsigned NewSize = getRegSizeInBits(*NewRC);
    1522             : 
     1523             :   // Do not increase the size of registers beyond a dword; we would need to
     1524             :   // allocate adjacent registers and constrain regalloc more than needed.
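                      :   // Illustrative example (not in the original source): coalescing two 64-bit
                      :   // registers into a 128-bit NewRC is rejected (128 exceeds both sizes),
                      :   // while any copy involving a 32-bit register is allowed by the dword check
                      :   // below.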
    1525             : 
    1526             :   // Always allow dword coalescing.
    1527      191998 :   if (SrcSize <= 32 || DstSize <= 32)
    1528             :     return true;
    1529             : 
    1530       52011 :   return NewSize <= DstSize || NewSize <= SrcSize;
    1531             : }
    1532             : 
    1533      124674 : unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
    1534             :                                              MachineFunction &MF) const {
    1535             : 
    1536      124674 :   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
    1537      124674 :   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
    1538             : 
    1539      124674 :   unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
    1540             :                                                        MF.getFunction());
    1541      249348 :   switch (RC->getID()) {
    1542             :   default:
    1543             :     return AMDGPURegisterInfo::getRegPressureLimit(RC, MF);
    1544       62337 :   case AMDGPU::VGPR_32RegClassID:
    1545       62347 :     return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF));
    1546       62337 :   case AMDGPU::SGPR_32RegClassID:
    1547       62450 :     return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF));
    1548             :   }
    1549             : }
    1550             : 
    1551      248352 : unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
    1552             :                                                 unsigned Idx) const {
    1553      248352 :   if (Idx == getVGPRPressureSet())
    1554       62337 :     return getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
    1555       62337 :                                const_cast<MachineFunction &>(MF));
    1556             : 
    1557      186015 :   if (Idx == getSGPRPressureSet())
    1558       62337 :     return getRegPressureLimit(&AMDGPU::SGPR_32RegClass,
    1559       62337 :                                const_cast<MachineFunction &>(MF));
    1560             : 
    1561      123678 :   return AMDGPURegisterInfo::getRegPressureSetLimit(MF, Idx);
    1562             : }
    1563             : 
    1564     1868475 : const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
    1565             :   static const int Empty[] = { -1 };
    1566             : 
    1567     1868475 :   if (hasRegUnit(AMDGPU::M0, RegUnit))
    1568             :     return Empty;
    1569     1865983 :   return AMDGPURegisterInfo::getRegUnitPressureSets(RegUnit);
    1570             : }
    1571             : 
    1572        4085 : unsigned SIRegisterInfo::getReturnAddressReg(const MachineFunction &MF) const {
    1573             :   // Not a callee saved register.
    1574        4085 :   return AMDGPU::SGPR30_SGPR31;
    1575             : }
    1576             : 
    1577             : const TargetRegisterClass *
    1578         287 : SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
    1579             :                                          const MachineRegisterInfo &MRI) const {
    1580         287 :   unsigned Size = getRegSizeInBits(MO.getReg(), MRI);
    1581         287 :   const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
    1582         119 :   if (!RB)
    1583             :     return nullptr;
    1584             : 
    1585         119 :   switch (Size) {
    1586          84 :   case 32:
    1587          84 :     return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
    1588             :                                                   &AMDGPU::SReg_32_XM0RegClass;
    1589          35 :   case 64:
    1590          35 :     return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_64RegClass :
    1591             :                                                    &AMDGPU::SReg_64_XEXECRegClass;
    1592           0 :   case 96:
    1593           0 :     return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_96RegClass :
    1594             :                                                   nullptr;
    1595           0 :   case 128:
    1596           0 :     return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_128RegClass :
    1597             :                                                   &AMDGPU::SReg_128RegClass;
    1598           0 :   default:
    1599           0 :     llvm_unreachable("not implemented");
    1600             :   }
    1601             : }

Generated by: LCOV version 1.13