LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - SIRegisterInfo.cpp (source / functions)
Test: llvm-toolchain.info
Date: 2018-07-13 00:08:38
Coverage: Lines: 531 of 621 (85.5 %) | Functions: 41 of 45 (91.1 %)
Legend: Lines: hit | not hit

          Line data    Source code
       1             : //===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : /// \file
      11             : /// SI implementation of the TargetRegisterInfo class.
      12             : //
      13             : //===----------------------------------------------------------------------===//
      14             : 
      15             : #include "SIRegisterInfo.h"
      16             : #include "AMDGPURegisterBankInfo.h"
      17             : #include "AMDGPUSubtarget.h"
      18             : #include "SIInstrInfo.h"
      19             : #include "SIMachineFunctionInfo.h"
      20             : #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
      21             : #include "llvm/CodeGen/MachineFrameInfo.h"
      22             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      23             : #include "llvm/CodeGen/RegisterScavenging.h"
      24             : #include "llvm/IR/Function.h"
      25             : #include "llvm/IR/LLVMContext.h"
      26             : 
      27             : using namespace llvm;
      28             : 
      29             : static bool hasPressureSet(const int *PSets, unsigned PSetID) {
      30       86298 :   for (unsigned i = 0; PSets[i] != -1; ++i) {
      31       43149 :     if (PSets[i] == (int)PSetID)
      32             :       return true;
      33             :   }
      34             :   return false;
      35             : }
      36             : 
      37       22710 : void SIRegisterInfo::classifyPressureSet(unsigned PSetID, unsigned Reg,
      38             :                                          BitVector &PressureSets) const {
      39       34065 :   for (MCRegUnitIterator U(Reg, this); U.isValid(); ++U) {
      40       22710 :     const int *PSets = getRegUnitPressureSets(*U);
      41       22710 :     if (hasPressureSet(PSets, PSetID)) {
      42             :       PressureSets.set(PSetID);
      43             :       break;
      44             :     }
      45             :   }
      46       22710 : }
      47             : 
      48       99743 : static cl::opt<bool> EnableSpillSGPRToSMEM(
      49             :   "amdgpu-spill-sgpr-to-smem",
      50       99743 :   cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"),
      51      299229 :   cl::init(false));
      52             : 
      53       99743 : static cl::opt<bool> EnableSpillSGPRToVGPR(
      54             :   "amdgpu-spill-sgpr-to-vgpr",
       55       99743 :   cl::desc("Enable spilling SGPRs to VGPRs"),
      56             :   cl::ReallyHidden,
      57      299229 :   cl::init(true));
      58             : 
      59        2271 : SIRegisterInfo::SIRegisterInfo(const SISubtarget &ST) :
      60             :   AMDGPURegisterInfo(),
      61             :   SGPRPressureSets(getNumRegPressureSets()),
      62             :   VGPRPressureSets(getNumRegPressureSets()),
      63             :   SpillSGPRToVGPR(false),
      64        2271 :   SpillSGPRToSMEM(false) {
      65        2271 :   if (EnableSpillSGPRToSMEM && ST.hasScalarStores())
      66           5 :     SpillSGPRToSMEM = true;
      67        2266 :   else if (EnableSpillSGPRToVGPR)
      68        2262 :     SpillSGPRToVGPR = true;
      69             : 
      70        2271 :   unsigned NumRegPressureSets = getNumRegPressureSets();
      71             : 
      72        2271 :   SGPRSetID = NumRegPressureSets;
      73        2271 :   VGPRSetID = NumRegPressureSets;
      74             : 
      75       24981 :   for (unsigned i = 0; i < NumRegPressureSets; ++i) {
      76       11355 :     classifyPressureSet(i, AMDGPU::SGPR0, SGPRPressureSets);
      77       11355 :     classifyPressureSet(i, AMDGPU::VGPR0, VGPRPressureSets);
      78             :   }
      79             : 
      80             :   // Determine the number of reg units for each pressure set.
      81        2271 :   std::vector<unsigned> PressureSetRegUnits(NumRegPressureSets, 0);
      82      987885 :   for (unsigned i = 0, e = getNumRegUnits(); i != e; ++i) {
      83      985614 :     const int *PSets = getRegUnitPressureSets(i);
      84     4383030 :     for (unsigned j = 0; PSets[j] != -1; ++j) {
      85     3397416 :       ++PressureSetRegUnits[PSets[j]];
      86             :     }
      87             :   }
      88             : 
      89             :   unsigned VGPRMax = 0, SGPRMax = 0;
      90       24981 :   for (unsigned i = 0; i < NumRegPressureSets; ++i) {
      91        6813 :     if (isVGPRPressureSet(i) && PressureSetRegUnits[i] > VGPRMax) {
      92        2271 :       VGPRSetID = i;
      93        2271 :       VGPRMax = PressureSetRegUnits[i];
      94        2271 :       continue;
      95             :     }
      96       18168 :     if (isSGPRPressureSet(i) && PressureSetRegUnits[i] > SGPRMax) {
      97        9084 :       SGPRSetID = i;
      98        9084 :       SGPRMax = PressureSetRegUnits[i];
      99             :     }
     100             :   }
     101             : 
     102             :   assert(SGPRSetID < NumRegPressureSets &&
     103             :          VGPRSetID < NumRegPressureSets);
     104        2271 : }
     105             : 
     106       16716 : unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
     107             :   const MachineFunction &MF) const {
     108             : 
     109       16716 :   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
     110       33432 :   unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4;
     111       16716 :   unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
     112       33432 :   return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
     113             : }
     114             : 
     115             : static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount) {
     116             :   unsigned Reg;
     117             : 
     118             :   // Try to place it in a hole after PrivateSegmentBufferReg.
     119       16719 :   if (RegCount & 3) {
     120             :     // We cannot put the segment buffer in (Idx - 4) ... (Idx - 1) due to
      121             :     // alignment constraints, so we have a hole where we can put the wave offset.
     122       16535 :     Reg = RegCount - 1;
     123             :   } else {
     124             :     // We can put the segment buffer in (Idx - 4) ... (Idx - 1) and put the
     125             :     // wave offset before it.
     126         184 :     Reg = RegCount - 5;
     127             :   }
     128             : 
     129             :   return Reg;
     130             : }
     131             : 
     132       16719 : unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
     133             :   const MachineFunction &MF) const {
     134       16719 :   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
     135       16719 :   unsigned Reg = findPrivateSegmentWaveByteOffsetRegIndex(ST.getMaxNumSGPRs(MF));
     136       33438 :   return AMDGPU::SGPR_32RegClass.getRegister(Reg);
     137             : }
     138             : 
     139        1764 : unsigned SIRegisterInfo::reservedStackPtrOffsetReg(
     140             :   const MachineFunction &MF) const {
     141        1764 :   return AMDGPU::SGPR32;
     142             : }
     143             : 
     144       36835 : BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
     145       36835 :   BitVector Reserved(getNumRegs());
     146             : 
      147             :   // EXEC_LO and EXEC_HI could be allocated and used as regular registers, but
     148             :   // this seems likely to result in bugs, so I'm marking them as reserved.
     149       36835 :   reserveRegisterTuples(Reserved, AMDGPU::EXEC);
     150       36835 :   reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
     151             : 
      152             :   // M0 has to be reserved so that llvm accepts it as a live-in to a block.
     153       36835 :   reserveRegisterTuples(Reserved, AMDGPU::M0);
     154             : 
     155             :   // Reserve the memory aperture registers.
     156       36835 :   reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
     157       36835 :   reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
     158       36835 :   reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
     159       36835 :   reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
     160             : 
     161             :   // Reserve xnack_mask registers - support is not implemented in Codegen.
     162       36835 :   reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);
     163             : 
     164             :   // Reserve Trap Handler registers - support is not implemented in Codegen.
     165       36835 :   reserveRegisterTuples(Reserved, AMDGPU::TBA);
     166       36835 :   reserveRegisterTuples(Reserved, AMDGPU::TMA);
     167       36835 :   reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
     168       36835 :   reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
     169       36835 :   reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
     170       36835 :   reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
     171       36835 :   reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
     172       36835 :   reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
     173       36835 :   reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
     174       36835 :   reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);
     175             : 
     176       36835 :   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
     177             : 
     178       36835 :   unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
     179       36835 :   unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
     180      335191 :   for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
     181      149178 :     unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
     182      149178 :     reserveRegisterTuples(Reserved, Reg);
     183             :   }
     184             : 
     185       36835 :   unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
     186       36835 :   unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
     187       50563 :   for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
     188        6864 :     unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
     189        6864 :     reserveRegisterTuples(Reserved, Reg);
     190             :   }
     191             : 
     192             :   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
     193             : 
     194       36835 :   unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
     195       36835 :   if (ScratchWaveOffsetReg != AMDGPU::NoRegister) {
     196             :     // Reserve 1 SGPR for scratch wave offset in case we need to spill.
     197       36835 :     reserveRegisterTuples(Reserved, ScratchWaveOffsetReg);
     198             :   }
     199             : 
     200       36835 :   unsigned ScratchRSrcReg = MFI->getScratchRSrcReg();
     201       36835 :   if (ScratchRSrcReg != AMDGPU::NoRegister) {
     202             :     // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need
     203             :     // to spill.
     204             :     // TODO: May need to reserve a VGPR if doing LDS spilling.
     205       36835 :     reserveRegisterTuples(Reserved, ScratchRSrcReg);
     206             :     assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg));
     207             :   }
     208             : 
     209             :   // We have to assume the SP is needed in case there are calls in the function,
     210             :   // which is detected after the function is lowered. If we aren't really going
     211             :   // to need SP, don't bother reserving it.
     212       36835 :   unsigned StackPtrReg = MFI->getStackPtrOffsetReg();
     213             : 
     214       36835 :   if (StackPtrReg != AMDGPU::NoRegister) {
     215       36835 :     reserveRegisterTuples(Reserved, StackPtrReg);
     216             :     assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
     217             :   }
     218             : 
     219       36835 :   unsigned FrameReg = MFI->getFrameOffsetReg();
     220       36835 :   if (FrameReg != AMDGPU::NoRegister) {
     221       36835 :     reserveRegisterTuples(Reserved, FrameReg);
     222             :     assert(!isSubRegister(ScratchRSrcReg, FrameReg));
     223             :   }
     224             : 
     225       36835 :   return Reserved;
     226             : }
     227             : 
     228       35748 : bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
     229             :   const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>();
     230       35748 :   if (Info->isEntryFunction()) {
     231       32916 :     const MachineFrameInfo &MFI = Fn.getFrameInfo();
     232       32916 :     return MFI.hasStackObjects() || MFI.hasCalls();
     233             :   }
     234             : 
     235             :   // May need scavenger for dealing with callee saved registers.
     236             :   return true;
     237             : }
     238             : 
     239       17874 : bool SIRegisterInfo::requiresFrameIndexScavenging(
     240             :   const MachineFunction &MF) const {
     241       17874 :   const MachineFrameInfo &MFI = MF.getFrameInfo();
     242       17874 :   if (MFI.hasStackObjects())
     243             :     return true;
     244             : 
     245             :   // May need to deal with callee saved registers.
     246             :   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
     247       17257 :   return !Info->isEntryFunction();
     248             : }
     249             : 
     250       17549 : bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
     251             :   const MachineFunction &MF) const {
     252             :   // m0 is needed for the scalar store offset. m0 is unallocatable, so we can't
     253             :   // create a virtual register for it during frame index elimination, so the
     254             :   // scavenger is directly needed.
     255       18166 :   return MF.getFrameInfo().hasStackObjects() &&
     256       17879 :          MF.getSubtarget<SISubtarget>().hasScalarStores() &&
     257       17879 :          MF.getInfo<SIMachineFunctionInfo>()->hasSpilledSGPRs();
     258             : }
     259             : 
     260       17861 : bool SIRegisterInfo::requiresVirtualBaseRegisters(
     261             :   const MachineFunction &) const {
     262             :   // There are no special dedicated stack or frame pointers.
     263       17861 :   return true;
     264             : }
     265             : 
     266       35784 : bool SIRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
     267             :   // This helps catch bugs as verifier errors.
     268       35784 :   return true;
     269             : }
     270             : 
     271        4447 : int64_t SIRegisterInfo::getMUBUFInstrOffset(const MachineInstr *MI) const {
     272             :   assert(SIInstrInfo::isMUBUF(*MI));
     273             : 
     274        8894 :   int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
     275             :                                           AMDGPU::OpName::offset);
     276        8894 :   return MI->getOperand(OffIdx).getImm();
     277             : }
     278             : 
     279           4 : int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
     280             :                                                  int Idx) const {
     281           4 :   if (!SIInstrInfo::isMUBUF(*MI))
     282             :     return 0;
     283             : 
     284             :   assert(Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
     285             :                                            AMDGPU::OpName::vaddr) &&
     286             :          "Should never see frame index on non-address operand");
     287             : 
     288           4 :   return getMUBUFInstrOffset(MI);
     289             : }
     290             : 
     291        4787 : bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
     292        4787 :   if (!MI->mayLoadOrStore())
     293             :     return false;
     294             : 
     295        4443 :   int64_t FullOffset = Offset + getMUBUFInstrOffset(MI);
     296             : 
     297        4443 :   return !isUInt<12>(FullOffset);
     298             : }
     299             : 
     300           0 : void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
     301             :                                                   unsigned BaseReg,
     302             :                                                   int FrameIdx,
     303             :                                                   int64_t Offset) const {
     304             :   MachineBasicBlock::iterator Ins = MBB->begin();
     305           0 :   DebugLoc DL; // Defaults to "unknown"
     306             : 
     307           0 :   if (Ins != MBB->end())
     308             :     DL = Ins->getDebugLoc();
     309             : 
     310           0 :   MachineFunction *MF = MBB->getParent();
     311           0 :   const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
     312             :   const SIInstrInfo *TII = Subtarget.getInstrInfo();
     313             : 
     314           0 :   if (Offset == 0) {
     315           0 :     BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), BaseReg)
     316             :       .addFrameIndex(FrameIdx);
     317             :     return;
     318             :   }
     319             : 
     320           0 :   MachineRegisterInfo &MRI = MF->getRegInfo();
     321           0 :   unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
     322             : 
     323           0 :   unsigned FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
     324             : 
     325           0 :   BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
     326             :     .addImm(Offset);
     327           0 :   BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), FIReg)
     328             :     .addFrameIndex(FrameIdx);
     329             : 
     330           0 :   TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
     331           0 :     .addReg(OffsetReg, RegState::Kill)
     332           0 :     .addReg(FIReg);
     333             : }
     334             : 
     335           0 : void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
     336             :                                        int64_t Offset) const {
     337             : 
     338           0 :   MachineBasicBlock *MBB = MI.getParent();
     339           0 :   MachineFunction *MF = MBB->getParent();
     340           0 :   const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
     341             :   const SIInstrInfo *TII = Subtarget.getInstrInfo();
     342             : 
     343             : #ifndef NDEBUG
     344             :   // FIXME: Is it possible to be storing a frame index to itself?
     345             :   bool SeenFI = false;
     346             :   for (const MachineOperand &MO: MI.operands()) {
     347             :     if (MO.isFI()) {
     348             :       if (SeenFI)
     349             :         llvm_unreachable("should not see multiple frame indices");
     350             : 
     351             :       SeenFI = true;
     352             :     }
     353             :   }
     354             : #endif
     355             : 
     356           0 :   MachineOperand *FIOp = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
     357             :   assert(FIOp && FIOp->isFI() && "frame index must be address operand");
     358             :   assert(TII->isMUBUF(MI));
     359             :   assert(TII->getNamedOperand(MI, AMDGPU::OpName::soffset)->getReg() ==
     360             :          MF->getInfo<SIMachineFunctionInfo>()->getFrameOffsetReg() &&
     361             :          "should only be seeing frame offset relative FrameIndex");
     362             : 
     363             : 
     364           0 :   MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
     365           0 :   int64_t NewOffset = OffsetOp->getImm() + Offset;
     366             :   assert(isUInt<12>(NewOffset) && "offset should be legal");
     367             : 
     368           0 :   FIOp->ChangeToRegister(BaseReg, false);
     369             :   OffsetOp->setImm(NewOffset);
     370           0 : }
     371             : 
     372           0 : bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
     373             :                                         unsigned BaseReg,
     374             :                                         int64_t Offset) const {
     375           0 :   if (!SIInstrInfo::isMUBUF(*MI))
     376             :     return false;
     377             : 
     378           0 :   int64_t NewOffset = Offset + getMUBUFInstrOffset(MI);
     379             : 
     380           0 :   return isUInt<12>(NewOffset);
     381             : }
     382             : 
     383           0 : const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
     384             :   const MachineFunction &MF, unsigned Kind) const {
     385             :   // This is inaccurate. It depends on the instruction and address space. The
     386             :   // only place where we should hit this is for dealing with frame indexes /
     387             :   // private accesses, so this is correct in that case.
     388           0 :   return &AMDGPU::VGPR_32RegClass;
     389             : }
     390             : 
     391        1267 : static unsigned getNumSubRegsForSpillOp(unsigned Op) {
     392             : 
     393        1267 :   switch (Op) {
     394             :   case AMDGPU::SI_SPILL_S512_SAVE:
     395             :   case AMDGPU::SI_SPILL_S512_RESTORE:
     396             :   case AMDGPU::SI_SPILL_V512_SAVE:
     397             :   case AMDGPU::SI_SPILL_V512_RESTORE:
     398             :     return 16;
     399           0 :   case AMDGPU::SI_SPILL_S256_SAVE:
     400             :   case AMDGPU::SI_SPILL_S256_RESTORE:
     401             :   case AMDGPU::SI_SPILL_V256_SAVE:
     402             :   case AMDGPU::SI_SPILL_V256_RESTORE:
     403           0 :     return 8;
     404         664 :   case AMDGPU::SI_SPILL_S128_SAVE:
     405             :   case AMDGPU::SI_SPILL_S128_RESTORE:
     406             :   case AMDGPU::SI_SPILL_V128_SAVE:
     407             :   case AMDGPU::SI_SPILL_V128_RESTORE:
     408         664 :     return 4;
     409           0 :   case AMDGPU::SI_SPILL_V96_SAVE:
     410             :   case AMDGPU::SI_SPILL_V96_RESTORE:
     411           0 :     return 3;
     412           0 :   case AMDGPU::SI_SPILL_S64_SAVE:
     413             :   case AMDGPU::SI_SPILL_S64_RESTORE:
     414             :   case AMDGPU::SI_SPILL_V64_SAVE:
     415             :   case AMDGPU::SI_SPILL_V64_RESTORE:
     416           0 :     return 2;
     417         603 :   case AMDGPU::SI_SPILL_S32_SAVE:
     418             :   case AMDGPU::SI_SPILL_S32_RESTORE:
     419             :   case AMDGPU::SI_SPILL_V32_SAVE:
     420             :   case AMDGPU::SI_SPILL_V32_RESTORE:
     421         603 :     return 1;
     422           0 :   default: llvm_unreachable("Invalid spill opcode");
     423             :   }
     424             : }
     425             : 
     426             : static int getOffsetMUBUFStore(unsigned Opc) {
     427        3083 :   switch (Opc) {
     428             :   case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
     429             :     return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
     430             :   case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
     431             :     return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
     432             :   case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
     433             :     return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
     434             :   case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
     435             :     return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
     436             :   case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
     437             :     return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
     438             :   case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
     439             :     return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
     440             :   case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
     441             :     return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
     442             :   default:
     443             :     return -1;
     444             :   }
     445             : }
     446             : 
     447        1772 : static int getOffsetMUBUFLoad(unsigned Opc) {
     448        1772 :   switch (Opc) {
     449             :   case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
     450             :     return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
     451          84 :   case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
     452          84 :     return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
     453           8 :   case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
     454           8 :     return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
     455          24 :   case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
     456          24 :     return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
     457           2 :   case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
     458           2 :     return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
     459           2 :   case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
     460           2 :     return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
     461          16 :   case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
     462          16 :     return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
     463           2 :   case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
     464           2 :     return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
     465           2 :   case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
     466           2 :     return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
     467           2 :   case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
     468           2 :     return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
     469           2 :   case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
     470           2 :     return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
     471           3 :   case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
     472           3 :     return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
     473           4 :   case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
     474           4 :     return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
     475           0 :   default:
     476           0 :     return -1;
     477             :   }
     478             : }
     479             : 
     480             : // This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
     481             : // need to handle the case where an SGPR may need to be spilled while spilling.
     482        4855 : static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
     483             :                                       MachineFrameInfo &MFI,
     484             :                                       MachineBasicBlock::iterator MI,
     485             :                                       int Index,
     486             :                                       int64_t Offset) {
     487        4855 :   MachineBasicBlock *MBB = MI->getParent();
     488             :   const DebugLoc &DL = MI->getDebugLoc();
     489        4855 :   bool IsStore = MI->mayStore();
     490             : 
     491        4855 :   unsigned Opc = MI->getOpcode();
     492        6627 :   int LoadStoreOp = IsStore ?
     493             :     getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
     494        1772 :   if (LoadStoreOp == -1)
     495             :     return false;
     496             : 
     497        4855 :   const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
     498        9710 :   MachineInstrBuilder NewMI = BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
     499             :     .add(*Reg)
     500        4855 :     .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
     501        4855 :     .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
     502             :     .addImm(Offset)
     503             :     .addImm(0) // glc
     504             :     .addImm(0) // slc
     505             :     .addImm(0) // tfe
     506        4855 :     .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
     507             : 
     508             :   const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
     509        4855 :                                                        AMDGPU::OpName::vdata_in);
     510        4855 :   if (VDataIn)
     511             :     NewMI.add(*VDataIn);
     512             :   return true;
     513             : }
     514             : 
     515        2446 : void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
     516             :                                          unsigned LoadStoreOp,
     517             :                                          int Index,
     518             :                                          unsigned ValueReg,
     519             :                                          bool IsKill,
     520             :                                          unsigned ScratchRsrcReg,
     521             :                                          unsigned ScratchOffsetReg,
     522             :                                          int64_t InstOffset,
     523             :                                          MachineMemOperand *MMO,
     524             :                                          RegScavenger *RS) const {
     525        2446 :   MachineBasicBlock *MBB = MI->getParent();
     526        2446 :   MachineFunction *MF = MI->getParent()->getParent();
     527        2446 :   const SISubtarget &ST =  MF->getSubtarget<SISubtarget>();
     528             :   const SIInstrInfo *TII = ST.getInstrInfo();
     529        2446 :   const MachineFrameInfo &MFI = MF->getFrameInfo();
     530             : 
     531        2446 :   const MCInstrDesc &Desc = TII->get(LoadStoreOp);
     532             :   const DebugLoc &DL = MI->getDebugLoc();
     533        2446 :   bool IsStore = Desc.mayStore();
     534             : 
     535             :   bool RanOutOfSGPRs = false;
     536             :   bool Scavenged = false;
     537             :   unsigned SOffset = ScratchOffsetReg;
     538             : 
     539        2446 :   const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
     540        4892 :   unsigned NumSubRegs = AMDGPU::getRegBitWidth(RC->getID()) / 32;
     541        2446 :   unsigned Size = NumSubRegs * 4;
     542        2446 :   int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
     543             :   const int64_t OriginalImmOffset = Offset;
     544             : 
     545             :   unsigned Align = MFI.getObjectAlignment(Index);
     546             :   const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();
     547             : 
     548        2446 :   if (!isUInt<12>(Offset + Size)) {
     549             :     SOffset = AMDGPU::NoRegister;
     550             : 
     551             :     // We don't have access to the register scavenger if this function is called
      552             :     // during PEI::scavengeFrameVirtualRegs().
     553         232 :     if (RS)
     554           0 :       SOffset = RS->FindUnusedReg(&AMDGPU::SGPR_32RegClass);
     555             : 
     556           0 :     if (SOffset == AMDGPU::NoRegister) {
      557             :       // There are no free SGPRs, and we are in the process of spilling
     558             :       // VGPRs too.  Since we need a VGPR in order to spill SGPRs (this is true
     559             :       // on SI/CI and on VI it is true until we implement spilling using scalar
     560             :       // stores), we have no way to free up an SGPR.  Our solution here is to
     561             :       // add the offset directly to the ScratchOffset register, and then
      562             :       // subtract the offset after the spill to return ScratchOffset to its
     563             :       // original value.
     564             :       RanOutOfSGPRs = true;
     565             :       SOffset = ScratchOffsetReg;
     566             :     } else {
     567             :       Scavenged = true;
     568             :     }
     569             : 
     570         696 :     BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
     571         232 :       .addReg(ScratchOffsetReg)
     572             :       .addImm(Offset);
     573             : 
     574             :     Offset = 0;
     575             :   }
     576             : 
     577             :   const unsigned EltSize = 4;
     578             : 
     579       15330 :   for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += EltSize) {
     580       11770 :     unsigned SubReg = NumSubRegs == 1 ?
     581        5328 :       ValueReg : getSubReg(ValueReg, getSubRegFromChannel(i));
     582             : 
     583             :     unsigned SOffsetRegState = 0;
     584             :     unsigned SrcDstRegState = getDefRegState(!IsStore);
     585        6442 :     if (i + 1 == e) {
     586             :       SOffsetRegState |= getKillRegState(Scavenged);
     587             :       // The last implicit use carries the "Kill" flag.
     588        2446 :       SrcDstRegState |= getKillRegState(IsKill);
     589             :     }
     590             : 
     591        6442 :     MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i);
     592             :     MachineMemOperand *NewMMO
     593       12884 :       = MF->getMachineMemOperand(PInfo, MMO->getFlags(),
     594        6442 :                                  EltSize, MinAlign(Align, EltSize * i));
     595             : 
     596       12884 :     auto MIB = BuildMI(*MBB, MI, DL, Desc)
     597        6442 :       .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill))
     598        6442 :       .addReg(ScratchRsrcReg)
     599        6442 :       .addReg(SOffset, SOffsetRegState)
     600             :       .addImm(Offset)
     601             :       .addImm(0) // glc
     602             :       .addImm(0) // slc
     603             :       .addImm(0) // tfe
     604        6442 :       .addMemOperand(NewMMO);
     605             : 
     606        6442 :     if (NumSubRegs > 1)
     607        5328 :       MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
     608             :   }
     609             : 
     610        2446 :   if (RanOutOfSGPRs) {
     611             :     // Subtract the offset we added to the ScratchOffset register.
     612         696 :     BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffsetReg)
     613         232 :       .addReg(ScratchOffsetReg)
     614             :       .addImm(OriginalImmOffset);
     615             :   }
     616        2446 : }
     617             : 
     618             : static std::pair<unsigned, unsigned> getSpillEltSize(unsigned SuperRegSize,
     619             :                                                      bool Store) {
     620          28 :   if (SuperRegSize % 16 == 0) {
     621             :     return { 16, Store ? AMDGPU::S_BUFFER_STORE_DWORDX4_SGPR :
     622             :                          AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR };
     623             :   }
     624             : 
     625          22 :   if (SuperRegSize % 8 == 0) {
     626             :     return { 8, Store ? AMDGPU::S_BUFFER_STORE_DWORDX2_SGPR :
     627             :                         AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR };
     628             :   }
     629             : 
     630             :   return { 4, Store ? AMDGPU::S_BUFFER_STORE_DWORD_SGPR :
     631             :                       AMDGPU::S_BUFFER_LOAD_DWORD_SGPR};
     632             : }
     633             : 
     634         662 : bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
     635             :                                int Index,
     636             :                                RegScavenger *RS,
     637             :                                bool OnlyToVGPR) const {
     638         662 :   MachineBasicBlock *MBB = MI->getParent();
     639         662 :   MachineFunction *MF = MBB->getParent();
     640         662 :   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
     641             :   DenseSet<unsigned> SGPRSpillVGPRDefinedSet;
     642             : 
     643             :   ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
     644         662 :     = MFI->getSGPRToVGPRSpills(Index);
     645         662 :   bool SpillToVGPR = !VGPRSpills.empty();
     646         662 :   if (OnlyToVGPR && !SpillToVGPR)
     647             :     return false;
     648             : 
     649         662 :   MachineRegisterInfo &MRI = MF->getRegInfo();
     650         662 :   const SISubtarget &ST =  MF->getSubtarget<SISubtarget>();
     651             :   const SIInstrInfo *TII = ST.getInstrInfo();
     652             : 
     653         662 :   unsigned SuperReg = MI->getOperand(0).getReg();
     654             :   bool IsKill = MI->getOperand(0).isKill();
     655             :   const DebugLoc &DL = MI->getDebugLoc();
     656             : 
     657         662 :   MachineFrameInfo &FrameInfo = MF->getFrameInfo();
     658             : 
     659         662 :   bool SpillToSMEM = spillSGPRToSMEM();
     660         662 :   if (SpillToSMEM && OnlyToVGPR)
     661             :     return false;
     662             : 
     663             :   assert(SpillToVGPR || (SuperReg != MFI->getStackPtrOffsetReg() &&
     664             :                          SuperReg != MFI->getFrameOffsetReg() &&
     665             :                          SuperReg != MFI->getScratchWaveOffsetReg()));
     666             : 
     667             :   assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
     668             : 
     669             :   unsigned OffsetReg = AMDGPU::M0;
     670             :   unsigned M0CopyReg = AMDGPU::NoRegister;
     671             : 
     672         662 :   if (SpillToSMEM) {
     673          14 :     if (RS->isRegUsed(AMDGPU::M0)) {
     674          14 :       M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
     675          42 :       BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
     676          14 :         .addReg(AMDGPU::M0);
     677             :     }
     678             :   }
     679             : 
     680             :   unsigned ScalarStoreOp;
     681         662 :   unsigned EltSize = 4;
     682         662 :   const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
     683         676 :   if (SpillToSMEM && isSGPRClass(RC)) {
     684             :     // XXX - if private_element_size is larger than 4 it might be useful to be
     685             :     // able to spill wider vmem spills.
     686             :     std::tie(EltSize, ScalarStoreOp) =
     687          14 :           getSpillEltSize(getRegSizeInBits(*RC) / 8, true);
     688             :   }
     689             : 
     690         662 :   ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
     691         662 :   unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
     692             : 
     693             :   // SubReg carries the "Kill" flag when SubReg == SuperReg.
     694         662 :   unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
     695        3178 :   for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
     696        2036 :     unsigned SubReg = NumSubRegs == 1 ?
     697        1556 :       SuperReg : getSubReg(SuperReg, SplitParts[i]);
     698             : 
     699        1258 :     if (SpillToSMEM) {
     700             :       int64_t FrOffset = FrameInfo.getObjectOffset(Index);
     701             : 
     702             :       // The allocated memory size is really the wavefront size * the frame
     703             :       // index size. The widest register class is 64 bytes, so a 4-byte scratch
     704             :       // allocation is enough to spill this in a single stack object.
     705             :       //
     706             :       // FIXME: Frame size/offsets are computed earlier than this, so the extra
     707             :       // space is still unnecessarily allocated.
     708             : 
     709             :       unsigned Align = FrameInfo.getObjectAlignment(Index);
     710             :       MachinePointerInfo PtrInfo
     711          15 :         = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
     712             :       MachineMemOperand *MMO
     713          30 :         = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
     714          15 :                                    EltSize, MinAlign(Align, EltSize * i));
     715             : 
     716             :       // SMEM instructions only support a single offset, so increment the wave
     717             :       // offset.
     718             : 
     719          15 :       int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
     720          15 :       if (Offset != 0) {
     721          45 :         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
     722          15 :           .addReg(MFI->getFrameOffsetReg())
     723             :           .addImm(Offset);
     724             :       } else {
     725           0 :         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
     726           0 :           .addReg(MFI->getFrameOffsetReg());
     727             :       }
     728             : 
     729          45 :       BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp))
     730          15 :         .addReg(SubReg, getKillRegState(IsKill)) // sdata
     731          15 :         .addReg(MFI->getScratchRSrcReg())        // sbase
     732          15 :         .addReg(OffsetReg, RegState::Kill)       // soff
     733             :         .addImm(0)                               // glc
     734             :         .addMemOperand(MMO);
     735             : 
     736          15 :       continue;
     737             :     }
     738             : 
     739        1243 :     if (SpillToVGPR) {
     740        2322 :       SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
     741             : 
     742             :       // During SGPR spilling to VGPR, determine if the VGPR is defined. The
     743             :       // only circumstance in which we say it is undefined is when it is the
     744             :       // first spill to this VGPR in the first basic block.
     745             :       bool VGPRDefined = true;
     746        1161 :       if (MBB == &MF->front())
     747        1120 :         VGPRDefined = !SGPRSpillVGPRDefinedSet.insert(Spill.VGPR).second;
     748             : 
     749             :       // Mark the "old value of vgpr" input undef only if this is the first sgpr
     750             :       // spill to this specific vgpr in the first basic block.
     751        2322 :       BuildMI(*MBB, MI, DL,
     752             :               TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
     753        1161 :               Spill.VGPR)
     754        1161 :         .addReg(SubReg, getKillRegState(IsKill))
     755        1161 :         .addImm(Spill.Lane)
     756        1161 :         .addReg(Spill.VGPR, VGPRDefined ? 0 : RegState::Undef);
     757             : 
     758             :       // FIXME: Since this spills to another register instead of an actual
     759             :       // frame index, we should delete the frame index when all references to
     760             :       // it are fixed.
     761             :     } else {
      762             :       // XXX - Can the to-VGPR spill fail for some subregisters but not others?
     763          82 :       if (OnlyToVGPR)
     764           0 :         return false;
     765             : 
     766             :       // Spill SGPR to a frame index.
     767             :       // TODO: Should VI try to spill to VGPR and then spill to SMEM?
     768          82 :       unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
      769             : 
     770             : 
     771             :       MachineInstrBuilder Mov
     772         246 :         = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
     773          82 :         .addReg(SubReg, SubKillState);
     774             : 
     775             : 
     776             :       // There could be undef components of a spilled super register.
     777             :       // TODO: Can we detect this and skip the spill?
     778          82 :       if (NumSubRegs > 1) {
     779             :         // The last implicit use of the SuperReg carries the "Kill" flag.
     780             :         unsigned SuperKillState = 0;
     781          76 :         if (i + 1 == e)
     782             :           SuperKillState |= getKillRegState(IsKill);
     783          76 :         Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
     784             :       }
     785             : 
     786             :       unsigned Align = FrameInfo.getObjectAlignment(Index);
     787             :       MachinePointerInfo PtrInfo
      788          82 :         .addReg(MFI->getScratchRSrcReg())  // srsrc
     789             :       MachineMemOperand *MMO
     790         164 :         = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
     791          82 :                                    EltSize, MinAlign(Align, EltSize * i));
     792         246 :       BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
     793          82 :         .addReg(TmpReg, RegState::Kill)    // src
     794             :         .addFrameIndex(Index)              // vaddr
     795          82 :         .addReg(MFI->getScratchRSrcReg())  // srrsrc
     796          82 :         .addReg(MFI->getFrameOffsetReg())  // soffset
     797          82 :         .addImm(i * 4)                     // offset
     798             :         .addMemOperand(MMO);
     799             :     }
     800             :   }
     801             : 
     802         662 :   if (M0CopyReg != AMDGPU::NoRegister) {
     803          42 :     BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
     804          14 :       .addReg(M0CopyReg, RegState::Kill);
     805             :   }
     806             : 
     807         662 :   MI->eraseFromParent();
     808             :   MFI->addToSpilledSGPRs(NumSubRegs);
     809         662 :   return true;
     810             : }
     811             : 
     812         650 : bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
     813             :                                  int Index,
     814             :                                  RegScavenger *RS,
     815             :                                  bool OnlyToVGPR) const {
     816         650 :   MachineFunction *MF = MI->getParent()->getParent();
     817         650 :   MachineRegisterInfo &MRI = MF->getRegInfo();
     818             :   MachineBasicBlock *MBB = MI->getParent();
     819         650 :   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
     820             : 
     821             :   ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
     822         650 :     = MFI->getSGPRToVGPRSpills(Index);
     823         650 :   bool SpillToVGPR = !VGPRSpills.empty();
     824         650 :   if (OnlyToVGPR && !SpillToVGPR)
     825             :     return false;
     826             : 
     827         650 :   MachineFrameInfo &FrameInfo = MF->getFrameInfo();
     828         650 :   const SISubtarget &ST =  MF->getSubtarget<SISubtarget>();
     829             :   const SIInstrInfo *TII = ST.getInstrInfo();
     830             :   const DebugLoc &DL = MI->getDebugLoc();
     831             : 
     832         650 :   unsigned SuperReg = MI->getOperand(0).getReg();
     833         650 :   bool SpillToSMEM = spillSGPRToSMEM();
     834         650 :   if (SpillToSMEM && OnlyToVGPR)
     835             :     return false;
     836             : 
     837             :   assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
     838             : 
     839             :   unsigned OffsetReg = AMDGPU::M0;
     840             :   unsigned M0CopyReg = AMDGPU::NoRegister;
     841             : 
     842         650 :   if (SpillToSMEM) {
     843          14 :     if (RS->isRegUsed(AMDGPU::M0)) {
     844          14 :       M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
     845          42 :       BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
     846          14 :         .addReg(AMDGPU::M0);
     847             :     }
     848             :   }
     849             : 
     850         650 :   unsigned EltSize = 4;
     851             :   unsigned ScalarLoadOp;
     852             : 
     853         650 :   const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
     854         664 :   if (SpillToSMEM && isSGPRClass(RC)) {
     855             :     // XXX - if private_element_size is larger than 4 it might be useful to be
     856             :     // able to spill wider vmem spills.
     857             :     std::tie(EltSize, ScalarLoadOp) =
     858          14 :           getSpillEltSize(getRegSizeInBits(*RC) / 8, false);
     859             :   }
     860             : 
     861         650 :   ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
     862         650 :   unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
     863             : 
     864             :   // SubReg carries the "Kill" flag when SubReg == SuperReg.
     865             :   int64_t FrOffset = FrameInfo.getObjectOffset(Index);
     866             : 
     867        3132 :   for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
     868        2009 :     unsigned SubReg = NumSubRegs == 1 ?
     869        1536 :       SuperReg : getSubReg(SuperReg, SplitParts[i]);
     870             : 
     871        1241 :     if (SpillToSMEM) {
     872             :       // FIXME: Size may be > 4 but extra bytes wasted.
     873             :       unsigned Align = FrameInfo.getObjectAlignment(Index);
     874             :       MachinePointerInfo PtrInfo
     875          15 :         = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
     876             :       MachineMemOperand *MMO
     877          30 :         = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
     878          15 :                                    EltSize, MinAlign(Align, EltSize * i));
     879             : 
     880             :       // Add i * 4 offset
     881          15 :       int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
     882          15 :       if (Offset != 0) {
     883          45 :         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
     884          15 :           .addReg(MFI->getFrameOffsetReg())
     885             :           .addImm(Offset);
     886             :       } else {
     887           0 :         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
     888           0 :           .addReg(MFI->getFrameOffsetReg());
     889             :       }
     890             : 
     891             :       auto MIB =
     892          45 :         BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg)
     893          15 :         .addReg(MFI->getScratchRSrcReg()) // sbase
     894          15 :         .addReg(OffsetReg, RegState::Kill)                // soff
     895             :         .addImm(0)                        // glc
     896          15 :         .addMemOperand(MMO);
     897             : 
     898          15 :       if (NumSubRegs > 1)
     899           2 :         MIB.addReg(SuperReg, RegState::ImplicitDefine);
     900             : 
     901          15 :       continue;
     902             :     }
     903             : 
     904        1226 :     if (SpillToVGPR) {
     905        2288 :       SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
     906             :       auto MIB =
     907        2288 :         BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
     908             :                 SubReg)
     909        1144 :         .addReg(Spill.VGPR)
     910        2288 :         .addImm(Spill.Lane);
     911             : 
     912        1144 :       if (NumSubRegs > 1)
     913         690 :         MIB.addReg(SuperReg, RegState::ImplicitDefine);
     914             :     } else {
     915          82 :       if (OnlyToVGPR)
     916           0 :         return false;
     917             : 
     918             :       // Restore SGPR from a stack slot.
     919             :       // FIXME: We should use S_LOAD_DWORD here for VI.
     920          82 :       unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
     921             :       unsigned Align = FrameInfo.getObjectAlignment(Index);
     922             : 
     923             :       MachinePointerInfo PtrInfo
     924          82 :         = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
     925             : 
     926         164 :       MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo,
     927             :         MachineMemOperand::MOLoad, EltSize,
     928          82 :         MinAlign(Align, EltSize * i));
     929             : 
     930         246 :       BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
     931             :         .addFrameIndex(Index)              // vaddr
     932          82 :         .addReg(MFI->getScratchRSrcReg())  // srsrc
     933          82 :         .addReg(MFI->getFrameOffsetReg())  // soffset
     934          82 :         .addImm(i * 4)                     // offset
     935             :         .addMemOperand(MMO);
     936             : 
     937             :       auto MIB =
     938         246 :         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
     939          82 :         .addReg(TmpReg, RegState::Kill);
     940             : 
     941          82 :       if (NumSubRegs > 1)
     942          76 :         MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
     943             :     }
     944             :   }
     945             : 
     946         650 :   if (M0CopyReg != AMDGPU::NoRegister) {
     947          42 :     BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
     948          14 :       .addReg(M0CopyReg, RegState::Kill);
     949             :   }
     950             : 
     951         650 :   MI->eraseFromParent();
     952         650 :   return true;
     953             : }
     954             : 
     955             : /// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
     956             : /// a VGPR and the stack slot can be safely eliminated when all other users are
     957             : /// handled.
     958        1220 : bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
     959             :   MachineBasicBlock::iterator MI,
     960             :   int FI,
     961             :   RegScavenger *RS) const {
     962        1220 :   switch (MI->getOpcode()) {
     963         616 :   case AMDGPU::SI_SPILL_S512_SAVE:
     964             :   case AMDGPU::SI_SPILL_S256_SAVE:
     965             :   case AMDGPU::SI_SPILL_S128_SAVE:
     966             :   case AMDGPU::SI_SPILL_S64_SAVE:
     967             :   case AMDGPU::SI_SPILL_S32_SAVE:
     968         616 :     return spillSGPR(MI, FI, RS, true);
     969         604 :   case AMDGPU::SI_SPILL_S512_RESTORE:
     970             :   case AMDGPU::SI_SPILL_S256_RESTORE:
     971             :   case AMDGPU::SI_SPILL_S128_RESTORE:
     972             :   case AMDGPU::SI_SPILL_S64_RESTORE:
     973             :   case AMDGPU::SI_SPILL_S32_RESTORE:
     974         604 :     return restoreSGPR(MI, FI, RS, true);
     975           0 :   default:
     976           0 :     llvm_unreachable("not an SGPR spill instruction");
     977             :   }
     978             : }
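// Editor's sketch: the SI_SPILL_S<N>_SAVE / _RESTORE pseudos dispatched above
// differ only in how many 32-bit sub-registers the spilled tuple covers
// (S32 -> 1, S64 -> 2, S128 -> 4, S256 -> 8, S512 -> 16).  A stand-in for that
// mapping, keyed on the bit width in the opcode name (hypothetical helper, not
// the in-tree getNumSubRegsForSpillOp):
static unsigned dwordsForSpillWidth(unsigned Bits) {
  return Bits / 32; // e.g. 512 bits -> 16 dwords -> 16 lanes or stack slots
}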
     979             : 
     980        7753 : void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
     981             :                                         int SPAdj, unsigned FIOperandNum,
     982             :                                         RegScavenger *RS) const {
     983        7753 :   MachineFunction *MF = MI->getParent()->getParent();
     984        7753 :   MachineRegisterInfo &MRI = MF->getRegInfo();
     985             :   MachineBasicBlock *MBB = MI->getParent();
     986        7753 :   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
     987        7753 :   MachineFrameInfo &FrameInfo = MF->getFrameInfo();
     988        7753 :   const SISubtarget &ST =  MF->getSubtarget<SISubtarget>();
     989             :   const SIInstrInfo *TII = ST.getInstrInfo();
     990             :   DebugLoc DL = MI->getDebugLoc();
     991             : 
     992        7753 :   MachineOperand &FIOp = MI->getOperand(FIOperandNum);
     993        7753 :   int Index = MI->getOperand(FIOperandNum).getIndex();
     994             : 
     995       15506 :   switch (MI->getOpcode()) {
     996             :     // SGPR register spill
     997          46 :     case AMDGPU::SI_SPILL_S512_SAVE:
     998             :     case AMDGPU::SI_SPILL_S256_SAVE:
     999             :     case AMDGPU::SI_SPILL_S128_SAVE:
    1000             :     case AMDGPU::SI_SPILL_S64_SAVE:
    1001             :     case AMDGPU::SI_SPILL_S32_SAVE: {
    1002          46 :       spillSGPR(MI, Index, RS);
    1003          46 :       break;
    1004             :     }
    1005             : 
    1006             :     // SGPR register restore
    1007          46 :     case AMDGPU::SI_SPILL_S512_RESTORE:
    1008             :     case AMDGPU::SI_SPILL_S256_RESTORE:
    1009             :     case AMDGPU::SI_SPILL_S128_RESTORE:
    1010             :     case AMDGPU::SI_SPILL_S64_RESTORE:
    1011             :     case AMDGPU::SI_SPILL_S32_RESTORE: {
    1012          46 :       restoreSGPR(MI, Index, RS);
    1013          46 :       break;
    1014             :     }
    1015             : 
    1016             :     // VGPR register spill
    1017             :     case AMDGPU::SI_SPILL_V512_SAVE:
    1018             :     case AMDGPU::SI_SPILL_V256_SAVE:
    1019             :     case AMDGPU::SI_SPILL_V128_SAVE:
    1020             :     case AMDGPU::SI_SPILL_V96_SAVE:
    1021             :     case AMDGPU::SI_SPILL_V64_SAVE:
    1022             :     case AMDGPU::SI_SPILL_V32_SAVE: {
    1023             :       const MachineOperand *VData = TII->getNamedOperand(*MI,
    1024        1267 :                                                          AMDGPU::OpName::vdata);
    1025        2534 :       buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
    1026             :             Index,
    1027             :             VData->getReg(), VData->isKill(),
    1028             :             TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
    1029             :             TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
    1030             :             TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
    1031        1267 :             *MI->memoperands_begin(),
    1032             :             RS);
    1033        2534 :       MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
    1034        1267 :       MI->eraseFromParent();
    1035        1267 :       break;
    1036             :     }
    1037             :     case AMDGPU::SI_SPILL_V32_RESTORE:
    1038             :     case AMDGPU::SI_SPILL_V64_RESTORE:
    1039             :     case AMDGPU::SI_SPILL_V96_RESTORE:
    1040             :     case AMDGPU::SI_SPILL_V128_RESTORE:
    1041             :     case AMDGPU::SI_SPILL_V256_RESTORE:
    1042             :     case AMDGPU::SI_SPILL_V512_RESTORE: {
    1043             :       const MachineOperand *VData = TII->getNamedOperand(*MI,
    1044        1179 :                                                          AMDGPU::OpName::vdata);
    1045             : 
    1046        2358 :       buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
    1047             :             Index,
    1048             :             VData->getReg(), VData->isKill(),
    1049             :             TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
    1050             :             TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
    1051             :             TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
    1052        1179 :             *MI->memoperands_begin(),
    1053             :             RS);
    1054        1179 :       MI->eraseFromParent();
    1055        1179 :       break;
    1056             :     }
    1057             : 
    1058             :     default: {
    1059             :       const DebugLoc &DL = MI->getDebugLoc();
    1060             :       bool IsMUBUF = TII->isMUBUF(*MI);
    1061             : 
    1062        5559 :       if (!IsMUBUF &&
    1063         344 :           MFI->getFrameOffsetReg() != MFI->getScratchWaveOffsetReg()) {
    1064             :         // Convert to an absolute stack address by finding the offset from the
    1065             :         // scratch wave base and scaling by the wave size.
    1066             :         //
    1067             :         // In an entry function/kernel the stack address is already relative
    1068             :         // to the scratch wave offset, so no conversion is needed.
    1069             : 
    1070             :         unsigned DiffReg
    1071          36 :           = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
    1072             : 
    1073          36 :         bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32;
    1074          72 :         unsigned ResultReg = IsCopy ?
    1075          31 :           MI->getOperand(0).getReg() :
    1076          36 :           MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    1077             : 
    1078         108 :         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), DiffReg)
    1079          36 :           .addReg(MFI->getFrameOffsetReg())
    1080          36 :           .addReg(MFI->getScratchWaveOffsetReg());
    1081             : 
    1082             :         int64_t Offset = FrameInfo.getObjectOffset(Index);
    1083          36 :         if (Offset == 0) {
    1084             :           // XXX - This never happens because of emergency scavenging slot at 0?
    1085           0 :           BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ResultReg)
    1086           0 :             .addImm(Log2_32(ST.getWavefrontSize()))
    1087           0 :             .addReg(DiffReg);
    1088             :         } else {
    1089             :           unsigned ScaledReg
    1090          36 :             = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    1091             : 
    1092         108 :           BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ScaledReg)
    1093          72 :             .addImm(Log2_32(ST.getWavefrontSize()))
    1094          36 :             .addReg(DiffReg, RegState::Kill);
    1095             : 
    1096             :           // TODO: Fold this if the use instruction is another add of a constant.
    1097          36 :           if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
    1098          64 :             TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
    1099             :               .addImm(Offset)
    1100          32 :               .addReg(ScaledReg, RegState::Kill);
    1101             :           } else {
    1102             :             unsigned ConstOffsetReg
    1103           4 :               = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
    1104             : 
    1105          12 :             BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
    1106             :               .addImm(Offset);
    1107           8 :             TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
    1108           4 :               .addReg(ConstOffsetReg, RegState::Kill)
    1109           4 :               .addReg(ScaledReg, RegState::Kill);
    1110             :           }
    1111             :         }
    1112             : 
    1113             :         // Don't introduce an extra copy if we're just materializing in a mov.
    1114          36 :         if (IsCopy)
    1115          31 :           MI->eraseFromParent();
    1116             :         else
    1117           5 :           FIOp.ChangeToRegister(ResultReg, false, false, true);
    1118             :         return;
    1119             :       }
    1120             : 
    1121        5179 :       if (IsMUBUF) {
    1122             :         // Disable offen so we don't need a 0 vgpr base.
    1123             :         assert(static_cast<int>(FIOperandNum) ==
    1124             :                AMDGPU::getNamedOperandIdx(MI->getOpcode(),
    1125             :                                           AMDGPU::OpName::vaddr));
    1126             : 
    1127             :         assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg()
    1128             :                == MFI->getFrameOffsetReg());
    1129             : 
    1130             :         int64_t Offset = FrameInfo.getObjectOffset(Index);
    1131             :         int64_t OldImm
    1132        4871 :           = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
    1133        4871 :         int64_t NewOffset = OldImm + Offset;
    1134             : 
    1135        9726 :         if (isUInt<12>(NewOffset) &&
    1136        4855 :             buildMUBUFOffsetLoadStore(TII, FrameInfo, MI, Index, NewOffset)) {
    1137        4855 :           MI->eraseFromParent();
    1138        4855 :           return;
    1139             :         }
    1140             :       }
    1141             : 
    1142             :       // If the offset is simply too big, don't convert to a scratch wave offset
    1143             :       // relative index.
    1144             : 
    1145             :       int64_t Offset = FrameInfo.getObjectOffset(Index);
    1146         324 :       FIOp.ChangeToImmediate(Offset);
    1147         324 :       if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
    1148          16 :         unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    1149          48 :         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
    1150             :           .addImm(Offset);
    1151          16 :         FIOp.ChangeToRegister(TmpReg, false, false, true);
    1152             :       }
    1153             :     }
    1154             :   }
    1155             : }
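// Editor's sketch of the address computation emitted by the non-MUBUF path
// above (S_SUB_U32, then V_LSHRREV_B32, then an add), written as plain
// integer arithmetic.  The helper name and values are hypothetical; the
// wavefront size is 64 on these subtargets, so the shift amount is 6.
static unsigned frameIndexToLaneOffset(unsigned FrameOffsetReg,
                                       unsigned ScratchWaveOffsetReg,
                                       unsigned Log2WaveSize,
                                       int ObjectOffset) {
  unsigned Diff = FrameOffsetReg - ScratchWaveOffsetReg; // S_SUB_U32
  unsigned Scaled = Diff >> Log2WaveSize;                // V_LSHRREV_B32
  return Scaled + ObjectOffset;                          // getAddNoCarry
}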
    1156             : 
    1157     3871071 : StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const {
    1158             :   #define AMDGPU_REG_ASM_NAMES
    1159             :   #include "AMDGPURegAsmNames.inc.cpp"
    1160             : 
    1161             :   #define REG_RANGE(BeginReg, EndReg, RegTable)            \
    1162             :     if (Reg >= BeginReg && Reg <= EndReg) {                \
    1163             :       unsigned Index = Reg - BeginReg;                     \
    1164             :       assert(Index < array_lengthof(RegTable));            \
    1165             :       return RegTable[Index];                              \
    1166             :     }
    1167             : 
    1168     4822073 :   REG_RANGE(AMDGPU::VGPR0, AMDGPU::VGPR255, VGPR32RegNames);
    1169     3767037 :   REG_RANGE(AMDGPU::SGPR0, AMDGPU::SGPR103, SGPR32RegNames);
    1170     2636771 :   REG_RANGE(AMDGPU::VGPR0_VGPR1, AMDGPU::VGPR254_VGPR255, VGPR64RegNames);
    1171     1738347 :   REG_RANGE(AMDGPU::SGPR0_SGPR1, AMDGPU::SGPR102_SGPR103, SGPR64RegNames);
    1172     1280515 :   REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2, AMDGPU::VGPR253_VGPR254_VGPR255,
    1173             :             VGPR96RegNames);
    1174             : 
    1175     1555552 :   REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3,
    1176             :             AMDGPU::VGPR252_VGPR253_VGPR254_VGPR255,
    1177             :             VGPR128RegNames);
    1178     1061904 :   REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3,
    1179             :             AMDGPU::SGPR100_SGPR101_SGPR102_SGPR103,
    1180             :             SGPR128RegNames);
    1181             : 
    1182     1218721 :   REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7,
    1183             :             AMDGPU::VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
    1184             :             VGPR256RegNames);
    1185             : 
    1186      939424 :   REG_RANGE(
    1187             :     AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7_VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15,
    1188             :     AMDGPU::VGPR240_VGPR241_VGPR242_VGPR243_VGPR244_VGPR245_VGPR246_VGPR247_VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
    1189             :     VGPR512RegNames);
    1190             : 
    1191      473398 :   REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7,
    1192             :             AMDGPU::SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
    1193             :             SGPR256RegNames);
    1194             : 
    1195      414838 :   REG_RANGE(
    1196             :     AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7_SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15,
    1197             :     AMDGPU::SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95_SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
    1198             :     SGPR512RegNames
    1199             :   );
    1200             : 
    1201             : #undef REG_RANGE
    1202             : 
    1203             :   // FIXME: Rename flat_scr so we don't need to special-case this.
    1204      315734 :   switch (Reg) {
    1205             :   case AMDGPU::FLAT_SCR:
    1206        3295 :     return "flat_scratch";
    1207             :   case AMDGPU::FLAT_SCR_LO:
    1208        6885 :     return "flat_scratch_lo";
    1209             :   case AMDGPU::FLAT_SCR_HI:
    1210        6885 :     return "flat_scratch_hi";
    1211      298669 :   default:
    1212             :     // For the special named registers, the default is fine.
    1213      298669 :     return TargetRegisterInfo::getRegAsmName(Reg);
    1214             :   }
    1215             : }
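// Editor's sketch of what each REG_RANGE expansion above does: a register in a
// contiguous enum range indexes directly into a parallel table of names pulled
// in from AMDGPURegAsmNames.inc.cpp.  The table and helper below are
// hypothetical stand-ins for illustration:
static const char *const DemoVGPRNames[] = {"v0", "v1", "v2", "v3"};

static const char *lookupRegName(unsigned Reg, unsigned BeginReg,
                                 unsigned EndReg, const char *const *Table) {
  if (Reg < BeginReg || Reg > EndReg)
    return nullptr;             // not in this range; the caller tries the next
  return Table[Reg - BeginReg]; // position inside the contiguous range
}
// e.g. lookupRegName(BeginReg + 2, BeginReg, BeginReg + 3, DemoVGPRNames)
// yields "v2".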
    1216             : 
    1217             : // FIXME: This is very slow. It might be worth creating a map from physreg to
    1218             : // register class.
    1219     5154372 : const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
    1220             :   assert(!TargetRegisterInfo::isVirtualRegister(Reg));
    1221             : 
    1222             :   static const TargetRegisterClass *const BaseClasses[] = {
    1223             :     &AMDGPU::VGPR_32RegClass,
    1224             :     &AMDGPU::SReg_32RegClass,
    1225             :     &AMDGPU::VReg_64RegClass,
    1226             :     &AMDGPU::SReg_64RegClass,
    1227             :     &AMDGPU::VReg_96RegClass,
    1228             :     &AMDGPU::VReg_128RegClass,
    1229             :     &AMDGPU::SReg_128RegClass,
    1230             :     &AMDGPU::VReg_256RegClass,
    1231             :     &AMDGPU::SReg_256RegClass,
    1232             :     &AMDGPU::VReg_512RegClass,
    1233             :     &AMDGPU::SReg_512RegClass,
    1234             :     &AMDGPU::SCC_CLASSRegClass,
    1235             :     &AMDGPU::Pseudo_SReg_32RegClass,
    1236             :     &AMDGPU::Pseudo_SReg_128RegClass,
    1237             :   };
    1238             : 
    1239    27731090 :   for (const TargetRegisterClass *BaseClass : BaseClasses) {
    1240    30202131 :     if (BaseClass->contains(Reg)) {
    1241             :       return BaseClass;
    1242             :     }
    1243             :   }
    1244             :   return nullptr;
    1245             : }
    1246             : 
    1247             : // TODO: It might be helpful to have some target-specific flags in
    1248             : // TargetRegisterClass to mark which classes are VGPRs to make this trivial.
    1249    11361151 : bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
    1250             :   unsigned Size = getRegSizeInBits(*RC);
    1251    11361151 :   if (Size < 32)
    1252             :     return false;
    1253    11355784 :   switch (Size) {
    1254     5927850 :   case 32:
    1255     5927850 :     return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr;
    1256     3675962 :   case 64:
    1257     3675962 :     return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr;
    1258        1271 :   case 96:
    1259        1271 :     return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr;
    1260     1508294 :   case 128:
    1261     1508294 :     return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr;
    1262      185809 :   case 256:
    1263      185809 :     return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr;
    1264       56598 :   case 512:
    1265       56598 :     return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr;
    1266           0 :   default:
    1267           0 :     llvm_unreachable("Invalid register class size");
    1268             :   }
    1269             : }
    1270             : 
    1271      145563 : const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
    1272             :                                          const TargetRegisterClass *SRC) const {
    1273      145563 :   switch (getRegSizeInBits(*SRC)) {
    1274             :   case 32:
    1275             :     return &AMDGPU::VGPR_32RegClass;
    1276       15553 :   case 64:
    1277       15553 :     return &AMDGPU::VReg_64RegClass;
    1278           0 :   case 96:
    1279           0 :     return &AMDGPU::VReg_96RegClass;
    1280        5783 :   case 128:
    1281        5783 :     return &AMDGPU::VReg_128RegClass;
    1282         109 :   case 256:
    1283         109 :     return &AMDGPU::VReg_256RegClass;
    1284          55 :   case 512:
    1285          55 :     return &AMDGPU::VReg_512RegClass;
    1286           0 :   default:
    1287           0 :     llvm_unreachable("Invalid register class size");
    1288             :   }
    1289             : }
    1290             : 
    1291        1719 : const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass(
    1292             :                                          const TargetRegisterClass *VRC) const {
    1293        1719 :   switch (getRegSizeInBits(*VRC)) {
    1294             :   case 32:
    1295             :     return &AMDGPU::SGPR_32RegClass;
    1296         241 :   case 64:
    1297         241 :     return &AMDGPU::SReg_64RegClass;
    1298          12 :   case 128:
    1299          12 :     return &AMDGPU::SReg_128RegClass;
    1300           2 :   case 256:
    1301           2 :     return &AMDGPU::SReg_256RegClass;
    1302           0 :   case 512:
    1303           0 :     return &AMDGPU::SReg_512RegClass;
    1304           0 :   default:
    1305           0 :     llvm_unreachable("Invalid register class size");
    1306             :   }
    1307             : }
    1308             : 
    1309      373113 : const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
    1310             :                          const TargetRegisterClass *RC, unsigned SubIdx) const {
    1311      373113 :   if (SubIdx == AMDGPU::NoSubRegister)
    1312             :     return RC;
    1313             : 
    1314             :   // We can assume that each lane corresponds to one 32-bit register.
    1315       60203 :   unsigned Count = getSubRegIndexLaneMask(SubIdx).getNumLanes();
    1316       60203 :   if (isSGPRClass(RC)) {
    1317       29338 :     switch (Count) {
    1318             :     case 1:
    1319             :       return &AMDGPU::SGPR_32RegClass;
    1320           0 :     case 2:
    1321           0 :       return &AMDGPU::SReg_64RegClass;
    1322           0 :     case 4:
    1323           0 :       return &AMDGPU::SReg_128RegClass;
    1324           0 :     case 8:
    1325           0 :       return &AMDGPU::SReg_256RegClass;
    1326           0 :     case 16: /* fall-through */
    1327             :     default:
    1328           0 :       llvm_unreachable("Invalid sub-register class size");
    1329             :     }
    1330             :   } else {
    1331       30865 :     switch (Count) {
    1332             :     case 1:
    1333             :       return &AMDGPU::VGPR_32RegClass;
    1334          58 :     case 2:
    1335          58 :       return &AMDGPU::VReg_64RegClass;
    1336           0 :     case 3:
    1337           0 :       return &AMDGPU::VReg_96RegClass;
    1338           0 :     case 4:
    1339           0 :       return &AMDGPU::VReg_128RegClass;
    1340           0 :     case 8:
    1341           0 :       return &AMDGPU::VReg_256RegClass;
    1342           0 :     case 16: /* fall-through */
    1343             :     default:
    1344           0 :       llvm_unreachable("Invalid sub-register class size");
    1345             :     }
    1346             :   }
    1347             : }
    1348             : 
    1349      538157 : bool SIRegisterInfo::shouldRewriteCopySrc(
    1350             :   const TargetRegisterClass *DefRC,
    1351             :   unsigned DefSubReg,
    1352             :   const TargetRegisterClass *SrcRC,
    1353             :   unsigned SrcSubReg) const {
    1354             :   // We want to prefer the smallest register class possible, so we don't want to
    1355             :   // stop and rewrite on anything that looks like a subregister
    1356             :   // extract. Operations mostly don't care about the super register class, so we
    1357             :   // only want to stop on the most basic of copies between the same register
    1358             :   // class.
    1359             :   //
    1360             :   // e.g. if we have something like
    1361             :   // %0 = ...
    1362             :   // %1 = ...
    1363             :   // %2 = REG_SEQUENCE %0, sub0, %1, sub1
    1364             :   // %3 = COPY %2, sub0
    1365             :   //
    1366             :   // We want to look through the COPY to find:
    1367             :   //  => %3 = COPY %0
    1368             : 
    1369             :   // Plain copy.
    1370      538157 :   return getCommonSubClass(DefRC, SrcRC) != nullptr;
    1371             : }
    1372             : 
    1373             : /// Returns a register that is not used at any point in the function.
    1374             : /// If all registers are used, then this function will return
    1375             : /// AMDGPU::NoRegister.
    1376             : unsigned
    1377         141 : SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
    1378             :                                    const TargetRegisterClass *RC,
    1379             :                                    const MachineFunction &MF) const {
    1380             : 
    1381        8756 :   for (unsigned Reg : *RC)
    1382        4374 :     if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
    1383             :       return Reg;
    1384             :   return AMDGPU::NoRegister;
    1385             : }
    1386             : 
    1387        5833 : ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
    1388             :                                                    unsigned EltSize) const {
    1389        5833 :   if (EltSize == 4) {
    1390             :     static const int16_t Sub0_15[] = {
    1391             :       AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    1392             :       AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    1393             :       AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
    1394             :       AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
    1395             :     };
    1396             : 
    1397             :     static const int16_t Sub0_7[] = {
    1398             :       AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    1399             :       AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    1400             :     };
    1401             : 
    1402             :     static const int16_t Sub0_3[] = {
    1403             :       AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    1404             :     };
    1405             : 
    1406             :     static const int16_t Sub0_2[] = {
    1407             :       AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2,
    1408             :     };
    1409             : 
    1410             :     static const int16_t Sub0_1[] = {
    1411             :       AMDGPU::sub0, AMDGPU::sub1,
    1412             :     };
    1413             : 
    1414        5674 :     switch (AMDGPU::getRegBitWidth(*RC->MC)) {
    1415         933 :     case 32:
    1416         933 :       return {};
    1417             :     case 64:
    1418             :       return makeArrayRef(Sub0_1);
    1419             :     case 96:
    1420             :       return makeArrayRef(Sub0_2);
    1421             :     case 128:
    1422             :       return makeArrayRef(Sub0_3);
    1423             :     case 256:
    1424             :       return makeArrayRef(Sub0_7);
    1425             :     case 512:
    1426             :       return makeArrayRef(Sub0_15);
    1427           0 :     default:
    1428           0 :       llvm_unreachable("unhandled register size");
    1429             :     }
    1430             :   }
    1431             : 
    1432         159 :   if (EltSize == 8) {
    1433             :     static const int16_t Sub0_15_64[] = {
    1434             :       AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
    1435             :       AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
    1436             :       AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
    1437             :       AMDGPU::sub12_sub13, AMDGPU::sub14_sub15
    1438             :     };
    1439             : 
    1440             :     static const int16_t Sub0_7_64[] = {
    1441             :       AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
    1442             :       AMDGPU::sub4_sub5, AMDGPU::sub6_sub7
    1443             :     };
    1444             : 
    1445             : 
    1446             :     static const int16_t Sub0_3_64[] = {
    1447             :       AMDGPU::sub0_sub1, AMDGPU::sub2_sub3
    1448             :     };
    1449             : 
    1450         153 :     switch (AMDGPU::getRegBitWidth(*RC->MC)) {
    1451          16 :     case 64:
    1452          16 :       return {};
    1453             :     case 128:
    1454             :       return makeArrayRef(Sub0_3_64);
    1455             :     case 256:
    1456             :       return makeArrayRef(Sub0_7_64);
    1457             :     case 512:
    1458             :       return makeArrayRef(Sub0_15_64);
    1459           0 :     default:
    1460           0 :       llvm_unreachable("unhandled register size");
    1461             :     }
    1462             :   }
    1463             : 
    1464             :   assert(EltSize == 16 && "unhandled register spill split size");
    1465             : 
    1466             :   static const int16_t Sub0_15_128[] = {
    1467             :     AMDGPU::sub0_sub1_sub2_sub3,
    1468             :     AMDGPU::sub4_sub5_sub6_sub7,
    1469             :     AMDGPU::sub8_sub9_sub10_sub11,
    1470             :     AMDGPU::sub12_sub13_sub14_sub15
    1471             :   };
    1472             : 
    1473             :   static const int16_t Sub0_7_128[] = {
    1474             :     AMDGPU::sub0_sub1_sub2_sub3,
    1475             :     AMDGPU::sub4_sub5_sub6_sub7
    1476             :   };
    1477             : 
    1478           6 :   switch (AMDGPU::getRegBitWidth(*RC->MC)) {
    1479           4 :   case 128:
    1480           4 :     return {};
    1481             :   case 256:
    1482             :     return makeArrayRef(Sub0_7_128);
    1483             :   case 512:
    1484             :     return makeArrayRef(Sub0_15_128);
    1485           0 :   default:
    1486           0 :     llvm_unreachable("unhandled register size");
    1487             :   }
    1488             : }
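// Editor's sketch: the tables above simply enumerate sub-register indices, so
// the number of entries handed back is the register width divided by the split
// element size, and an element as wide as the whole register yields the empty
// list.  Hypothetical helper for illustration:
static unsigned numSplitParts(unsigned RegBitWidth, unsigned EltSizeBytes) {
  unsigned Parts = RegBitWidth / (EltSizeBytes * 8);
  return Parts <= 1 ? 0 : Parts; // e.g. 128 bits with EltSize 4 -> sub0..sub3
}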
    1489             : 
    1490             : const TargetRegisterClass*
    1491     4306644 : SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
    1492             :                                   unsigned Reg) const {
    1493     4306644 :   if (TargetRegisterInfo::isVirtualRegister(Reg))
    1494      401005 :     return  MRI.getRegClass(Reg);
    1495             : 
    1496     3905639 :   return getPhysRegClass(Reg);
    1497             : }
    1498             : 
    1499     4113018 : bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
    1500             :                             unsigned Reg) const {
    1501     4113018 :   const TargetRegisterClass * RC = getRegClassForReg(MRI, Reg);
    1502             :   assert(RC && "Register class for the reg not found");
    1503     4113018 :   return hasVGPRs(RC);
    1504             : }
    1505             : 
    1506      180793 : bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
    1507             :                                     const TargetRegisterClass *SrcRC,
    1508             :                                     unsigned SubReg,
    1509             :                                     const TargetRegisterClass *DstRC,
    1510             :                                     unsigned DstSubReg,
    1511             :                                     const TargetRegisterClass *NewRC,
    1512             :                                     LiveIntervals &LIS) const {
    1513             :   unsigned SrcSize = getRegSizeInBits(*SrcRC);
    1514             :   unsigned DstSize = getRegSizeInBits(*DstRC);
    1515             :   unsigned NewSize = getRegSizeInBits(*NewRC);
    1516             : 
    1517             :   // Do not increase the size of registers beyond a dword; we would need to
    1518             :   // allocate adjacent registers and constrain regalloc more than needed.
    1519             : 
    1520             :   // Always allow dword coalescing.
    1521      180793 :   if (SrcSize <= 32 || DstSize <= 32)
    1522             :     return true;
    1523             : 
    1524       52317 :   return NewSize <= DstSize || NewSize <= SrcSize;
    1525             : }
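// Editor's sketch of the coalescing rule above as a standalone predicate
// (sizes in bits; hypothetical helper, illustration only):
static bool allowCoalesce(unsigned SrcSize, unsigned DstSize, unsigned NewSize) {
  if (SrcSize <= 32 || DstSize <= 32)
    return true;                                   // dword coalescing is always fine
  return NewSize <= DstSize || NewSize <= SrcSize; // never grow past both inputs
}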
    1526             : 
    1527      112584 : unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
    1528             :                                              MachineFunction &MF) const {
    1529             : 
    1530      112584 :   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
    1531      112584 :   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
    1532             : 
    1533      112584 :   unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
    1534      112584 :                                                        MF.getFunction());
    1535      225168 :   switch (RC->getID()) {
    1536             :   default:
    1537             :     return AMDGPURegisterInfo::getRegPressureLimit(RC, MF);
    1538       56292 :   case AMDGPU::VGPR_32RegClassID:
    1539      168876 :     return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF));
    1540       56292 :   case AMDGPU::SGPR_32RegClassID:
    1541      168876 :     return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF));
    1542             :   }
    1543             : }
    1544             : 
    1545      224493 : unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
    1546             :                                                 unsigned Idx) const {
    1547      224493 :   if (Idx == getVGPRPressureSet())
    1548             :     return getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
    1549       56292 :                                const_cast<MachineFunction &>(MF));
    1550             : 
    1551      168201 :   if (Idx == getSGPRPressureSet())
    1552             :     return getRegPressureLimit(&AMDGPU::SGPR_32RegClass,
    1553       56292 :                                const_cast<MachineFunction &>(MF));
    1554             : 
    1555      111909 :   return AMDGPURegisterInfo::getRegPressureSetLimit(MF, Idx);
    1556             : }
    1557             : 
    1558     1564425 : const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
    1559             :   static const int Empty[] = { -1 };
    1560             : 
    1561     1564425 :   if (hasRegUnit(AMDGPU::M0, RegUnit))
    1562             :     return Empty;
    1563     1562154 :   return AMDGPURegisterInfo::getRegUnitPressureSets(RegUnit);
    1564             : }
    1565             : 
    1566        3293 : unsigned SIRegisterInfo::getReturnAddressReg(const MachineFunction &MF) const {
    1567             :   // Not a callee-saved register.
    1568        3293 :   return AMDGPU::SGPR30_SGPR31;
    1569             : }
    1570             : 
    1571             : const TargetRegisterClass *
    1572         225 : SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
    1573             :                                          const MachineRegisterInfo &MRI) const {
    1574         225 :   unsigned Size = getRegSizeInBits(MO.getReg(), MRI);
    1575         225 :   const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
    1576          98 :   if (!RB)
    1577             :     return nullptr;
    1578             : 
    1579          98 :   switch (Size) {
    1580          71 :   case 32:
    1581          71 :     return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
    1582             :                                                   &AMDGPU::SReg_32_XM0RegClass;
    1583          27 :   case 64:
    1584          27 :     return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_64RegClass :
    1585             :                                                    &AMDGPU::SReg_64_XEXECRegClass;
    1586           0 :   case 96:
    1587           0 :     return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_96RegClass :
    1588             :                                                   nullptr;
    1589           0 :   case 128:
    1590           0 :     return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_128RegClass :
    1591             :                                                   &AMDGPU::SReg_128RegClass;
    1592           0 :   default:
    1593           0 :     llvm_unreachable("not implemented");
    1594             :   }
    1595      299229 : }

Generated by: LCOV version 1.13