LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - SIFrameLowering.cpp (source / functions)
Test: llvm-toolchain.info
Date: 2017-09-14 15:23:50
Coverage:          Hit   Total   Percent
  Lines:           305     310    98.4 %
  Functions:        12      12   100.0 %

          Line data    Source code
       1             : //===----------------------- SIFrameLowering.cpp --------------------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //==-----------------------------------------------------------------------===//
       9             : 
      10             : #include "SIFrameLowering.h"
      11             : #include "AMDGPUSubtarget.h"
      12             : #include "SIInstrInfo.h"
      13             : #include "SIMachineFunctionInfo.h"
      14             : #include "SIRegisterInfo.h"
      15             : 
      16             : #include "llvm/CodeGen/MachineFrameInfo.h"
      17             : #include "llvm/CodeGen/MachineFunction.h"
      18             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      19             : #include "llvm/CodeGen/RegisterScavenging.h"
      20             : 
      21             : using namespace llvm;
      22             : 
      23             : 
      24             : static ArrayRef<MCPhysReg> getAllSGPR128(const SISubtarget &ST,
      25             :                                          const MachineFunction &MF) {
      26             :   return makeArrayRef(AMDGPU::SGPR_128RegClass.begin(),
      27        1173 :                       ST.getMaxNumSGPRs(MF) / 4);
      28             : }
      29             : 
      30             : static ArrayRef<MCPhysReg> getAllSGPRs(const SISubtarget &ST,
      31             :                                        const MachineFunction &MF) {
      32             :   return makeArrayRef(AMDGPU::SGPR_32RegClass.begin(),
      33        2106 :                       ST.getMaxNumSGPRs(MF));
      34             : }
      35             : 
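Both helpers above return only the leading slice of the TableGen'd register table: getAllSGPRs exposes the first getMaxNumSGPRs(MF) 32-bit SGPRs, and getAllSGPR128 divides the same budget by four because each SGPR_128 tuple covers four 32-bit registers. A stand-alone sketch of that prefix-slicing idea, with an invented table and budget:

    #include <cassert>
    #include <cstdio>

    // Hypothetical stand-in for a TableGen'd SGPR table.
    static const unsigned SGPRTable[] = {0, 1, 2, 3, 4, 5, 6, 7};

    // A pointer/length pair covering only the first Budget entries, mirroring
    // makeArrayRef(SGPR_32RegClass.begin(), ST.getMaxNumSGPRs(MF)).
    struct RegSpan {
      const unsigned *Data;
      unsigned Size;
    };

    static RegSpan firstN(const unsigned *Table, unsigned TableSize,
                          unsigned Budget) {
      assert(Budget <= TableSize && "budget must not exceed the table");
      return {Table, Budget};
    }

    int main() {
      // With a budget of 4, only the first four SGPRs are visible to the
      // search loops; the rest of the table is never considered.
      RegSpan Usable = firstN(SGPRTable, 8, 4);
      for (unsigned I = 0; I != Usable.Size; ++I)
        std::printf("usable reg %u\n", Usable.Data[I]);
      return 0;
    }
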
      36         332 : void SIFrameLowering::emitFlatScratchInit(const SISubtarget &ST,
      37             :                                           MachineFunction &MF,
      38             :                                           MachineBasicBlock &MBB) const {
      39         332 :   const SIInstrInfo *TII = ST.getInstrInfo();
      40         332 :   const SIRegisterInfo* TRI = &TII->getRegisterInfo();
      41         332 :   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
      42             : 
      43             :   // We don't need this if we only have spills since there is no user-facing
      44             :   // scratch.
      45             : 
      46             :   // TODO: If we know we don't have flat instructions earlier, we can omit
      47             :   // this from the input registers.
      48             :   //
      49             :   // TODO: We only need to know if we access scratch space through a flat
      50             :   // pointer. Because we only detect if flat instructions are used at all,
      51             :   // this will be used more often than necessary on VI.
      52             : 
      53             :   // Debug location must be unknown since the first debug location is used to
      54             :   // determine the end of the prologue.
      55         602 :   DebugLoc DL;
      56         332 :   MachineBasicBlock::iterator I = MBB.begin();
      57             : 
      58             :   unsigned FlatScratchInitReg
      59         332 :     = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT);
      60             : 
      61         332 :   MachineRegisterInfo &MRI = MF.getRegInfo();
      62         332 :   MRI.addLiveIn(FlatScratchInitReg);
      63         664 :   MBB.addLiveIn(FlatScratchInitReg);
      64             : 
      65         332 :   unsigned FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
      66         332 :   unsigned FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
      67             : 
      68         332 :   unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
      69             : 
      70             :   // Do a 64-bit pointer add.
      71         332 :   if (ST.flatScratchIsPointer()) {
      72         186 :     BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
      73          62 :       .addReg(FlatScrInitLo)
      74          62 :       .addReg(ScratchWaveOffsetReg);
      75         186 :     BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), AMDGPU::FLAT_SCR_HI)
      76          62 :       .addReg(FlatScrInitHi)
      77          62 :       .addImm(0);
      78             : 
      79          62 :     return;
      80             :   }
      81             : 
      82             :   // Copy the size in bytes.
      83         810 :   BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
      84         270 :     .addReg(FlatScrInitHi, RegState::Kill);
      85             : 
      86             :   // Add wave offset in bytes to private base offset.
      87             :   // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
      88         810 :   BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
      89         270 :     .addReg(FlatScrInitLo)
      90         270 :     .addReg(ScratchWaveOffsetReg);
      91             : 
      92             :   // Convert offset to 256-byte units.
      93         810 :   BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32), AMDGPU::FLAT_SCR_HI)
      94         270 :     .addReg(FlatScrInitLo, RegState::Kill)
      95         270 :     .addImm(8);
      96             : }
      97             : 
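When flat_scratch is not a true 64-bit pointer, the sequence above leaves the size in FLAT_SCR_LO and stores the wave's scratch base, converted to 256-byte units, in FLAT_SCR_HI. A numeric sketch of the add-then-shift (the input values are made up):

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint32_t FlatScrInitLo = 0x00010000; // private base in bytes (made up)
      uint32_t WaveOffset = 0x2000;        // this wave's offset in bytes (made up)

      // S_ADD_U32: byte address of the wave's scratch base.
      uint32_t ByteBase = FlatScrInitLo + WaveOffset;

      // S_LSHR_B32 ..., 8: the same base expressed in 256-byte units.
      uint32_t FlatScrHi = ByteBase >> 8;

      std::printf("0x%x bytes -> FLAT_SCR_HI = 0x%x (256-byte units)\n",
                  ByteBase, FlatScrHi);
      return 0;
    }
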
      98       14169 : unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg(
      99             :   const SISubtarget &ST,
     100             :   const SIInstrInfo *TII,
     101             :   const SIRegisterInfo *TRI,
     102             :   SIMachineFunctionInfo *MFI,
     103             :   MachineFunction &MF) const {
     104       14169 :   MachineRegisterInfo &MRI = MF.getRegInfo();
     105             : 
     106             :   // We need to insert initialization of the scratch resource descriptor.
     107       14169 :   unsigned ScratchRsrcReg = MFI->getScratchRSrcReg();
     108       28338 :   if (ScratchRsrcReg == AMDGPU::NoRegister ||
     109       14169 :       !MRI.isPhysRegUsed(ScratchRsrcReg))
     110             :     return AMDGPU::NoRegister;
     111             : 
     112        1523 :   if (ST.hasSGPRInitBug() ||
     113         687 :       ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
     114             :     return ScratchRsrcReg;
     115             : 
     116             :   // We reserved the last registers for this. Shift it down to the end of those
     117             :   // which were actually used.
     118             :   //
     119             :   // FIXME: It might be safer to use a pseudoregister before replacement.
     120             : 
     121             :   // FIXME: We should be able to eliminate unused input registers. We only
     122             :   // cannot do this for the resources required for scratch access. For now we
     123             :   // skip over user SGPRs and may leave unused holes.
     124             : 
     125             :   // We find the resource first because it has an alignment requirement.
     126             : 
     127         782 :   unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
     128         391 :   ArrayRef<MCPhysReg> AllSGPR128s = getAllSGPR128(ST, MF);
     129        1173 :   AllSGPR128s = AllSGPR128s.slice(std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));
     130             : 
     131             :   // Skip the last N reserved elements because they should have already been
     132             :   // reserved for VCC etc.
     133        1999 :   for (MCPhysReg Reg : AllSGPR128s) {
     134             :     // Pick the first unallocated one. Make sure we don't clobber the other
     135             :     // reserved input we needed.
     136        1600 :     if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg)) {
     137         383 :       MRI.replaceRegWith(ScratchRsrcReg, Reg);
     138         766 :       MFI->setScratchRSrcReg(Reg);
     139         383 :       return Reg;
     140             :     }
     141             :   }
     142             : 
     143             :   return ScratchRsrcReg;
     144             : }
     145             : 
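The (getNumPreloadedSGPRs() + 3) / 4 expression above rounds the preloaded SGPR count up to whole SGPR_128 tuples, so the slice skips every tuple that overlaps a user SGPR. A quick stand-alone check of the rounding:

    #include <cstdio>

    // Round a count of 32-bit SGPRs up to the number of SGPR_128 tuples
    // they occupy, as in (MFI->getNumPreloadedSGPRs() + 3) / 4.
    static unsigned tuplesCovered(unsigned NumSGPRs) {
      return (NumSGPRs + 3) / 4;
    }

    int main() {
      const unsigned Counts[] = {0, 1, 4, 5, 8};
      // 5 preloaded SGPRs reach into a second tuple, so 2 tuples are skipped.
      for (unsigned N : Counts)
        std::printf("%u preloaded SGPRs -> skip %u SGPR_128 tuples\n", N,
                    tuplesCovered(N));
      return 0;
    }
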
     146             : // Shift down registers reserved for the scratch wave offset and stack pointer
     147             : // SGPRs.
     148             : std::pair<unsigned, unsigned>
     149       14169 : SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
     150             :   const SISubtarget &ST,
     151             :   const SIInstrInfo *TII,
     152             :   const SIRegisterInfo *TRI,
     153             :   SIMachineFunctionInfo *MFI,
     154             :   MachineFunction &MF) const {
     155       14169 :   MachineRegisterInfo &MRI = MF.getRegInfo();
     156       14169 :   unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
     157             : 
     158             :   // No replacement necessary.
     159       28338 :   if (ScratchWaveOffsetReg == AMDGPU::NoRegister ||
     160       14169 :       !MRI.isPhysRegUsed(ScratchWaveOffsetReg)) {
     161             :     assert(MFI->getStackPtrOffsetReg() == AMDGPU::SP_REG);
     162       26626 :     return std::make_pair(AMDGPU::NoRegister, AMDGPU::NoRegister);
     163             :   }
     164             : 
     165         856 :   unsigned SPReg = MFI->getStackPtrOffsetReg();
     166         856 :   if (ST.hasSGPRInitBug())
     167         154 :     return std::make_pair(ScratchWaveOffsetReg, SPReg);
     168             : 
     169        1404 :   unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
     170             : 
     171         702 :   ArrayRef<MCPhysReg> AllSGPRs = getAllSGPRs(ST, MF);
     172         702 :   if (NumPreloaded > AllSGPRs.size())
     173           0 :     return std::make_pair(ScratchWaveOffsetReg, SPReg);
     174             : 
     175        1404 :   AllSGPRs = AllSGPRs.slice(NumPreloaded);
     176             : 
     177             :   // We need to drop registers from the end of the list that we cannot use
     178             :   // for the scratch wave offset.
     179             :   // + 2 because s102 and s103 do not exist on VI.
     180             :   // + 2 for vcc
     181             :   // + 2 for xnack_mask
     182             :   // + 2 for flat_scratch
     183             :   // + 4 for registers reserved for the scratch resource register
     184             :   // + 1 for the register reserved for the scratch wave offset. (By excluding
     185             :   //     this register from the list to consider, it means that when it is
     186             :   //     being used for the scratch wave offset and there are no other free
     187             :   //     SGPRs, then the value will stay in this register.)
     188             :   // + 1 if the stack pointer is used.
     188             :   // + 1 if stack pointer is used.
     189             :   // ----
     190             :   //  13 (+1)
     191         702 :   unsigned ReservedRegCount = 13;
     192             : 
     193         702 :   if (AllSGPRs.size() < ReservedRegCount)
     194           0 :     return std::make_pair(ScratchWaveOffsetReg, SPReg);
     195             : 
     196             :   bool HandledScratchWaveOffsetReg =
     197         702 :     ScratchWaveOffsetReg != TRI->reservedPrivateSegmentWaveByteOffsetReg(MF);
     198             : 
     199       33209 :   for (MCPhysReg Reg : AllSGPRs.drop_back(ReservedRegCount)) {
     200             :     // Pick the first unallocated SGPR. Be careful not to pick an alias of the
     201             :     // scratch descriptor, since we haven't added its uses yet.
     202       31534 :     if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg)) {
     203       20756 :       if (!HandledScratchWaveOffsetReg) {
     204         431 :         HandledScratchWaveOffsetReg = true;
     205             : 
     206         431 :         MRI.replaceRegWith(ScratchWaveOffsetReg, Reg);
     207         431 :         MFI->setScratchWaveOffsetReg(Reg);
     208             :         ScratchWaveOffsetReg = Reg;
     209             :         break;
     210             :       }
     211             :     }
     212             :   }
     213             : 
     214         702 :   return std::make_pair(ScratchWaveOffsetReg, SPReg);
     215             : }
     216             : 
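The loop above is a first-fit scan: walk the usable SGPRs front to back, skip the reserved tail itemized in the comment, and take the first register that is unused and allocatable. The same scan in miniature, over an invented availability table:

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    // First-fit scan mirroring the loop over AllSGPRs.drop_back(ReservedRegCount).
    // Returns -1 if no candidate is free.
    static int pickFirstFree(const std::vector<bool> &UsedOrReserved,
                             unsigned ReservedTailCount) {
      std::size_t Limit = UsedOrReserved.size() > ReservedTailCount
                              ? UsedOrReserved.size() - ReservedTailCount
                              : 0;
      for (std::size_t Reg = 0; Reg != Limit; ++Reg)
        if (!UsedOrReserved[Reg])
          return static_cast<int>(Reg);
      return -1;
    }

    int main() {
      // Registers 0-2 are taken; 3 is the first free candidate. The last two
      // entries fall in the reserved tail and are never considered.
      std::vector<bool> Used = {true, true, true, false, false, true, true};
      std::printf("picked reg %d\n", pickFirstFree(Used, 2)); // prints 3
      return 0;
    }
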
     217       14169 : void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
     218             :                                                 MachineBasicBlock &MBB) const {
     219             :   // Emit debugger prologue if "amdgpu-debugger-emit-prologue" attribute was
     220             :   // specified.
     221       14169 :   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
     222       28338 :   auto AMDGPUASI = ST.getAMDGPUAS();
     223       14169 :   if (ST.debuggerEmitPrologue())
     224           4 :     emitDebuggerPrologue(MF, MBB);
     225             : 
     226             :   assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
     227             : 
     228       14169 :   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
     229             : 
     230             :   // If we only have SGPR spills, we won't actually be using scratch memory
     231             :   // since these spill to VGPRs.
     232             :   //
     233             :   // FIXME: We should be cleaning up these unused SGPR spill frame indices
     234             :   // somewhere.
     235             : 
     236       14169 :   const SIInstrInfo *TII = ST.getInstrInfo();
     237       14169 :   const SIRegisterInfo *TRI = &TII->getRegisterInfo();
     238       14169 :   MachineRegisterInfo &MRI = MF.getRegInfo();
     239             : 
     240             :   // We need to do the replacement of the private segment buffer and wave offset
     241             :   // register even if there are no stack objects. There could be stores to undef
     242             :   // or a constant without an associated object.
     243             : 
     244             :   // FIXME: We still have implicit uses on SGPR spill instructions in case they
     245             :   // need to spill to vector memory. It's likely that will not happen, but at
     246             :   // this point it appears we need the setup. This part of the prolog should be
     247             :   // emitted after frame indices are eliminated.
     248             : 
     249       14169 :   if (MFI->hasFlatScratchInit())
     250         332 :     emitFlatScratchInit(ST, MF, MBB);
     251             : 
     252       14169 :   unsigned SPReg = MFI->getStackPtrOffsetReg();
     253       14169 :   if (SPReg != AMDGPU::SP_REG) {
     254             :     assert(MRI.isReserved(SPReg) && "SPReg used but not reserved");
     255             : 
     256         658 :     DebugLoc DL;
     257         329 :     const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
     258         329 :     int64_t StackSize = FrameInfo.getStackSize();
     259             : 
     260         329 :     if (StackSize == 0) {
     261        1224 :       BuildMI(MBB, MBB.begin(), DL, TII->get(AMDGPU::COPY), SPReg)
     262         306 :         .addReg(MFI->getScratchWaveOffsetReg());
     263             :     } else {
     264          92 :       BuildMI(MBB, MBB.begin(), DL, TII->get(AMDGPU::S_ADD_U32), SPReg)
     265          23 :         .addReg(MFI->getScratchWaveOffsetReg())
     266          46 :         .addImm(StackSize * ST.getWavefrontSize());
     267             :     }
     268             :   }
     269             : 
     270             :   unsigned ScratchRsrcReg
     271       14169 :     = getReservedPrivateSegmentBufferReg(ST, TII, TRI, MFI, MF);
     272             : 
     273             :   unsigned ScratchWaveOffsetReg;
     274             :   std::tie(ScratchWaveOffsetReg, SPReg)
     275       42507 :     = getReservedPrivateSegmentWaveByteOffsetReg(ST, TII, TRI, MFI, MF);
     276             : 
     277             :   // It's possible to have uses of only ScratchWaveOffsetReg without
     278             :   // ScratchRsrcReg if it's only used for the initialization of flat_scratch,
     279             :   // but the inverse is not true.
     280       14169 :   if (ScratchWaveOffsetReg == AMDGPU::NoRegister) {
     281             :     assert(ScratchRsrcReg == AMDGPU::NoRegister);
     282       13313 :     return;
     283             :   }
     284             : 
     285             :   // We need to insert initialization of the scratch resource descriptor.
     286             :   unsigned PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
     287         856 :     AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
     288             : 
     289         856 :   unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister;
     290         856 :   if (ST.isAmdCodeObjectV2(MF)) {
     291         393 :     PreloadedPrivateBufferReg = MFI->getPreloadedReg(
     292             :       AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
     293             :   }
     294             : 
     295         856 :   bool OffsetRegUsed = MRI.isPhysRegUsed(ScratchWaveOffsetReg);
     296        1692 :   bool ResourceRegUsed = ScratchRsrcReg != AMDGPU::NoRegister &&
     297        1692 :                          MRI.isPhysRegUsed(ScratchRsrcReg);
     298             : 
     299             :   // We added live-ins during argument lowering, but since they were not used
     300             :   // they were deleted. We're adding the uses now, so add them back.
     301         856 :   if (OffsetRegUsed) {
     302             :     assert(PreloadedScratchWaveOffsetReg != AMDGPU::NoRegister &&
     303             :            "scratch wave offset input is required");
     304         856 :     MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
     305         856 :     MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
     306             :   }
     307             : 
     308         856 :   if (ResourceRegUsed && PreloadedPrivateBufferReg != AMDGPU::NoRegister) {
     309             :     assert(ST.isAmdCodeObjectV2(MF) || ST.isMesaGfxShader(MF));
     310         373 :     MRI.addLiveIn(PreloadedPrivateBufferReg);
     311         373 :     MBB.addLiveIn(PreloadedPrivateBufferReg);
     312             :   }
     313             : 
     314             :   // Make the selected register live throughout the function.
     315        3601 :   for (MachineBasicBlock &OtherBB : MF) {
     316        1033 :     if (&OtherBB == &MBB)
     317         856 :       continue;
     318             : 
     319         177 :     if (OffsetRegUsed)
     320         177 :       OtherBB.addLiveIn(ScratchWaveOffsetReg);
     321             : 
     322         177 :     if (ResourceRegUsed)
     323         177 :       OtherBB.addLiveIn(ScratchRsrcReg);
     324             :   }
     325             : 
     326        1712 :   DebugLoc DL;
     327         856 :   MachineBasicBlock::iterator I = MBB.begin();
     328             : 
     329             :   // If we reserved the original input registers, we don't need to copy to the
     330             :   // reserved registers.
     331             : 
     332             :   bool CopyBuffer = ResourceRegUsed &&
     333         373 :     PreloadedPrivateBufferReg != AMDGPU::NoRegister &&
     334        1229 :     ST.isAmdCodeObjectV2(MF) &&
     335         856 :     ScratchRsrcReg != PreloadedPrivateBufferReg;
     336             : 
     337             :   // This needs to be careful of the copying order to avoid overwriting one of
     338             :   // the input registers before it's been copied to its final
     339             :   // destination. Usually the offset should be copied first.
     340         856 :   bool CopyBufferFirst = TRI->isSubRegisterEq(PreloadedPrivateBufferReg,
     341         856 :                                               ScratchWaveOffsetReg);
     342         856 :   if (CopyBuffer && CopyBufferFirst) {
     343           0 :     BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
     344           0 :       .addReg(PreloadedPrivateBufferReg, RegState::Kill);
     345             :   }
     346             : 
     347         856 :   if (OffsetRegUsed &&
     348             :       PreloadedScratchWaveOffsetReg != ScratchWaveOffsetReg) {
     349        1485 :     BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
     350         495 :       .addReg(PreloadedScratchWaveOffsetReg,
     351         495 :               MRI.isPhysRegUsed(ScratchWaveOffsetReg) ? 0 : RegState::Kill);
     352             :   }
     353             : 
     354         856 :   if (CopyBuffer && !CopyBufferFirst) {
     355         117 :     BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
     356          39 :       .addReg(PreloadedPrivateBufferReg, RegState::Kill);
     357             :   }
     358             : 
     359        1690 :   if (ResourceRegUsed && (ST.isMesaGfxShader(MF) || (PreloadedPrivateBufferReg == AMDGPU::NoRegister))) {
     360             :     assert(!ST.isAmdCodeObjectV2(MF));
     361         926 :     const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
     362             : 
     363         463 :     unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
     364         463 :     unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
     365             : 
     366             :     // Use relocations to get the pointer, and setup the other bits manually.
     367         463 :     uint64_t Rsrc23 = TII->getScratchRsrcWords23();
     368             : 
     369         463 :     if (MFI->hasImplicitBufferPtr()) {
     370           2 :       unsigned Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
     371             : 
     372           4 :       if (AMDGPU::isCompute(MF.getFunction()->getCallingConv())) {
     373           2 :         const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);
     374             : 
     375           2 :         BuildMI(MBB, I, DL, Mov64, Rsrc01)
     376           1 :           .addReg(MFI->getImplicitBufferPtrUserSGPR())
     377           1 :           .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
     378             :       } else {
     379           2 :         const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
     380             : 
     381             :         PointerType *PtrTy =
     382           1 :           PointerType::get(Type::getInt64Ty(MF.getFunction()->getContext()),
     383           1 :                            AMDGPUASI.CONSTANT_ADDRESS);
     384           2 :         MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
     385           2 :         auto MMO = MF.getMachineMemOperand(PtrInfo,
     386           1 :                                            MachineMemOperand::MOLoad |
     387           1 :                                            MachineMemOperand::MOInvariant |
     388             :                                            MachineMemOperand::MODereferenceable,
     389           1 :                                            0, 0);
     390           2 :         BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
     391           1 :           .addReg(MFI->getImplicitBufferPtrUserSGPR())
     392           1 :           .addImm(0) // offset
     393           1 :           .addImm(0) // glc
     394           1 :           .addMemOperand(MMO)
     395           1 :           .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
     396             :       }
     397             :     } else {
     398         461 :       unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
     399         461 :       unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
     400             : 
     401         922 :       BuildMI(MBB, I, DL, SMovB32, Rsrc0)
     402         461 :         .addExternalSymbol("SCRATCH_RSRC_DWORD0")
     403         461 :         .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
     404             : 
     405         922 :       BuildMI(MBB, I, DL, SMovB32, Rsrc1)
     406         461 :         .addExternalSymbol("SCRATCH_RSRC_DWORD1")
     407         461 :         .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
     408             : 
     409             :     }
     410             : 
     411         926 :     BuildMI(MBB, I, DL, SMovB32, Rsrc2)
     412         926 :       .addImm(Rsrc23 & 0xffffffff)
     413         463 :       .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
     414             : 
     415         926 :     BuildMI(MBB, I, DL, SMovB32, Rsrc3)
     416         926 :       .addImm(Rsrc23 >> 32)
     417         463 :       .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
     418             :   }
     419             : }
     420             : 
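In the manual-descriptor path above, the 64-bit value from getScratchRsrcWords23() is split across two S_MOV_B32s: a mask extracts the low dword for Rsrc2 and a shift extracts the high dword for Rsrc3. The split in isolation, using an arbitrary constant rather than a real descriptor word:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Arbitrary 64-bit value standing in for TII->getScratchRsrcWords23().
      uint64_t Rsrc23 = 0x00E8F00000000000ULL;

      uint32_t Rsrc2 = static_cast<uint32_t>(Rsrc23 & 0xffffffff); // low dword
      uint32_t Rsrc3 = static_cast<uint32_t>(Rsrc23 >> 32);        // high dword

      std::printf("S_MOV_B32 rsrc2, 0x%08x\n", Rsrc2);
      std::printf("S_MOV_B32 rsrc3, 0x%08x\n", Rsrc3);
      return 0;
    }
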
     421       14832 : void SIFrameLowering::emitPrologue(MachineFunction &MF,
     422             :                                    MachineBasicBlock &MBB) const {
     423       14832 :   const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
     424       14832 :   if (FuncInfo->isEntryFunction()) {
     425       14169 :     emitEntryFunctionPrologue(MF, MBB);
     426       14169 :     return;
     427             :   }
     428             : 
     429         663 :   const MachineFrameInfo &MFI = MF.getFrameInfo();
     430         663 :   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
     431         663 :   const SIInstrInfo *TII = ST.getInstrInfo();
     432             : 
     433         663 :   unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();
     434         663 :   unsigned FramePtrReg = FuncInfo->getFrameOffsetReg();
     435             : 
     436         663 :   MachineBasicBlock::iterator MBBI = MBB.begin();
     437        1326 :   DebugLoc DL;
     438             : 
     439         663 :   bool NeedFP = hasFP(MF);
     440         663 :   if (NeedFP) {
     441             :     // If we need a base pointer, set it up here. It's whatever the value of
     442             :     // the stack pointer is at this point. Any variable size objects will be
     443             :     // allocated after this, so we can still use the base pointer to reference
     444             :     // locals.
     445         549 :     BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
     446         183 :       .addReg(StackPtrReg)
     447         183 :       .setMIFlag(MachineInstr::FrameSetup);
     448             :   }
     449             : 
     450         663 :   uint32_t NumBytes = MFI.getStackSize();
     451         663 :   if (NumBytes != 0 && hasSP(MF)) {
     452         225 :     BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), StackPtrReg)
     453          75 :       .addReg(StackPtrReg)
     454         150 :       .addImm(NumBytes * ST.getWavefrontSize())
     455          75 :       .setMIFlag(MachineInstr::FrameSetup);
     456             :   }
     457             : 
     458         162 :   for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
     459        1326 :          : FuncInfo->getSGPRSpillVGPRs()) {
     460          81 :     if (!Reg.FI.hasValue())
     461           6 :       continue;
     462          75 :     TII->storeRegToStackSlot(MBB, MBBI, Reg.VGPR, true,
     463         150 :                              Reg.FI.getValue(), &AMDGPU::VGPR_32RegClass,
     464          75 :                              &TII->getRegisterInfo());
     465             :   }
     466             : }
     467             : 
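Note that the prologue scales the frame size by the wavefront size: getStackSize() counts bytes per lane, while the stack pointer tracks the whole wave's scratch footprint. A toy version of the multiplication, with illustrative sizes:

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint32_t StackSizePerLane = 16; // FrameInfo.getStackSize(), bytes per lane
      uint32_t WavefrontSize = 64;    // lanes per wave (illustrative)

      // S_ADD_U32 sp, scratch_wave_offset, StackSize * WavefrontSize
      std::printf("SP advances by %u bytes for the whole wave\n",
                  StackSizePerLane * WavefrontSize); // 1024
      return 0;
    }
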
     468       14820 : void SIFrameLowering::emitEpilogue(MachineFunction &MF,
     469             :                                    MachineBasicBlock &MBB) const {
     470       14820 :   const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
     471       14820 :   if (FuncInfo->isEntryFunction())
     472       14165 :     return;
     473             : 
     474         655 :   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
     475         655 :   const SIInstrInfo *TII = ST.getInstrInfo();
     476         655 :   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
     477             : 
     478         162 :   for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
     479        1310 :          : FuncInfo->getSGPRSpillVGPRs()) {
     480          81 :     if (!Reg.FI.hasValue())
     481           6 :       continue;
     482          75 :     TII->loadRegFromStackSlot(MBB, MBBI, Reg.VGPR,
     483         150 :                               Reg.FI.getValue(), &AMDGPU::VGPR_32RegClass,
     484          75 :                               &TII->getRegisterInfo());
     485             :   }
     486             : 
     487         655 :   unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();
     488         655 :   if (StackPtrReg == AMDGPU::NoRegister)
     489             :     return;
     490             : 
     491         655 :   const MachineFrameInfo &MFI = MF.getFrameInfo();
     492         655 :   uint32_t NumBytes = MFI.getStackSize();
     493             : 
     494        1310 :   DebugLoc DL;
     495             : 
     496             :   // FIXME: Clarify the distinction between having no SP set and having one. For
     497             :   // callee functions, it's really whether we need the SP to be accurate or not.
     498             : 
     499         655 :   if (NumBytes != 0 && hasSP(MF)) {
     500         225 :     BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_SUB_U32), StackPtrReg)
     501          75 :       .addReg(StackPtrReg)
     502         150 :       .addImm(NumBytes * ST.getWavefrontSize())
     503          75 :       .setMIFlag(MachineInstr::FrameDestroy);
     504             :   }
     505             : }
     506             : 
     507             : static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
     508        4243 :   for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
     509        1831 :        I != E; ++I) {
     510        1749 :     if (!MFI.isDeadObjectIndex(I))
     511             :       return false;
     512             :   }
     513             : 
     514             :   return true;
     515             : }
     516             : 
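allStackObjectsAreDead is an early-exit scan: one live object is enough to force a real frame. The same predicate over a plain container standing in for MachineFrameInfo:

    #include <cstdio>
    #include <vector>

    // Stand-in for the scan over MFI object indices: true means "dead".
    static bool allStackObjectsAreDead(const std::vector<bool> &ObjectIsDead) {
      for (bool Dead : ObjectIsDead)
        if (!Dead)
          return false; // one live object forces a real frame
      return true;
    }

    int main() {
      std::printf("%d\n", allStackObjectsAreDead({true, true}));        // 1
      std::printf("%d\n", allStackObjectsAreDead({true, false, true})); // 0
      return 0;
    }
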
     517          22 : int SIFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
     518             :                                             unsigned &FrameReg) const {
     519          44 :   const SIRegisterInfo *RI = MF.getSubtarget<SISubtarget>().getRegisterInfo();
     520             : 
     521          22 :   FrameReg = RI->getFrameRegister(MF);
     522          44 :   return MF.getFrameInfo().getObjectOffset(FI);
     523             : }
     524             : 
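getFrameIndexReference reports a base register through the out-parameter and returns the object's offset relative to it; callers combine the two into register-plus-immediate addressing. A minimal sketch of that contract, with an invented frame layout and register number:

    #include <cstdint>
    #include <cstdio>

    // Hypothetical resolver mirroring getFrameIndexReference's shape: the base
    // register comes back through an out-parameter, the offset is the result.
    static int64_t resolveFrameIndex(int FI, unsigned &FrameReg) {
      const int64_t ObjectOffsets[] = {0, 16, 48}; // invented frame layout
      FrameReg = 33;                               // invented register number
      return ObjectOffsets[FI];
    }

    int main() {
      unsigned FrameReg = 0;
      int64_t Offset = resolveFrameIndex(2, FrameReg);
      // The object's address is FrameReg + Offset.
      std::printf("address = reg%u + %lld\n", FrameReg,
                  static_cast<long long>(Offset));
      return 0;
    }
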
     525       14832 : void SIFrameLowering::processFunctionBeforeFrameFinalized(
     526             :   MachineFunction &MF,
     527             :   RegScavenger *RS) const {
     528       14832 :   MachineFrameInfo &MFI = MF.getFrameInfo();
     529             : 
     530       14832 :   if (!MFI.hasStackObjects())
     531             :     return;
     532             : 
     533         612 :   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
     534         612 :   const SIInstrInfo *TII = ST.getInstrInfo();
     535         612 :   const SIRegisterInfo &TRI = TII->getRegisterInfo();
     536         612 :   SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
     537         612 :   bool AllSGPRSpilledToVGPRs = false;
     538             : 
     539         612 :   if (TRI.spillSGPRToVGPR() && FuncInfo->hasSpilledSGPRs()) {
     540         122 :     AllSGPRSpilledToVGPRs = true;
     541             : 
     542             :     // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
     543             :     // are spilled to VGPRs, in which case we can eliminate the stack usage.
     544             :     //
     545             :     // XXX - This operates under the assumption that only other SGPR spills are
     546             :     // users of the frame index. I'm not 100% sure this is correct. The
     547             :     // StackColoring pass has a comment saying a future improvement would be to
     548             :     // merge allocas with spill slots, but for now, according to
     549             :     // MachineFrameInfo isSpillSlot can't alias any other object.
     550         623 :     for (MachineBasicBlock &MBB : MF) {
     551         257 :       MachineBasicBlock::iterator Next;
     552        5847 :       for (auto I = MBB.begin(), E = MBB.end(); I != E; I = Next) {
     553        5076 :         MachineInstr &MI = *I;
     554        5076 :         Next = std::next(I);
     555             : 
     556        5076 :         if (TII->isSGPRSpill(MI)) {
     557        1094 :           int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
     558        1094 :           if (FuncInfo->allocateSGPRSpillToVGPR(MF, FI)) {
     559        1090 :             bool Spilled = TRI.eliminateSGPRToVGPRSpillFrameIndex(MI, FI, RS);
     560             :             (void)Spilled;
     561             :             assert(Spilled && "failed to spill SGPR to VGPR when allocated");
     562             :           } else
     563             :             AllSGPRSpilledToVGPRs = false;
     564             :         }
     565             :       }
     566             :     }
     567             : 
     568         122 :     FuncInfo->removeSGPRToVGPRFrameIndices(MFI);
     569             :   }
     570             : 
     571             :   // FIXME: The other checks should be redundant with allStackObjectsAreDead,
     572             :   // but currently hasNonSpillStackObjects is set only from source
     573             :   // allocas. Stack temps produced during legalization are not counted.
     574         873 :   if (FuncInfo->hasNonSpillStackObjects() || FuncInfo->hasSpilledVGPRs() ||
     575         715 :       !AllSGPRSpilledToVGPRs || !allStackObjectsAreDead(MFI)) {
     576             :     assert(RS && "RegScavenger required if spilling");
     577             : 
     578             :     // We force this to be at offset 0 so no user object ever has 0 as an
     579             :     // address, so we may use 0 as an invalid pointer value. This is because
     580             :     // LLVM assumes 0 is an invalid pointer in address space 0. Because alloca
     581             :     // is required to be address space 0, we are forced to accept this for
     582             :     // now. Ideally we could have the stack in another address space with 0 as a
     583             :     // valid pointer, and -1 as the null value.
     584             :     //
     585             :     // This will also waste additional space when user stack objects require > 4
     586             :     // byte alignment.
     587             :     //
     588             :     // The main cost here is losing the offset for addressing modes. However
     589             :     // this also ensures we shouldn't need a register for the offset when
     590             :     // emergency scavenging.
     591         574 :     int ScavengeFI = MFI.CreateFixedObject(
     592        1148 :       TRI.getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
     593             :     RS->addScavengingFrameIndex(ScavengeFI);
     594             :   }
     595             : }
     596             : 
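Pinning the emergency scavenge slot at offset 0 means no user object can ever have address 0, which LLVM treats as the null pointer in address space 0. A toy layout showing the consequence (slot sizes invented, alignment ignored):

    #include <cstdio>

    int main() {
      const unsigned ScavengeSlotSize = 4; // one SGPR_32 spill slot at offset 0

      // User objects are laid out after the fixed slot, so their addresses
      // start at 4 and never collide with the null pointer value 0.
      const unsigned UserObjectSizes[] = {8, 4, 16};
      unsigned Offset = ScavengeSlotSize;
      for (unsigned Size : UserObjectSizes) {
        std::printf("user object of %u bytes at offset %u\n", Size, Offset);
        Offset += Size;
      }
      return 0;
    }
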
     597         834 : MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
     598             :   MachineFunction &MF,
     599             :   MachineBasicBlock &MBB,
     600             :   MachineBasicBlock::iterator I) const {
     601         834 :   int64_t Amount = I->getOperand(0).getImm();
     602         834 :   if (Amount == 0)
     603         782 :     return MBB.erase(I);
     604             : 
     605          52 :   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
     606          52 :   const SIInstrInfo *TII = ST.getInstrInfo();
     607         104 :   const DebugLoc &DL = I->getDebugLoc();
     608         104 :   unsigned Opc = I->getOpcode();
     609          52 :   bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
     610          78 :   uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
     611             : 
     612          52 :   const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
     613          52 :   if (!TFI->hasReservedCallFrame(MF)) {
     614          38 :     unsigned Align = getStackAlignment();
     615             : 
     616          76 :     Amount = alignTo(Amount, Align);
     617             :     assert(isUInt<32>(Amount) && "exceeded stack address space size");
     618          38 :     const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
     619          38 :     unsigned SPReg = MFI->getStackPtrOffsetReg();
     620             : 
     621          38 :     unsigned Op = IsDestroy ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
     622         114 :     BuildMI(MBB, I, DL, TII->get(Op), SPReg)
     623          38 :       .addReg(SPReg)
     624          76 :       .addImm(Amount * ST.getWavefrontSize());
     625          14 :   } else if (CalleePopAmount != 0) {
     626           0 :     llvm_unreachable("is this used?");
     627             :   }
     628             : 
     629          52 :   return MBB.erase(I);
     630             : }
     631             : 
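Before adjusting SP, the call-frame amount is rounded up to the stack alignment with alignTo. A stand-alone equivalent of that rounding for power-of-two alignments:

    #include <cstdint>
    #include <cstdio>

    // Stand-alone equivalent of the rounding done by llvm::alignTo.
    static uint64_t alignTo(uint64_t Value, uint64_t Align) {
      return (Value + Align - 1) / Align * Align;
    }

    int main() {
      // A 36-byte call frame under 16-byte stack alignment reserves 48 bytes.
      std::printf("alignTo(36, 16) = %llu\n",
                  static_cast<unsigned long long>(alignTo(36, 16)));
      return 0;
    }
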
     632           4 : void SIFrameLowering::emitDebuggerPrologue(MachineFunction &MF,
     633             :                                            MachineBasicBlock &MBB) const {
     634           4 :   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
     635           4 :   const SIInstrInfo *TII = ST.getInstrInfo();
     636           4 :   const SIRegisterInfo *TRI = &TII->getRegisterInfo();
     637           4 :   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
     638             : 
     639           4 :   MachineBasicBlock::iterator I = MBB.begin();
     640           8 :   DebugLoc DL;
     641             : 
     642             :   // For each dimension:
     643          16 :   for (unsigned i = 0; i < 3; ++i) {
     644             :     // Get work group ID SGPR, and make it live-in again.
     645          12 :     unsigned WorkGroupIDSGPR = MFI->getWorkGroupIDSGPR(i);
     646          24 :     MF.getRegInfo().addLiveIn(WorkGroupIDSGPR);
     647          24 :     MBB.addLiveIn(WorkGroupIDSGPR);
     648             : 
     649             :     // Since SGPRs are spilled into VGPRs, copy work group ID SGPR to VGPR in
     650             :     // order to spill it to scratch.
     651             :     unsigned WorkGroupIDVGPR =
     652          12 :       MF.getRegInfo().createVirtualRegister(&AMDGPU::VGPR_32RegClass);
     653          36 :     BuildMI(MBB, I, DL, TII->get(AMDGPU::V_MOV_B32_e32), WorkGroupIDVGPR)
     654          12 :       .addReg(WorkGroupIDSGPR);
     655             : 
     656             :     // Spill work group ID.
     657          12 :     int WorkGroupIDObjectIdx = MFI->getDebuggerWorkGroupIDStackObjectIndex(i);
     658          12 :     TII->storeRegToStackSlot(MBB, I, WorkGroupIDVGPR, false,
     659             :       WorkGroupIDObjectIdx, &AMDGPU::VGPR_32RegClass, TRI);
     660             : 
     661             :     // Get work item ID VGPR, and make it live-in again.
     662          24 :     unsigned WorkItemIDVGPR = MFI->getWorkItemIDVGPR(i);
     663          24 :     MF.getRegInfo().addLiveIn(WorkItemIDVGPR);
     664          24 :     MBB.addLiveIn(WorkItemIDVGPR);
     665             : 
     666             :     // Spill work item ID.
     667          12 :     int WorkItemIDObjectIdx = MFI->getDebuggerWorkItemIDStackObjectIndex(i);
     668          12 :     TII->storeRegToStackSlot(MBB, I, WorkItemIDVGPR, false,
     669             :       WorkItemIDObjectIdx, &AMDGPU::VGPR_32RegClass, TRI);
     670             :   }
     671           4 : }
     672             : 
     673       16871 : bool SIFrameLowering::hasFP(const MachineFunction &MF) const {
     674             :   // All stack operations are relative to the frame offset SGPR.
     675             :   // TODO: Still want to eliminate sometimes.
     676       16871 :   const MachineFrameInfo &MFI = MF.getFrameInfo();
     677             : 
     678             :   // XXX - Is this only called after frame is finalized? Should be able to check
     679             :   // frame size.
     680       17974 :   return MFI.hasStackObjects() && !allStackObjectsAreDead(MFI);
     681             : }
     682             : 
     683         366 : bool SIFrameLowering::hasSP(const MachineFunction &MF) const {
     684             :   // All stack operations are relative to the frame offset SGPR.
     685         366 :   const MachineFrameInfo &MFI = MF.getFrameInfo();
     686         366 :   return MFI.hasCalls() || MFI.hasVarSizedObjects();
     687             : }

Generated by: LCOV version 1.13