LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - SIFrameLowering.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 299 318 94.0 %
Date: 2018-06-17 00:07:59 Functions: 15 15 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===----------------------- SIFrameLowering.cpp --------------------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //==-----------------------------------------------------------------------===//
       9             : 
      10             : #include "SIFrameLowering.h"
      11             : #include "AMDGPUSubtarget.h"
      12             : #include "SIInstrInfo.h"
      13             : #include "SIMachineFunctionInfo.h"
      14             : #include "SIRegisterInfo.h"
      15             : #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
      16             : 
      17             : #include "llvm/CodeGen/LivePhysRegs.h"
      18             : #include "llvm/CodeGen/MachineFrameInfo.h"
      19             : #include "llvm/CodeGen/MachineFunction.h"
      20             : #include "llvm/CodeGen/MachineInstrBuilder.h"
      21             : #include "llvm/CodeGen/RegisterScavenging.h"
      22             : 
      23             : using namespace llvm;
      24             : 
      25             : 
      26             : static ArrayRef<MCPhysReg> getAllSGPR128(const SISubtarget &ST,
      27             :                                          const MachineFunction &MF) {
      28             :   return makeArrayRef(AMDGPU::SGPR_128RegClass.begin(),
      29         395 :                       ST.getMaxNumSGPRs(MF) / 4);
      30             : }
      31             : 
      32             : static ArrayRef<MCPhysReg> getAllSGPRs(const SISubtarget &ST,
      33             :                                        const MachineFunction &MF) {
      34             :   return makeArrayRef(AMDGPU::SGPR_32RegClass.begin(),
      35         725 :                       ST.getMaxNumSGPRs(MF));
      36             : }
      37             : 
      38         359 : void SIFrameLowering::emitFlatScratchInit(const SISubtarget &ST,
      39             :                                           MachineFunction &MF,
      40             :                                           MachineBasicBlock &MBB) const {
      41             :   const SIInstrInfo *TII = ST.getInstrInfo();
      42             :   const SIRegisterInfo* TRI = &TII->getRegisterInfo();
      43         359 :   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
      44             : 
      45             :   // We don't need this if we only have spills since there is no user facing
      46             :   // scratch.
      47             : 
      48             :   // TODO: If we know we don't have flat instructions earlier, we can omit
      49             :   // this from the input registers.
      50             :   //
      51             :   // TODO: We only need to know if we access scratch space through a flat
      52             :   // pointer. Because we only detect if flat instructions are used at all,
      53             :   // this will be used more often than necessary on VI.
      54             : 
      55             :   // Debug location must be unknown since the first debug location is used to
      56             :   // determine the end of the prologue.
      57         359 :   DebugLoc DL;
      58         359 :   MachineBasicBlock::iterator I = MBB.begin();
      59             : 
      60             :   unsigned FlatScratchInitReg
      61             :     = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT);
      62             : 
      63         359 :   MachineRegisterInfo &MRI = MF.getRegInfo();
      64             :   MRI.addLiveIn(FlatScratchInitReg);
      65         359 :   MBB.addLiveIn(FlatScratchInitReg);
      66             : 
      67         359 :   unsigned FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
      68         359 :   unsigned FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
      69             : 
      70         359 :   unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
      71             : 
      72             :   // Do a 64-bit pointer add.
      73         359 :   if (ST.flatScratchIsPointer()) {
      74         198 :     BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
      75          66 :       .addReg(FlatScrInitLo)
      76          66 :       .addReg(ScratchWaveOffsetReg);
      77         198 :     BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), AMDGPU::FLAT_SCR_HI)
      78          66 :       .addReg(FlatScrInitHi)
      79             :       .addImm(0);
      80             : 
      81             :     return;
      82             :   }
      83             : 
      84             :   // Copy the size in bytes.
      85         879 :   BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
      86         293 :     .addReg(FlatScrInitHi, RegState::Kill);
      87             : 
      88             :   // Add wave offset in bytes to private base offset.
      89             :   // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
      90         879 :   BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
      91         293 :     .addReg(FlatScrInitLo)
      92         293 :     .addReg(ScratchWaveOffsetReg);
      93             : 
      94             :   // Convert offset to 256-byte units.
      95         879 :   BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32), AMDGPU::FLAT_SCR_HI)
      96         293 :     .addReg(FlatScrInitLo, RegState::Kill)
      97             :     .addImm(8);
      98             : }
      99             : 
     100       16449 : unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg(
     101             :   const SISubtarget &ST,
     102             :   const SIInstrInfo *TII,
     103             :   const SIRegisterInfo *TRI,
     104             :   SIMachineFunctionInfo *MFI,
     105             :   MachineFunction &MF) const {
     106       16449 :   MachineRegisterInfo &MRI = MF.getRegInfo();
     107             : 
     108             :   // We need to insert initialization of the scratch resource descriptor.
     109       16449 :   unsigned ScratchRsrcReg = MFI->getScratchRSrcReg();
     110       32898 :   if (ScratchRsrcReg == AMDGPU::NoRegister ||
     111       16449 :       !MRI.isPhysRegUsed(ScratchRsrcReg))
     112             :     return AMDGPU::NoRegister;
     113             : 
     114        1549 :   if (ST.hasSGPRInitBug() ||
     115         709 :       ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
     116             :     return ScratchRsrcReg;
     117             : 
     118             :   // We reserved the last registers for this. Shift it down to the end of those
     119             :   // which were actually used.
     120             :   //
     121             :   // FIXME: It might be safer to use a pseudoregister before replacement.
     122             : 
     123             :   // FIXME: We should be able to eliminate unused input registers. We only
     124             :   // cannot do this for the resources required for scratch access. For now we
     125             :   // skip over user SGPRs and may leave unused holes.
     126             : 
     127             :   // We find the resource first because it has an alignment requirement.
     128             : 
     129         790 :   unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
     130             :   ArrayRef<MCPhysReg> AllSGPR128s = getAllSGPR128(ST, MF);
     131        1185 :   AllSGPR128s = AllSGPR128s.slice(std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));
     132             : 
     133             :   // Skip the last N reserved elements because they should have already been
     134             :   // reserved for VCC etc.
     135        2801 :   for (MCPhysReg Reg : AllSGPR128s) {
     136             :     // Pick the first unallocated one. Make sure we don't clobber the other
     137             :     // reserved input we needed.
     138        1590 :     if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg)) {
     139         387 :       MRI.replaceRegWith(ScratchRsrcReg, Reg);
     140             :       MFI->setScratchRSrcReg(Reg);
     141         387 :       return Reg;
     142             :     }
     143             :   }
     144             : 
     145             :   return ScratchRsrcReg;
     146             : }
     147             : 
     148             : // Shift down registers reserved for the scratch wave offset and stack pointer
     149             : // SGPRs.
     150             : std::pair<unsigned, unsigned>
     151       16449 : SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
     152             :   const SISubtarget &ST,
     153             :   const SIInstrInfo *TII,
     154             :   const SIRegisterInfo *TRI,
     155             :   SIMachineFunctionInfo *MFI,
     156             :   MachineFunction &MF) const {
     157       16449 :   MachineRegisterInfo &MRI = MF.getRegInfo();
     158       16449 :   unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
     159             : 
     160             :   // No replacement necessary.
     161       32898 :   if (ScratchWaveOffsetReg == AMDGPU::NoRegister ||
     162       16449 :       !MRI.isPhysRegUsed(ScratchWaveOffsetReg)) {
     163             :     assert(MFI->getStackPtrOffsetReg() == AMDGPU::SP_REG);
     164       15588 :     return std::make_pair(AMDGPU::NoRegister, AMDGPU::NoRegister);
     165             :   }
     166             : 
     167         861 :   unsigned SPReg = MFI->getStackPtrOffsetReg();
     168         861 :   if (ST.hasSGPRInitBug())
     169             :     return std::make_pair(ScratchWaveOffsetReg, SPReg);
     170             : 
     171         725 :   unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
     172             : 
     173             :   ArrayRef<MCPhysReg> AllSGPRs = getAllSGPRs(ST, MF);
     174         725 :   if (NumPreloaded > AllSGPRs.size())
     175             :     return std::make_pair(ScratchWaveOffsetReg, SPReg);
     176             : 
     177             :   AllSGPRs = AllSGPRs.slice(NumPreloaded);
     178             : 
     179             :   // We need to drop registers from the end of the list that we cannot use
     180             :   // for the scratch wave offset.
     181             :   // + 2 for s102 and s103, which do not exist on VI.
     182             :   // + 2 for vcc
     183             :   // + 2 for xnack_mask
     184             :   // + 2 for flat_scratch
     185             :   // + 4 for registers reserved for scratch resource register
     186             :   // + 1 for register reserved for scratch wave offset.  (By excluding this
     187             :   //     register from the list to consider, it means that when this
     188             :   //     register is being used for the scratch wave offset and there
     189             :   //     are no other free SGPRs, then the value will stay in this register.)
     190             :   // + 1 if stack pointer is used.
     191             :   // ----
     192             :   //  13 (+1)
     193             :   unsigned ReservedRegCount = 13;
     194             : 
     195         725 :   if (AllSGPRs.size() < ReservedRegCount)
     196             :     return std::make_pair(ScratchWaveOffsetReg, SPReg);
     197             : 
     198             :   bool HandledScratchWaveOffsetReg =
     199         725 :     ScratchWaveOffsetReg != TRI->reservedPrivateSegmentWaveByteOffsetReg(MF);
     200             : 
     201       62167 :   for (MCPhysReg Reg : AllSGPRs.drop_back(ReservedRegCount)) {
     202             :     // Pick the first unallocated SGPR. Be careful not to pick an alias of the
     203             :     // scratch descriptor, since we haven’t added its uses yet.
     204       31178 :     if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg)) {
     205       20606 :       if (!HandledScratchWaveOffsetReg) {
     206             :         HandledScratchWaveOffsetReg = true;
     207             : 
     208         457 :         MRI.replaceRegWith(ScratchWaveOffsetReg, Reg);
     209             :         MFI->setScratchWaveOffsetReg(Reg);
     210             :         ScratchWaveOffsetReg = Reg;
     211             :         break;
     212             :       }
     213             :     }
     214             :   }
     215             : 
     216             :   return std::make_pair(ScratchWaveOffsetReg, SPReg);
     217             : }
     218             : 
     219       16449 : void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
     220             :                                                 MachineBasicBlock &MBB) const {
     221             :   // Emit debugger prologue if "amdgpu-debugger-emit-prologue" attribute was
     222             :   // specified.
     223       16449 :   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
     224       16449 :   if (ST.debuggerEmitPrologue())
     225           4 :     emitDebuggerPrologue(MF, MBB);
     226             : 
     227             :   assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
     228             : 
     229       16449 :   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
     230             : 
     231             :   // If we only have SGPR spills, we won't actually be using scratch memory
     232             :   // since these spill to VGPRs.
     233             :   //
     234             :   // FIXME: We should be cleaning up these unused SGPR spill frame indices
     235             :   // somewhere.
     236             : 
     237             :   const SIInstrInfo *TII = ST.getInstrInfo();
     238             :   const SIRegisterInfo *TRI = &TII->getRegisterInfo();
     239       16449 :   MachineRegisterInfo &MRI = MF.getRegInfo();
     240       16449 :   const Function &F = MF.getFunction();
     241             : 
     242             :   // We need to do the replacement of the private segment buffer and wave offset
     243             :   // register even if there are no stack objects. There could be stores to undef
     244             :   // or a constant without an associated object.
     245             : 
     246             :   // FIXME: We still have implicit uses on SGPR spill instructions in case they
     247             :   // need to spill to vector memory. It's likely that will not happen, but at
     248             :   // this point it appears we need the setup. This part of the prolog should be
     249             :   // emitted after frame indices are eliminated.
     250             : 
     251       16449 :   if (MFI->hasFlatScratchInit())
     252         359 :     emitFlatScratchInit(ST, MF, MBB);
     253             : 
     254       16449 :   unsigned SPReg = MFI->getStackPtrOffsetReg();
     255       16449 :   if (SPReg != AMDGPU::SP_REG) {
     256             :     assert(MRI.isReserved(SPReg) && "SPReg used but not reserved");
     257             : 
     258         347 :     DebugLoc DL;
     259         347 :     const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
     260         347 :     int64_t StackSize = FrameInfo.getStackSize();
     261             : 
     262         347 :     if (StackSize == 0) {
     263         975 :       BuildMI(MBB, MBB.begin(), DL, TII->get(AMDGPU::COPY), SPReg)
     264         325 :         .addReg(MFI->getScratchWaveOffsetReg());
     265             :     } else {
     266          66 :       BuildMI(MBB, MBB.begin(), DL, TII->get(AMDGPU::S_ADD_U32), SPReg)
     267          22 :         .addReg(MFI->getScratchWaveOffsetReg())
     268          22 :         .addImm(StackSize * ST.getWavefrontSize());
     269             :     }
     270             :   }
     271             : 
     272             :   unsigned ScratchRsrcReg
     273       16449 :     = getReservedPrivateSegmentBufferReg(ST, TII, TRI, MFI, MF);
     274             : 
     275             :   unsigned ScratchWaveOffsetReg;
     276             :   std::tie(ScratchWaveOffsetReg, SPReg)
     277       32898 :     = getReservedPrivateSegmentWaveByteOffsetReg(ST, TII, TRI, MFI, MF);
     278             : 
     279             :   // It's possible to have uses of only ScratchWaveOffsetReg without
     280             :   // ScratchRsrcReg if it's only used for the initialization of flat_scratch,
     281             :   // but the inverse is not true.
     282       16449 :   if (ScratchWaveOffsetReg == AMDGPU::NoRegister) {
     283             :     assert(ScratchRsrcReg == AMDGPU::NoRegister);
     284       15588 :     return;
     285             :   }
     286             : 
     287             :   // We need to insert initialization of the scratch resource descriptor.
     288             :   unsigned PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
     289             :     AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
     290             : 
     291             :   unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister;
     292         861 :   if (ST.isAmdCodeObjectV2(F)) {
     293             :     PreloadedPrivateBufferReg = MFI->getPreloadedReg(
     294             :       AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
     295             :   }
     296             : 
     297         861 :   bool OffsetRegUsed = MRI.isPhysRegUsed(ScratchWaveOffsetReg);
     298        1701 :   bool ResourceRegUsed = ScratchRsrcReg != AMDGPU::NoRegister &&
     299         840 :                          MRI.isPhysRegUsed(ScratchRsrcReg);
     300             : 
     301             :   // We added live-ins during argument lowering, but since they were not used
     302             :   // they were deleted. We're adding the uses now, so add them back.
     303         861 :   if (OffsetRegUsed) {
     304             :     assert(PreloadedScratchWaveOffsetReg != AMDGPU::NoRegister &&
     305             :            "scratch wave offset input is required");
     306             :     MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
     307         861 :     MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
     308             :   }
     309             : 
     310         861 :   if (ResourceRegUsed && PreloadedPrivateBufferReg != AMDGPU::NoRegister) {
     311             :     assert(ST.isAmdCodeObjectV2(F) || ST.isMesaGfxShader(F));
     312             :     MRI.addLiveIn(PreloadedPrivateBufferReg);
     313         391 :     MBB.addLiveIn(PreloadedPrivateBufferReg);
     314             :   }
     315             : 
     316             :   // Make the register selected live throughout the function.
     317        1912 :   for (MachineBasicBlock &OtherBB : MF) {
     318        1051 :     if (&OtherBB == &MBB)
     319         861 :       continue;
     320             : 
     321         190 :     if (OffsetRegUsed)
     322         190 :       OtherBB.addLiveIn(ScratchWaveOffsetReg);
     323             : 
     324         190 :     if (ResourceRegUsed)
     325         190 :       OtherBB.addLiveIn(ScratchRsrcReg);
     326             :   }
     327             : 
     328         861 :   DebugLoc DL;
     329         861 :   MachineBasicBlock::iterator I = MBB.begin();
     330             : 
     331             :   // If we reserved the original input registers, we don't need to copy to the
     332             :   // reserved registers.
     333             : 
     334             :   bool CopyBuffer = ResourceRegUsed &&
     335         391 :     PreloadedPrivateBufferReg != AMDGPU::NoRegister &&
     336        1252 :     ST.isAmdCodeObjectV2(F) &&
     337             :     ScratchRsrcReg != PreloadedPrivateBufferReg;
     338             : 
     339             :   // This needs to be careful of the copying order to avoid overwriting one of
     340             :   // the input registers before it's been copied to its final
     341             :   // destination. Usually the offset should be copied first.
     342         861 :   bool CopyBufferFirst = TRI->isSubRegisterEq(PreloadedPrivateBufferReg,
     343             :                                               ScratchWaveOffsetReg);
     344         861 :   if (CopyBuffer && CopyBufferFirst) {
     345           0 :     BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
     346           0 :       .addReg(PreloadedPrivateBufferReg, RegState::Kill);
     347             :   }
     348             : 
     349         861 :   if (OffsetRegUsed &&
     350             :       PreloadedScratchWaveOffsetReg != ScratchWaveOffsetReg) {
     351        1521 :     BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
     352         507 :       .addReg(PreloadedScratchWaveOffsetReg,
     353         507 :               MRI.isPhysRegUsed(ScratchWaveOffsetReg) ? 0 : RegState::Kill);
     354             :   }
     355             : 
     356         861 :   if (CopyBuffer && !CopyBufferFirst) {
     357          93 :     BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
     358          31 :       .addReg(PreloadedPrivateBufferReg, RegState::Kill);
     359             :   }
     360             : 
     361         861 :   if (ResourceRegUsed)
     362         840 :     emitEntryFunctionScratchSetup(ST, MF, MBB, MFI, I,
     363             :         PreloadedPrivateBufferReg, ScratchRsrcReg);
     364             : }
     365             : 
     366             : // Emit scratch setup code for AMDPAL or Mesa, assuming ResourceRegUsed is set.
     367         840 : void SIFrameLowering::emitEntryFunctionScratchSetup(const SISubtarget &ST,
     368             :       MachineFunction &MF, MachineBasicBlock &MBB, SIMachineFunctionInfo *MFI,
     369             :       MachineBasicBlock::iterator I, unsigned PreloadedPrivateBufferReg,
     370             :       unsigned ScratchRsrcReg) const {
     371             : 
     372             :   const SIInstrInfo *TII = ST.getInstrInfo();
     373             :   const SIRegisterInfo *TRI = &TII->getRegisterInfo();
     374         840 :   const Function &Fn = MF.getFunction();
     375         840 :   DebugLoc DL;
     376             : 
     377         840 :   if (ST.isAmdPalOS()) {
     378             :     // The pointer to the GIT is formed from the offset passed in and either
     379             :     // the amdgpu-git-ptr-high function attribute or the top part of the PC.
     380           4 :     unsigned RsrcLo = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
     381           4 :     unsigned RsrcHi = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
     382           4 :     unsigned Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
     383             : 
     384           4 :     const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
     385             : 
     386           4 :     if (MFI->getGITPtrHigh() != 0xffffffff) {
     387           4 :       BuildMI(MBB, I, DL, SMovB32, RsrcHi)
     388           2 :         .addImm(MFI->getGITPtrHigh())
     389           2 :         .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
     390             :     } else {
     391             :       const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64);
     392           2 :       BuildMI(MBB, I, DL, GetPC64, Rsrc01);
     393             :     }
     394             :     auto GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in
     395           4 :     if (ST.hasMergedShaders()) {
     396           2 :       switch (MF.getFunction().getCallingConv()) {
     397           1 :         case CallingConv::AMDGPU_HS:
     398             :         case CallingConv::AMDGPU_GS:
     399             :           // Low GIT address is passed in s8 rather than s0 for an LS+HS or
     400             :           // ES+GS merged shader on gfx9+.
     401             :           GitPtrLo = AMDGPU::SGPR8;
     402           1 :           break;
     403             :         default:
     404             :           break;
     405             :       }
     406             :     }
     407           4 :     MF.getRegInfo().addLiveIn(GitPtrLo);
     408           4 :     MF.front().addLiveIn(GitPtrLo);
     409           8 :     BuildMI(MBB, I, DL, SMovB32, RsrcLo)
     410           4 :       .addReg(GitPtrLo)
     411           4 :       .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
     412             : 
     413             :     // We now have the GIT ptr - now get the scratch descriptor from the entry
     414             :     // at offset 0 (or offset 16 for a compute shader).
     415             :     PointerType *PtrTy =
     416           4 :       PointerType::get(Type::getInt64Ty(MF.getFunction().getContext()),
     417           4 :                        AMDGPUAS::CONSTANT_ADDRESS);
     418           4 :     MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
     419           4 :     const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
     420           4 :     auto MMO = MF.getMachineMemOperand(PtrInfo,
     421             :                                        MachineMemOperand::MOLoad |
     422             :                                        MachineMemOperand::MOInvariant |
     423             :                                        MachineMemOperand::MODereferenceable,
     424           4 :                                        0, 0);
     425           4 :     unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
     426           8 :     BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
     427           4 :       .addReg(Rsrc01)
     428           4 :       .addImm(Offset) // offset
     429             :       .addImm(0) // glc
     430           4 :       .addReg(ScratchRsrcReg, RegState::ImplicitDefine)
     431             :       .addMemOperand(MMO);
     432             :     return;
     433             :   }
     434             :   if (ST.isMesaGfxShader(Fn)
     435         834 :       || (PreloadedPrivateBufferReg == AMDGPU::NoRegister)) {
     436             :     assert(!ST.isAmdCodeObjectV2(Fn));
     437         445 :     const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
     438             : 
     439         445 :     unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
     440         445 :     unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
     441             : 
     442             :     // Use relocations to get the pointer, and setup the other bits manually.
     443         445 :     uint64_t Rsrc23 = TII->getScratchRsrcWords23();
     444             : 
     445         445 :     if (MFI->hasImplicitBufferPtr()) {
     446           2 :       unsigned Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
     447             : 
     448           4 :       if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
     449           1 :         const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);
     450             : 
     451           2 :         BuildMI(MBB, I, DL, Mov64, Rsrc01)
     452           1 :           .addReg(MFI->getImplicitBufferPtrUserSGPR())
     453           1 :           .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
     454             :       } else {
     455           1 :         const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
     456             : 
     457             :         PointerType *PtrTy =
     458           1 :           PointerType::get(Type::getInt64Ty(MF.getFunction().getContext()),
     459           1 :                            AMDGPUAS::CONSTANT_ADDRESS);
     460           1 :         MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
     461           1 :         auto MMO = MF.getMachineMemOperand(PtrInfo,
     462             :                                            MachineMemOperand::MOLoad |
     463             :                                            MachineMemOperand::MOInvariant |
     464             :                                            MachineMemOperand::MODereferenceable,
     465           1 :                                            0, 0);
     466           2 :         BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
     467           1 :           .addReg(MFI->getImplicitBufferPtrUserSGPR())
     468             :           .addImm(0) // offset
     469             :           .addImm(0) // glc
     470             :           .addMemOperand(MMO)
     471           1 :           .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
     472             :       }
     473             :     } else {
     474         443 :       unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
     475         443 :       unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
     476             : 
     477         886 :       BuildMI(MBB, I, DL, SMovB32, Rsrc0)
     478             :         .addExternalSymbol("SCRATCH_RSRC_DWORD0")
     479         443 :         .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
     480             : 
     481         886 :       BuildMI(MBB, I, DL, SMovB32, Rsrc1)
     482             :         .addExternalSymbol("SCRATCH_RSRC_DWORD1")
     483         443 :         .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
     484             : 
     485             :     }
     486             : 
     487         890 :     BuildMI(MBB, I, DL, SMovB32, Rsrc2)
     488         445 :       .addImm(Rsrc23 & 0xffffffff)
     489         445 :       .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
     490             : 
     491         890 :     BuildMI(MBB, I, DL, SMovB32, Rsrc3)
     492         445 :       .addImm(Rsrc23 >> 32)
     493         445 :       .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
     494             :   }
     495             : }
     496             : 
     497             : // Find a scratch register that we can use at the start of the prologue to
     498             : // re-align the stack pointer.  We avoid using callee-save registers since they
     499             : // may appear to be free when this is called from canUseAsPrologue (during
     500             : // shrink wrapping), but then no longer be free when this is called from
     501             : // emitPrologue.
     502             : //
     503             : // FIXME: This is a bit conservative, since in the above case we could use one
     504             : // of the callee-save registers as a scratch temp to re-align the stack pointer,
     505             : // but we would then have to make sure that we were in fact saving at least one
     506             : // callee-save register in the prologue, which is additional complexity that
     507             : // doesn't seem worth the benefit.
                       //
                       // Returns the first available register of SReg_32_XM0RegClass, or
                       // AMDGPU::NoRegister when none of them is available.
     508           3 : static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock &MBB) {
     509           3 :   MachineFunction *MF = MBB.getParent();
     510             : 
     511           3 :   const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
     512             :   const SIRegisterInfo &TRI = *Subtarget.getRegisterInfo();
     513           3 :   LivePhysRegs LiveRegs(TRI);
     514           3 :   LiveRegs.addLiveIns(MBB); // Start from the registers that are live into MBB.
     515             : 
     516             :   // Mark callee saved registers as used so we will not choose them.
     517           3 :   const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(MF);
     518        1779 :   for (unsigned i = 0; CSRegs[i]; ++i) // Stops at the first zero entry.
     519         888 :     LiveRegs.addReg(CSRegs[i]);
     520             : 
     521           3 :   MachineRegisterInfo &MRI = MF->getRegInfo();
     522             : 
                         // Take the first candidate that LiveRegs reports as available.
     523          42 :   for (unsigned Reg : AMDGPU::SReg_32_XM0RegClass) {
     524          21 :     if (LiveRegs.available(MRI, Reg))
     525             :       return Reg;
     526             :   }
     527             : 
     528             :   return AMDGPU::NoRegister; // No candidate was available.
     529             : }
     530             : 
     531       17857 : void SIFrameLowering::emitPrologue(MachineFunction &MF,
     532             :                                    MachineBasicBlock &MBB) const {
                         // Emit the prologue at the beginning of MBB. Entry functions are
                         // delegated to emitEntryFunctionPrologue. For all other functions this
                         // sets up the frame pointer (re-aligning it when required), advances the
                         // stack pointer, and stores every VGPR listed in getSGPRSpillVGPRs()
                         // that has a frame index.
     533       17857 :   SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
     534       17857 :   if (FuncInfo->isEntryFunction()) {
     535       16449 :     emitEntryFunctionPrologue(MF, MBB);
     536       16449 :     return;
     537             :   }
     538             : 
     539        1408 :   const MachineFrameInfo &MFI = MF.getFrameInfo();
     540        1408 :   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
     541             :   const SIInstrInfo *TII = ST.getInstrInfo();
     542             :   const SIRegisterInfo &TRI = TII->getRegisterInfo();
     543             : 
     544        1408 :   unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();
     545        1408 :   unsigned FramePtrReg = FuncInfo->getFrameOffsetReg();
     546             : 
     547        1408 :   MachineBasicBlock::iterator MBBI = MBB.begin();
     548        1408 :   DebugLoc DL; // Default-constructed: prologue instructions get no debug location.
     549             : 
     550             :   // XXX - Is this the right predicate?
     551             : 
     552        1408 :   bool NeedFP = hasFP(MF);
     553        1408 :   uint32_t NumBytes = MFI.getStackSize();
     554             :   uint32_t RoundedSize = NumBytes; // Amount the stack pointer is advanced by below.
     555        1408 :   const bool NeedsRealignment = TRI.needsStackRealignment(MF);
     556             : 
     557        1408 :   if (NeedsRealignment) {
     558             :     assert(NeedFP);
     559           3 :     const unsigned Alignment = MFI.getMaxAlignment();
     560             : 
     561           3 :     RoundedSize += Alignment; // Extra room consumed by rounding the frame pointer up.
     562             : 
     563           3 :     unsigned ScratchSPReg = findScratchNonCalleeSaveRegister(MBB);
     564             :     assert(ScratchSPReg != AMDGPU::NoRegister);
     565             : 
     566             :     // s_add_u32 tmp_reg, StackPtrReg, (Alignment - 1) * WavefrontSize
     567             :     // s_and_b32 FramePtrReg, tmp_reg, -Alignment * WavefrontSize
     568           9 :     BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), ScratchSPReg)
     569           3 :       .addReg(StackPtrReg)
     570           3 :       .addImm((Alignment - 1) * ST.getWavefrontSize())
     571             :       .setMIFlag(MachineInstr::FrameSetup);
     572           9 :     BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
     573           3 :       .addReg(ScratchSPReg, RegState::Kill)
     574           3 :       .addImm(-Alignment * ST.getWavefrontSize())
     575             :       .setMIFlag(MachineInstr::FrameSetup);
     576             :     FuncInfo->setIsStackRealigned(true); // Read back by emitEpilogue via isStackRealigned().
     577        1405 :   } else if (NeedFP) {
     578             :     // If we need a frame pointer, set it up here. It's whatever the value of
     579             :     // the stack pointer is at this point. Any variable size objects will be
     580             :     // allocated after this, so we can still use the frame pointer to reference
     581             :     // locals.
     582         759 :     BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
     583         253 :       .addReg(StackPtrReg)
     584             :       .setMIFlag(MachineInstr::FrameSetup);
     585             :   }
     586             : 
                         // Advance the stack pointer past this frame; the byte count is scaled
                         // by the wavefront size.
     587        1408 :   if (RoundedSize != 0 && hasSP(MF)) {
     588         243 :     BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), StackPtrReg)
     589          81 :       .addReg(StackPtrReg)
     590          81 :       .addImm(RoundedSize * ST.getWavefrontSize())
     591             :       .setMIFlag(MachineInstr::FrameSetup);
     592             :   }
     593             : 
                         // Store each VGPR from getSGPRSpillVGPRs() that has a frame index into
                         // that stack slot; entries without one are skipped.
     594          89 :   for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
     595        1497 :          : FuncInfo->getSGPRSpillVGPRs()) {
     596          89 :     if (!Reg.FI.hasValue())
     597           9 :       continue;
     598          80 :     TII->storeRegToStackSlot(MBB, MBBI, Reg.VGPR, true,
     599             :                              Reg.FI.getValue(), &AMDGPU::VGPR_32RegClass,
     600             :                              &TII->getRegisterInfo());
     601             :   }
     602             : }
     603             : 
     604       17841 : void SIFrameLowering::emitEpilogue(MachineFunction &MF,
     605             :                                    MachineBasicBlock &MBB) const {
                         // Emit the epilogue before MBB's first terminator: reload every VGPR
                         // from getSGPRSpillVGPRs() that has a frame index, then subtract the
                         // frame size from the stack pointer. Nothing is emitted for entry
                         // functions.
     606       17841 :   const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
     607       17841 :   if (FuncInfo->isEntryFunction())
     608       16441 :     return;
     609             : 
     610        1400 :   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
     611             :   const SIInstrInfo *TII = ST.getInstrInfo();
     612        1400 :   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
     613             : 
                         // Same iteration and filter as the store loop in emitPrologue.
     614          89 :   for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
     615        1489 :          : FuncInfo->getSGPRSpillVGPRs()) {
     616          89 :     if (!Reg.FI.hasValue())
     617           9 :       continue;
     618          80 :     TII->loadRegFromStackSlot(MBB, MBBI, Reg.VGPR,
     619             :                               Reg.FI.getValue(), &AMDGPU::VGPR_32RegClass,
     620             :                               &TII->getRegisterInfo());
     621             :   }
     622             : 
     623        1400 :   unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();
     624        1400 :   if (StackPtrReg == AMDGPU::NoRegister)
     625             :     return;
     626             : 
     627        1400 :   const MachineFrameInfo &MFI = MF.getFrameInfo();
     628        1400 :   uint32_t NumBytes = MFI.getStackSize();
     629             : 
     630        1400 :   DebugLoc DL;
     631             : 
     632             :   // FIXME: Clarify distinction between no set SP and SP. For callee functions,
     633             :   // it's really whether we need SP to be accurate or not.
     634             : 
                         // NOTE(review): emitPrologue guards its SP increment with
                         // `RoundedSize != 0`, while this guard tests `NumBytes != 0`. The two
                         // differ if the stack is realigned while NumBytes is zero -- confirm
                         // that combination cannot occur.
     635        1400 :   if (NumBytes != 0 && hasSP(MF)) {
                           // Include the extra MaxAlignment bytes that the prologue added when
                           // it realigned the stack.
     636          84 :     uint32_t RoundedSize = FuncInfo->isStackRealigned() ?
     637           3 :       NumBytes + MFI.getMaxAlignment() : NumBytes;
     638             : 
     639         243 :     BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_SUB_U32), StackPtrReg)
     640          81 :       .addReg(StackPtrReg)
     641          81 :       .addImm(RoundedSize * ST.getWavefrontSize());
     642             :   }
     643             : }
     644             : 
     645             : static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
                         // Returns true when MFI.isDeadObjectIndex() holds for every index in
                         // [getObjectIndexBegin(), getObjectIndexEnd()); an empty range yields
                         // true as well.
     646        2119 :   for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
     647        2119 :        I != E; ++I) {
     648        2022 :     if (!MFI.isDeadObjectIndex(I))
     649             :       return false;
     650             :   }
     651             : 
     652             :   return true;
     653             : }
     654             : 
     655          22 : int SIFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
     656             :                                             unsigned &FrameReg) const {
                         // Sets FrameReg to the register reported by getFrameRegister(MF) and
                         // returns the object offset that the frame info records for FI.
     657          22 :   const SIRegisterInfo *RI = MF.getSubtarget<SISubtarget>().getRegisterInfo();
     658             : 
     659          22 :   FrameReg = RI->getFrameRegister(MF);
     660          44 :   return MF.getFrameInfo().getObjectOffset(FI);
     661             : }
     662             : 
     663       17857 : void SIFrameLowering::processFunctionBeforeFrameFinalized(
     664             :   MachineFunction &MF,
     665             :   RegScavenger *RS) const {
                         // Two steps, both skipped when the function has no stack objects:
                         //  1. If SGPR-to-VGPR spilling is enabled and SGPRs were spilled, try to
                         //     rewrite every SGPR spill instruction to use a VGPR and drop the
                         //     corresponding frame indices.
                         //  2. If any stack use may remain, create a fixed object at offset 0 and
                         //     register it with RS as the scavenging frame index.
     666       17857 :   MachineFrameInfo &MFI = MF.getFrameInfo();
     667             : 
     668       17857 :   if (!MFI.hasStackObjects())
     669             :     return;
     670             : 
     671         691 :   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
     672             :   const SIInstrInfo *TII = ST.getInstrInfo();
     673             :   const SIRegisterInfo &TRI = TII->getRegisterInfo();
     674         691 :   SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
     675             :   bool AllSGPRSpilledToVGPRs = false;
     676             : 
     677         691 :   if (TRI.spillSGPRToVGPR() && FuncInfo->hasSpilledSGPRs()) {
     678             :     AllSGPRSpilledToVGPRs = true; // Cleared below on the first failed allocation.
     679             : 
     680             :     // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
     681             :     // are spilled to VGPRs, in which case we can eliminate the stack usage.
     682             :     //
     683             :     // XXX - This operates under the assumption that only other SGPR spills are
     684             :     // users of the frame index. I'm not 100% sure this is correct. The
     685             :     // StackColoring pass has a comment saying a future improvement would be
     686             :     // merging of allocas with spill slots, but for now according to
     687             :     // MachineFrameInfo isSpillSlot can't alias any other object.
     688         413 :     for (MachineBasicBlock &MBB : MF) {
     689             :       MachineBasicBlock::iterator Next;
     690        5736 :       for (auto I = MBB.begin(), E = MBB.end(); I != E; I = Next) {
     691             :         MachineInstr &MI = *I;
     692             :         Next = std::next(I); // Saved up front; presumably MI may be replaced below -- confirm.
     693             : 
     694        5455 :         if (TII->isSGPRSpill(MI)) {
     695        1345 :           int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
     696             :           assert(MFI.getStackID(FI) == SIStackID::SGPR_SPILL);
     697        1345 :           if (FuncInfo->allocateSGPRSpillToVGPR(MF, FI)) {
     698        1341 :             bool Spilled = TRI.eliminateSGPRToVGPRSpillFrameIndex(MI, FI, RS);
     699             :             (void)Spilled; // Only read by the assert; avoids an unused warning without it.
     700             :             assert(Spilled && "failed to spill SGPR to VGPR when allocated");
     701             :           } else
     702             :             AllSGPRSpilledToVGPRs = false;
     703             :         }
     704             :       }
     705             :     }
     706             : 
     707         132 :     FuncInfo->removeSGPRToVGPRFrameIndices(MFI);
     708             :   }
     709             : 
     710             :   // FIXME: The other checks should be redundant with allStackObjectsAreDead,
     711             :   // but currently hasNonSpillStackObjects is set only from source
     712             :   // allocas. Stack temps produced from legalization are not counted currently.
     713        1035 :   if (FuncInfo->hasNonSpillStackObjects() || FuncInfo->hasSpilledVGPRs() ||
     714         803 :       !AllSGPRSpilledToVGPRs || !allStackObjectsAreDead(MFI)) {
     715             :     assert(RS && "RegScavenger required if spilling");
     716             : 
     717             :     // We force this to be at offset 0 so no user object ever has 0 as an
     718             :     // address, so we may use 0 as an invalid pointer value. This is because
     719             :     // LLVM assumes 0 is an invalid pointer in address space 0. Because alloca
     720             :     // is required to be address space 0, we are forced to accept this for
     721             :     // now. Ideally we could have the stack in another address space with 0 as a
     722             :     // valid pointer, and -1 as the null value.
     723             :     //
     724             :     // This will also waste additional space when user stack objects require > 4
     725             :     // byte alignment.
     726             :     //
     727             :     // The main cost here is losing the offset for addressing modes. However
     728             :     // this also ensures we shouldn't need a register for the offset when
     729             :     // emergency scavenging.
     730         647 :     int ScavengeFI = MFI.CreateFixedObject(
     731         647 :       TRI.getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
     732             :     RS->addScavengingFrameIndex(ScavengeFI);
     733             :   }
     734             : }
     735             : 
     736       35422 : void SIFrameLowering::determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
     737             :                                            RegScavenger *RS) const {
                         // Runs the generic TargetFrameLowering computation first, then clears
                         // the stack pointer register's bit in SavedRegs.
     738       35422 :   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
     739       35422 :   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
     740             : 
     741             :   // The SP is specifically managed and we don't want extra spills of it.
     742       35422 :   SavedRegs.reset(MFI->getStackPtrOffsetReg());
     743       35422 : }
     744             : 
     745         906 : MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
     746             :   MachineFunction &MF,
     747             :   MachineBasicBlock &MBB,
     748             :   MachineBasicBlock::iterator I) const {
                         // Removes the call-frame pseudo instruction at I and returns the
                         // iterator produced by MBB.erase(I). When the amount in operand 0 is
                         // non-zero and the call frame is not reserved, an explicit stack
                         // pointer adjustment is inserted before I first.
     749         906 :   int64_t Amount = I->getOperand(0).getImm();
     750         906 :   if (Amount == 0)
     751         906 :     return MBB.erase(I);
     752             : 
     753           0 :   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
     754             :   const SIInstrInfo *TII = ST.getInstrInfo();
     755             :   const DebugLoc &DL = I->getDebugLoc();
     756           0 :   unsigned Opc = I->getOpcode();
     757           0 :   bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
     758           0 :   uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0; // Operand 1 is read for destroy only.
     759             : 
     760           0 :   const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
     761           0 :   if (!TFI->hasReservedCallFrame(MF)) {
     762           0 :     unsigned Align = getStackAlignment();
     763             : 
     764           0 :     Amount = alignTo(Amount, Align); // Round the amount up to the stack alignment.
     765             :     assert(isUInt<32>(Amount) && "exceeded stack address space size");
     766           0 :     const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
     767           0 :     unsigned SPReg = MFI->getStackPtrOffsetReg();
     768             : 
                           // Destroy subtracts from SP, anything else adds to it; the amount is
                           // scaled by the wavefront size.
     769           0 :     unsigned Op = IsDestroy ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
     770           0 :     BuildMI(MBB, I, DL, TII->get(Op), SPReg)
     771           0 :       .addReg(SPReg)
     772           0 :       .addImm(Amount * ST.getWavefrontSize());
     773           0 :   } else if (CalleePopAmount != 0) {
     774           0 :     llvm_unreachable("is this used?");
     775             :   }
     776             : 
     777           0 :   return MBB.erase(I);
     778             : }
     779             : 
     780           4 : void SIFrameLowering::emitDebuggerPrologue(MachineFunction &MF,
     781             :                                            MachineBasicBlock &MBB) const {
                         // At the beginning of MBB, store the work group ID and work item ID of
                         // each of the three dimensions into the stack objects given by
                         // getDebuggerWorkGroupIDStackObjectIndex() and
                         // getDebuggerWorkItemIDStackObjectIndex().
     782           4 :   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
     783             :   const SIInstrInfo *TII = ST.getInstrInfo();
     784             :   const SIRegisterInfo *TRI = &TII->getRegisterInfo();
     785           4 :   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
     786             : 
     787           4 :   MachineBasicBlock::iterator I = MBB.begin();
     788           4 :   DebugLoc DL;
     789             : 
     790             :   // For each dimension:
     791          28 :   for (unsigned i = 0; i < 3; ++i) {
     792             :     // Get work group ID SGPR, and make it live-in again.
     793             :     unsigned WorkGroupIDSGPR = MFI->getWorkGroupIDSGPR(i);
     794          12 :     MF.getRegInfo().addLiveIn(WorkGroupIDSGPR);
     795          12 :     MBB.addLiveIn(WorkGroupIDSGPR);
     796             : 
     797             :     // Since SGPRs are spilled into VGPRs, copy work group ID SGPR to VGPR in
     798             :     // order to spill it to scratch.
     799             :     unsigned WorkGroupIDVGPR =
     800          24 :       MF.getRegInfo().createVirtualRegister(&AMDGPU::VGPR_32RegClass);
     801          36 :     BuildMI(MBB, I, DL, TII->get(AMDGPU::V_MOV_B32_e32), WorkGroupIDVGPR)
     802          12 :       .addReg(WorkGroupIDSGPR);
     803             : 
     804             :     // Spill work group ID.
     805             :     int WorkGroupIDObjectIdx = MFI->getDebuggerWorkGroupIDStackObjectIndex(i);
     806          12 :     TII->storeRegToStackSlot(MBB, I, WorkGroupIDVGPR, false,
     807             :       WorkGroupIDObjectIdx, &AMDGPU::VGPR_32RegClass, TRI);
     808             : 
     809             :     // Get work item ID VGPR, and make it live-in again.
     810          12 :     unsigned WorkItemIDVGPR = MFI->getWorkItemIDVGPR(i);
     811          12 :     MF.getRegInfo().addLiveIn(WorkItemIDVGPR);
     812          12 :     MBB.addLiveIn(WorkItemIDVGPR);
     813             : 
     814             :     // Spill work item ID. It is already in a VGPR, so no copy is needed.
     815             :     int WorkItemIDObjectIdx = MFI->getDebuggerWorkItemIDStackObjectIndex(i);
     816          12 :     TII->storeRegToStackSlot(MBB, I, WorkItemIDVGPR, false,
     817             :       WorkItemIDObjectIdx, &AMDGPU::VGPR_32RegClass, TRI);
     818             :   }
     819           4 : }
     820             : 
     821       20702 : bool SIFrameLowering::hasFP(const MachineFunction &MF) const {
     822             :   // All stack operations are relative to the frame offset SGPR.
     823             :   // TODO: Still want to eliminate sometimes.
     824       20702 :   const MachineFrameInfo &MFI = MF.getFrameInfo();
     825             : 
     826             :   // XXX - Is this only called after frame is finalized? Should be able to check
     827             :   // frame size.
                         // True when the function has stack objects and at least one is not dead.
     828       21952 :   return MFI.hasStackObjects() && !allStackObjectsAreDead(MFI);
     829             : }
     830             : 
     831         512 : bool SIFrameLowering::hasSP(const MachineFunction &MF) const {
     832         512 :   const SIRegisterInfo *TRI = MF.getSubtarget<SISubtarget>().getRegisterInfo();
     833             :   // All stack operations are relative to the frame offset SGPR.
     834         512 :   const MachineFrameInfo &MFI = MF.getFrameInfo();
                         // True when the function makes calls, has variable-sized stack objects,
                         // or needs its stack realigned.
     835         512 :   return MFI.hasCalls() || MFI.hasVarSizedObjects() || TRI->needsStackRealignment(MF);
     836             : }

Generated by: LCOV version 1.13