LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - SIMachineFunctionInfo.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 144 144 100.0 %
Date: 2017-09-14 15:23:50 Functions: 10 10 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : 
      10             : #include "SIMachineFunctionInfo.h"
      11             : #include "AMDGPUArgumentUsageInfo.h"
      12             : #include "AMDGPUSubtarget.h"
      13             : #include "SIRegisterInfo.h"
      14             : #include "Utils/AMDGPUBaseInfo.h"
      15             : #include "llvm/ADT/Optional.h"
      16             : #include "llvm/CodeGen/MachineBasicBlock.h"
      17             : #include "llvm/CodeGen/MachineFrameInfo.h"
      18             : #include "llvm/CodeGen/MachineFunction.h"
      19             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      20             : #include "llvm/IR/CallingConv.h"
      21             : #include "llvm/IR/Function.h"
      22             : #include <cassert>
      23             : #include <vector>
      24             : 
      25             : #define MAX_LANES 64
      26             : 
      27             : using namespace llvm;
      28             : 
      29       15108 : SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
      30             :   : AMDGPUMachineFunction(MF),
      31             :     PrivateSegmentBuffer(false),
      32             :     DispatchPtr(false),
      33             :     QueuePtr(false),
      34             :     KernargSegmentPtr(false),
      35             :     DispatchID(false),
      36             :     FlatScratchInit(false),
      37             :     GridWorkgroupCountX(false),
      38             :     GridWorkgroupCountY(false),
      39             :     GridWorkgroupCountZ(false),
      40             :     WorkGroupIDX(false),
      41             :     WorkGroupIDY(false),
      42             :     WorkGroupIDZ(false),
      43             :     WorkGroupInfo(false),
      44             :     PrivateSegmentWaveByteOffset(false),
      45             :     WorkItemIDX(false),
      46             :     WorkItemIDY(false),
      47             :     WorkItemIDZ(false),
      48             :     ImplicitBufferPtr(false),
      49      105756 :     ImplicitArgPtr(false) {
      50       15108 :   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
      51       15108 :   const Function *F = MF.getFunction();
      52       30216 :   FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(*F);
      53       30216 :   WavesPerEU = ST.getWavesPerEU(*F);
      54             : 
      55       15108 :   if (!isEntryFunction()) {
      56             :     // Non-entry functions have no special inputs for now, other than the
      57             :     // registers required for scratch access.
      58         807 :     ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
      59         807 :     ScratchWaveOffsetReg = AMDGPU::SGPR4;
      60         807 :     FrameOffsetReg = AMDGPU::SGPR5;
      61         807 :     StackPtrOffsetReg = AMDGPU::SGPR32;
      62             : 
      63         807 :     ArgInfo.PrivateSegmentBuffer =
      64        2421 :       ArgDescriptor::createRegister(ScratchRSrcReg);
      65         807 :     ArgInfo.PrivateSegmentWaveByteOffset =
      66        2421 :       ArgDescriptor::createRegister(ScratchWaveOffsetReg);
      67             : 
      68        1614 :     if (F->hasFnAttribute("amdgpu-implicitarg-ptr"))
      69           9 :       ImplicitArgPtr = true;
      70             :   } else {
      71       28602 :     if (F->hasFnAttribute("amdgpu-implicitarg-ptr"))
      72          28 :       KernargSegmentPtr = true;
      73             :   }
      74             : 
      75       15108 :   CallingConv::ID CC = F->getCallingConv();
      76       15108 :   if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
      77       13739 :     if (!F->arg_empty())
      78       12809 :       KernargSegmentPtr = true;
      79       13739 :     WorkGroupIDX = true;
      80       13739 :     WorkItemIDX = true;
      81        1369 :   } else if (CC == CallingConv::AMDGPU_PS) {
      82         443 :     PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);
      83             :   }
      84             : 
      85       15108 :   if (ST.debuggerEmitPrologue()) {
      86             :     // Enable everything.
      87           4 :     WorkGroupIDX = true;
      88           4 :     WorkGroupIDY = true;
      89           4 :     WorkGroupIDZ = true;
      90           4 :     WorkItemIDX = true;
      91           4 :     WorkItemIDY = true;
      92           4 :     WorkItemIDZ = true;
      93             :   } else {
      94       30208 :     if (F->hasFnAttribute("amdgpu-work-group-id-x"))
      95          34 :       WorkGroupIDX = true;
      96             : 
      97       30208 :     if (F->hasFnAttribute("amdgpu-work-group-id-y"))
      98          40 :       WorkGroupIDY = true;
      99             : 
     100       30208 :     if (F->hasFnAttribute("amdgpu-work-group-id-z"))
     101          40 :       WorkGroupIDZ = true;
     102             : 
     103       30208 :     if (F->hasFnAttribute("amdgpu-work-item-id-x"))
     104          19 :       WorkItemIDX = true;
     105             : 
     106       30208 :     if (F->hasFnAttribute("amdgpu-work-item-id-y"))
     107          99 :       WorkItemIDY = true;
     108             : 
     109       30208 :     if (F->hasFnAttribute("amdgpu-work-item-id-z"))
     110          62 :       WorkItemIDZ = true;
     111             :   }
     112             : 
     113       15108 :   const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
     114       15108 :   bool MaySpill = ST.isVGPRSpillingEnabled(*F);
     115       15108 :   bool HasStackObjects = FrameInfo.hasStackObjects();
     116             : 
     117       15108 :   if (isEntryFunction()) {
     118             :     // X, XY, and XYZ are the only supported combinations, so make sure Y is
     119             :     // enabled if Z is.
     120       14301 :     if (WorkItemIDZ)
     121          58 :       WorkItemIDY = true;
     122             : 
     123       14301 :     if (HasStackObjects || MaySpill) {
     124       13787 :       PrivateSegmentWaveByteOffset = true;
     125             : 
     126             :     // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
     127       15227 :     if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
     128        1440 :         (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
     129             :       ArgInfo.PrivateSegmentWaveByteOffset
     130           4 :         = ArgDescriptor::createRegister(AMDGPU::SGPR5);
     131             :     }
     132             :   }
     133             : 
     134       15108 :   bool IsCOV2 = ST.isAmdCodeObjectV2(MF);
     135       15108 :   if (IsCOV2) {
     136        2010 :     if (HasStackObjects || MaySpill)
     137        2007 :       PrivateSegmentBuffer = true;
     138             : 
     139        4020 :     if (F->hasFnAttribute("amdgpu-dispatch-ptr"))
     140          35 :       DispatchPtr = true;
     141             : 
     142        4020 :     if (F->hasFnAttribute("amdgpu-queue-ptr"))
     143          68 :       QueuePtr = true;
     144             : 
     145        4020 :     if (F->hasFnAttribute("amdgpu-dispatch-id"))
     146          15 :       DispatchID = true;
     147       13106 :   } else if (ST.isMesaGfxShader(MF)) {
     148           8 :     if (HasStackObjects || MaySpill)
     149           2 :       ImplicitBufferPtr = true;
     150             :   }
     151             : 
     152       30216 :   if (F->hasFnAttribute("amdgpu-kernarg-segment-ptr"))
     153          36 :     KernargSegmentPtr = true;
     154             : 
     155       15108 :   if (ST.hasFlatAddressSpace() && isEntryFunction() && IsCOV2) {
     156             :     // TODO: This could be refined a lot. The attribute is a poor way of
     157             :     // detecting calls that may require it before argument lowering.
     158        3338 :     if (HasStackObjects || F->hasFnAttribute("amdgpu-flat-scratch"))
     159         332 :       FlatScratchInit = true;
     160             :   }
     161       15108 : }
     162             : 
     163        1742 : unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
     164             :   const SIRegisterInfo &TRI) {
     165        1742 :   ArgInfo.PrivateSegmentBuffer =
     166        3484 :     ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
     167        5226 :     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass));
     168        1742 :   NumUserSGPRs += 4;
     169        1742 :   return ArgInfo.PrivateSegmentBuffer.getRegister();
     170             : }
     171             : 
     172          25 : unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
     173          25 :   ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
     174          75 :     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
     175          25 :   NumUserSGPRs += 2;
     176          25 :   return ArgInfo.DispatchPtr.getRegister();
     177             : }
     178             : 
     179          57 : unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
     180          57 :   ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
     181         171 :     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
     182          57 :   NumUserSGPRs += 2;
     183          57 :   return ArgInfo.QueuePtr.getRegister();
     184             : }
     185             : 
     186       12785 : unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
     187             :   ArgInfo.KernargSegmentPtr
     188       12785 :     = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
     189       25570 :     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
     190       12785 :   NumUserSGPRs += 2;
     191       12785 :   return ArgInfo.KernargSegmentPtr.getRegister();
     192             : }
     193             : 
     194           5 : unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
     195           5 :   ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
     196          15 :     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
     197           5 :   NumUserSGPRs += 2;
     198           5 :   return ArgInfo.DispatchID.getRegister();
     199             : }
     200             : 
     201         332 : unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
     202         332 :   ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
     203         996 :     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
     204         332 :   NumUserSGPRs += 2;
     205         332 :   return ArgInfo.FlatScratchInit.getRegister();
     206             : }
     207             : 
     208           2 : unsigned SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
     209           2 :   ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
     210           6 :     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
     211           2 :   NumUserSGPRs += 2;
     212           2 :   return ArgInfo.ImplicitBufferPtr.getRegister();
     213             : }
     214             : 
     215             : static bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) {
     216          88 :   for (unsigned I = 0; CSRegs[I]; ++I) {
     217          88 :     if (CSRegs[I] == Reg)
     218             :       return true;
     219             :   }
     220             : 
     221             :   return false;
     222             : }
     223             : 
     224             : /// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
     225        1094 : bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
     226             :                                                     int FI) {
     227        2188 :   std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];
     228             : 
     229             :   // This has already been allocated.
     230        1094 :   if (!SpillLanes.empty())
     231             :     return true;
     232             : 
     233         542 :   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
     234         542 :   const SIRegisterInfo *TRI = ST.getRegisterInfo();
     235         542 :   MachineFrameInfo &FrameInfo = MF.getFrameInfo();
     236         542 :   MachineRegisterInfo &MRI = MF.getRegInfo();
     237         542 :   unsigned WaveSize = ST.getWavefrontSize();
     238             : 
     239        1084 :   unsigned Size = FrameInfo.getObjectSize(FI);
     240             :   assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
     241             :   assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");
     242             : 
     243         542 :   int NumLanes = Size / 4;
     244             : 
     245         542 :   const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
     246             : 
     247             :   // Make sure to handle the case where a wide SGPR spill may span between two
     248             :   // VGPRs.
     249        1470 :   for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
     250             :     unsigned LaneVGPR;
     251         932 :     unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);
     252             : 
     253         932 :     if (VGPRIndex == 0) {
     254         130 :       LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
     255         130 :       if (LaneVGPR == AMDGPU::NoRegister) {
     256             :         // We have no VGPRs left for spilling SGPRs. Reset because we will not
     257             :         // partially spill the SGPR to VGPRs.
     258           4 :         SGPRToVGPRSpills.erase(FI);
     259           4 :         NumVGPRSpillLanes -= I;
     260             :         return false;
     261             :       }
     262             : 
     263         252 :       Optional<int> CSRSpillFI;
     264         201 :       if (FrameInfo.hasCalls() && CSRegs && isCalleeSavedReg(CSRegs, LaneVGPR)) {
     265             :         // TODO: Should this be a CreateSpillStackObject? This is technically a
     266             :         // weird CSR spill.
     267         150 :         CSRSpillFI = FrameInfo.CreateStackObject(4, 4, false);
     268             :       }
     269             : 
     270         630 :       SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, CSRSpillFI));
     271             : 
     272             :       // Add this register as live-in to all blocks to avoid machine verifier
     273             :       // complaining about use of an undefined physical register.
     274         669 :       for (MachineBasicBlock &BB : MF)
     275         582 :         BB.addLiveIn(LaneVGPR);
     276             :     } else {
     277        1604 :       LaneVGPR = SpillVGPRs.back().VGPR;
     278             :     }
     279             : 
     280        2784 :     SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
     281             :   }
     282             : 
     283             :   return true;
     284             : }
     285             : 
     286         122 : void SIMachineFunctionInfo::removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI) {
     287         904 :   for (auto &R : SGPRToVGPRSpills)
     288        1076 :     MFI.RemoveStackObject(R.first);
     289         122 : }

Generated by: LCOV version 1.13