LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - SIMachineFunctionInfo.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 139 139 100.0 %
Date: 2018-02-18 16:14:26 Functions: 10 10 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : 
      10             : #include "SIMachineFunctionInfo.h"
      11             : #include "AMDGPUArgumentUsageInfo.h"
      12             : #include "AMDGPUSubtarget.h"
      13             : #include "SIRegisterInfo.h"
      14             : #include "Utils/AMDGPUBaseInfo.h"
      15             : #include "llvm/ADT/Optional.h"
      16             : #include "llvm/CodeGen/MachineBasicBlock.h"
      17             : #include "llvm/CodeGen/MachineFrameInfo.h"
      18             : #include "llvm/CodeGen/MachineFunction.h"
      19             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      20             : #include "llvm/IR/CallingConv.h"
      21             : #include "llvm/IR/Function.h"
      22             : #include <cassert>
      23             : #include <vector>
      24             : 
      25             : #define MAX_LANES 64
      26             : 
      27             : using namespace llvm;
      28             : 
      29       17058 : SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
      30             :   : AMDGPUMachineFunction(MF),
      31             :     PrivateSegmentBuffer(false),
      32             :     DispatchPtr(false),
      33             :     QueuePtr(false),
      34             :     KernargSegmentPtr(false),
      35             :     DispatchID(false),
      36             :     FlatScratchInit(false),
      37             :     GridWorkgroupCountX(false),
      38             :     GridWorkgroupCountY(false),
      39             :     GridWorkgroupCountZ(false),
      40             :     WorkGroupIDX(false),
      41             :     WorkGroupIDY(false),
      42             :     WorkGroupIDZ(false),
      43             :     WorkGroupInfo(false),
      44             :     PrivateSegmentWaveByteOffset(false),
      45             :     WorkItemIDX(false),
      46             :     WorkItemIDY(false),
      47             :     WorkItemIDZ(false),
      48             :     ImplicitBufferPtr(false),
      49             :     ImplicitArgPtr(false),
      50             :     GITPtrHigh(0xffffffff),
      51       85290 :     HighBitsOf32BitAddress(0) {
      52             :   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
      53             :   const Function &F = MF.getFunction();
      54       34116 :   FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
      55       34116 :   WavesPerEU = ST.getWavesPerEU(F);
      56             : 
      57       17058 :   if (!isEntryFunction()) {
      58             :     // Non-entry functions have no special inputs for now, other than the
      59             :     // registers required for scratch access.
      60        1450 :     ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
      61        1450 :     ScratchWaveOffsetReg = AMDGPU::SGPR4;
      62        1450 :     FrameOffsetReg = AMDGPU::SGPR5;
      63        1450 :     StackPtrOffsetReg = AMDGPU::SGPR32;
      64             : 
      65        1450 :     ArgInfo.PrivateSegmentBuffer =
      66        2900 :       ArgDescriptor::createRegister(ScratchRSrcReg);
      67        1450 :     ArgInfo.PrivateSegmentWaveByteOffset =
      68        2900 :       ArgDescriptor::createRegister(ScratchWaveOffsetReg);
      69             : 
      70        1450 :     if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
      71           9 :       ImplicitArgPtr = true;
      72             :   } else {
      73       15608 :     if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
      74          34 :       KernargSegmentPtr = true;
      75             :   }
      76             : 
      77             :   CallingConv::ID CC = F.getCallingConv();
      78       17058 :   if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
      79       14794 :     if (!F.arg_empty())
      80       13771 :       KernargSegmentPtr = true;
      81       14794 :     WorkGroupIDX = true;
      82       14794 :     WorkItemIDX = true;
      83        2264 :   } else if (CC == CallingConv::AMDGPU_PS) {
      84         564 :     PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
      85             :   }
      86             : 
      87       17058 :   if (ST.debuggerEmitPrologue()) {
      88             :     // Enable everything.
      89           4 :     WorkGroupIDX = true;
      90           4 :     WorkGroupIDY = true;
      91           4 :     WorkGroupIDZ = true;
      92           4 :     WorkItemIDX = true;
      93           4 :     WorkItemIDY = true;
      94           4 :     WorkItemIDZ = true;
      95             :   } else {
      96       17054 :     if (F.hasFnAttribute("amdgpu-work-group-id-x"))
      97          34 :       WorkGroupIDX = true;
      98             : 
      99       17054 :     if (F.hasFnAttribute("amdgpu-work-group-id-y"))
     100          40 :       WorkGroupIDY = true;
     101             : 
     102       17054 :     if (F.hasFnAttribute("amdgpu-work-group-id-z"))
     103          40 :       WorkGroupIDZ = true;
     104             : 
     105       17054 :     if (F.hasFnAttribute("amdgpu-work-item-id-x"))
     106          21 :       WorkItemIDX = true;
     107             : 
     108       17054 :     if (F.hasFnAttribute("amdgpu-work-item-id-y"))
     109         125 :       WorkItemIDY = true;
     110             : 
     111       17054 :     if (F.hasFnAttribute("amdgpu-work-item-id-z"))
     112          81 :       WorkItemIDZ = true;
     113             :   }
     114             : 
     115             :   const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
     116       17058 :   bool MaySpill = ST.isVGPRSpillingEnabled(F);
     117             :   bool HasStackObjects = FrameInfo.hasStackObjects();
     118             : 
     119       17058 :   if (isEntryFunction()) {
     120             :     // X, XY, and XYZ are the only supported combinations, so make sure Y is
     121             :     // enabled if Z is.
     122       15608 :     if (WorkItemIDZ)
     123          77 :       WorkItemIDY = true;
     124             : 
     125       15608 :     if (HasStackObjects || MaySpill) {
     126       14842 :       PrivateSegmentWaveByteOffset = true;
     127             : 
     128             :     // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
     129       16977 :     if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
     130        2135 :         (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
     131             :       ArgInfo.PrivateSegmentWaveByteOffset
     132           4 :         = ArgDescriptor::createRegister(AMDGPU::SGPR5);
     133             :     }
     134             :   }
     135             : 
     136       17058 :   bool IsCOV2 = ST.isAmdCodeObjectV2(MF);
     137       17058 :   if (IsCOV2) {
     138        2177 :     if (HasStackObjects || MaySpill)
     139        2174 :       PrivateSegmentBuffer = true;
     140             : 
     141        2177 :     if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
     142          53 :       DispatchPtr = true;
     143             : 
     144        2177 :     if (F.hasFnAttribute("amdgpu-queue-ptr"))
     145          68 :       QueuePtr = true;
     146             : 
     147        2177 :     if (F.hasFnAttribute("amdgpu-dispatch-id"))
     148          15 :       DispatchID = true;
     149             :   } else if (ST.isMesaGfxShader(MF)) {
     150           8 :     if (HasStackObjects || MaySpill)
     151           2 :       ImplicitBufferPtr = true;
     152             :   }
     153             : 
     154       17058 :   if (F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
     155          36 :     KernargSegmentPtr = true;
     156             : 
     157       17058 :   if (ST.hasFlatAddressSpace() && isEntryFunction() && IsCOV2) {
     158             :     // TODO: This could be refined a lot. The attribute is a poor way of
     159             :     // detecting calls that may require it before argument lowering.
     160        3579 :     if (HasStackObjects || F.hasFnAttribute("amdgpu-flat-scratch"))
     161         352 :       FlatScratchInit = true;
     162             :   }
     163             : 
     164       17058 :   Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
     165       17058 :   StringRef S = A.getValueAsString();
     166       17058 :   if (!S.empty())
     167           1 :     S.consumeInteger(0, GITPtrHigh);
     168             : 
     169       17058 :   A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
     170       17058 :   S = A.getValueAsString();
     171       17058 :   if (!S.empty())
     172          16 :     S.consumeInteger(0, HighBitsOf32BitAddress);
     173       17058 : }
     174             : 
     175        1868 : unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
     176             :   const SIRegisterInfo &TRI) {
     177        1868 :   ArgInfo.PrivateSegmentBuffer =
     178        3736 :     ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
     179        1868 :     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass));
     180        1868 :   NumUserSGPRs += 4;
     181        1868 :   return ArgInfo.PrivateSegmentBuffer.getRegister();
     182             : }
     183             : 
     184          42 : unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
     185          42 :   ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
     186          42 :     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
     187          42 :   NumUserSGPRs += 2;
     188          42 :   return ArgInfo.DispatchPtr.getRegister();
     189             : }
     190             : 
     191          57 : unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
     192          57 :   ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
     193             :     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
     194          57 :   NumUserSGPRs += 2;
     195          57 :   return ArgInfo.QueuePtr.getRegister();
     196             : }
     197             : 
     198       13744 : unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
     199             :   ArgInfo.KernargSegmentPtr
     200       13744 :     = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
     201       13744 :     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
     202       13744 :   NumUserSGPRs += 2;
     203       13744 :   return ArgInfo.KernargSegmentPtr.getRegister();
     204             : }
     205             : 
     206           5 : unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
     207           5 :   ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
     208           5 :     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
     209           5 :   NumUserSGPRs += 2;
     210           5 :   return ArgInfo.DispatchID.getRegister();
     211             : }
     212             : 
     213         349 : unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
     214         349 :   ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
     215         349 :     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
     216         349 :   NumUserSGPRs += 2;
     217         349 :   return ArgInfo.FlatScratchInit.getRegister();
     218             : }
     219             : 
     220           2 : unsigned SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
     221           2 :   ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
     222           2 :     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
     223           2 :   NumUserSGPRs += 2;
     224           2 :   return ArgInfo.ImplicitBufferPtr.getRegister();
     225             : }
     226             : 
     227             : static bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) {
     228         115 :   for (unsigned I = 0; CSRegs[I]; ++I) {
     229          96 :     if (CSRegs[I] == Reg)
     230             :       return true;
     231             :   }
     232             : 
     233             :   return false;
     234             : }
     235             : 
     236             : /// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
     237        1118 : bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
     238             :                                                     int FI) {
     239        1118 :   std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];
     240             : 
     241             :   // This has already been allocated.
     242        1118 :   if (!SpillLanes.empty())
     243             :     return true;
     244             : 
     245             :   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
     246             :   const SIRegisterInfo *TRI = ST.getRegisterInfo();
     247             :   MachineFrameInfo &FrameInfo = MF.getFrameInfo();
     248             :   MachineRegisterInfo &MRI = MF.getRegInfo();
     249             :   unsigned WaveSize = ST.getWavefrontSize();
     250             : 
     251        1108 :   unsigned Size = FrameInfo.getObjectSize(FI);
     252             :   assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
     253             :   assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");
     254             : 
     255         554 :   int NumLanes = Size / 4;
     256             : 
     257         554 :   const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
     258             : 
     259             :   // Make sure to handle the case where a wide SGPR spill may span between two
     260             :   // VGPRs.
     261        2734 :   for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
     262             :     unsigned LaneVGPR;
     263        1094 :     unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);
     264             : 
     265        1094 :     if (VGPRIndex == 0) {
     266         135 :       LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
     267         135 :       if (LaneVGPR == AMDGPU::NoRegister) {
     268             :         // We have no VGPRs left for spilling SGPRs. Reset because we will not
     269             :         // partially spill the SGPR to VGPRs.
     270           4 :         SGPRToVGPRSpills.erase(FI);
     271           4 :         NumVGPRSpillLanes -= I;
     272             :         return false;
     273             :       }
     274             : 
     275             :       Optional<int> CSRSpillFI;
     276         208 :       if (FrameInfo.hasCalls() && CSRegs && isCalleeSavedReg(CSRegs, LaneVGPR)) {
     277             :         // TODO: Should this be a CreateSpillStackObject? This is technically a
     278             :         // weird CSR spill.
     279          77 :         CSRSpillFI = FrameInfo.CreateStackObject(4, 4, false);
     280             :       }
     281             : 
     282         262 :       SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, CSRSpillFI));
     283             : 
     284             :       // Add this register as live-in to all blocks to avoid machine verifier
     285             :       // complaining about use of an undefined physical register.
     286         440 :       for (MachineBasicBlock &BB : MF)
     287         309 :         BB.addLiveIn(LaneVGPR);
     288             :     } else {
     289         959 :       LaneVGPR = SpillVGPRs.back().VGPR;
     290             :     }
     291             : 
     292        2180 :     SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
     293             :   }
     294             : 
     295             :   return true;
     296             : }
     297             : 
     298         127 : void SIMachineFunctionInfo::removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI) {
     299         804 :   for (auto &R : SGPRToVGPRSpills)
     300         550 :     MFI.RemoveStackObject(R.first);
     301         127 : }

Generated by: LCOV version 1.13