LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - SIMachineFunctionInfo.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 166 167 99.4 %
Date: 2018-07-13 00:08:38 Functions: 14 14 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : 
      10             : #include "SIMachineFunctionInfo.h"
      11             : #include "AMDGPUArgumentUsageInfo.h"
      12             : #include "AMDGPUSubtarget.h"
      13             : #include "SIRegisterInfo.h"
      14             : #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
      15             : #include "Utils/AMDGPUBaseInfo.h"
      16             : #include "llvm/ADT/Optional.h"
      17             : #include "llvm/CodeGen/MachineBasicBlock.h"
      18             : #include "llvm/CodeGen/MachineFrameInfo.h"
      19             : #include "llvm/CodeGen/MachineFunction.h"
      20             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      21             : #include "llvm/IR/CallingConv.h"
      22             : #include "llvm/IR/Function.h"
      23             : #include <cassert>
      24             : #include <vector>
      25             : 
      26             : #define MAX_LANES 64
      27             : 
      28             : using namespace llvm;
      29             : 
      30       18696 : SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
      31             :   : AMDGPUMachineFunction(MF),
      32             :     PrivateSegmentBuffer(false),
      33             :     DispatchPtr(false),
      34             :     QueuePtr(false),
      35             :     KernargSegmentPtr(false),
      36             :     DispatchID(false),
      37             :     FlatScratchInit(false),
      38             :     WorkGroupIDX(false),
      39             :     WorkGroupIDY(false),
      40             :     WorkGroupIDZ(false),
      41             :     WorkGroupInfo(false),
      42             :     PrivateSegmentWaveByteOffset(false),
      43             :     WorkItemIDX(false),
      44             :     WorkItemIDY(false),
      45             :     WorkItemIDZ(false),
      46             :     ImplicitBufferPtr(false),
      47             :     ImplicitArgPtr(false),
      48             :     GITPtrHigh(0xffffffff),
      49       93480 :     HighBitsOf32BitAddress(0) {
      50       18696 :   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
      51       18696 :   const Function &F = MF.getFunction();
      52       37392 :   FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
      53       37392 :   WavesPerEU = ST.getWavesPerEU(F);
      54             : 
      55       18696 :   Occupancy = getMaxWavesPerEU();
      56       18696 :   limitOccupancy(MF);
      57             : 
      58       18696 :   if (!isEntryFunction()) {
      59             :     // Non-entry functions have no special inputs for now, other registers
      60             :     // required for scratch access.
      61        2043 :     ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
      62        2043 :     ScratchWaveOffsetReg = AMDGPU::SGPR4;
      63        2043 :     FrameOffsetReg = AMDGPU::SGPR5;
      64        2043 :     StackPtrOffsetReg = AMDGPU::SGPR32;
      65             : 
      66        2043 :     ArgInfo.PrivateSegmentBuffer =
      67        4086 :       ArgDescriptor::createRegister(ScratchRSrcReg);
      68        2043 :     ArgInfo.PrivateSegmentWaveByteOffset =
      69        4086 :       ArgDescriptor::createRegister(ScratchWaveOffsetReg);
      70             : 
      71        2043 :     if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
      72          12 :       ImplicitArgPtr = true;
      73             :   } else {
      74       16653 :     if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) {
      75          42 :       KernargSegmentPtr = true;
      76             :       assert(MaxKernArgAlign == 0);
      77          42 :       MaxKernArgAlign =  ST.getAlignmentForImplicitArgPtr();
      78             :     }
      79             :   }
      80             : 
      81             :   CallingConv::ID CC = F.getCallingConv();
      82       18696 :   if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
      83       15380 :     if (!F.arg_empty())
      84       14343 :       KernargSegmentPtr = true;
      85       15380 :     WorkGroupIDX = true;
      86       15380 :     WorkItemIDX = true;
      87        3316 :   } else if (CC == CallingConv::AMDGPU_PS) {
      88        1009 :     PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
      89             :   }
      90             : 
      91       18696 :   if (ST.debuggerEmitPrologue()) {
      92             :     // Enable everything.
      93           4 :     WorkGroupIDX = true;
      94           4 :     WorkGroupIDY = true;
      95           4 :     WorkGroupIDZ = true;
      96           4 :     WorkItemIDX = true;
      97           4 :     WorkItemIDY = true;
      98           4 :     WorkItemIDZ = true;
      99             :   } else {
     100       18692 :     if (F.hasFnAttribute("amdgpu-work-group-id-x"))
     101          34 :       WorkGroupIDX = true;
     102             : 
     103       18692 :     if (F.hasFnAttribute("amdgpu-work-group-id-y"))
     104          40 :       WorkGroupIDY = true;
     105             : 
     106       18692 :     if (F.hasFnAttribute("amdgpu-work-group-id-z"))
     107          40 :       WorkGroupIDZ = true;
     108             : 
     109       18692 :     if (F.hasFnAttribute("amdgpu-work-item-id-x"))
     110          24 :       WorkItemIDX = true;
     111             : 
     112       18692 :     if (F.hasFnAttribute("amdgpu-work-item-id-y"))
     113         134 :       WorkItemIDY = true;
     114             : 
     115       18692 :     if (F.hasFnAttribute("amdgpu-work-item-id-z"))
     116          81 :       WorkItemIDZ = true;
     117             :   }
     118             : 
     119       18696 :   const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
     120       18696 :   bool MaySpill = ST.isVGPRSpillingEnabled(F);
     121             :   bool HasStackObjects = FrameInfo.hasStackObjects();
     122             : 
     123       18696 :   if (isEntryFunction()) {
     124             :     // X, XY, and XYZ are the only supported combinations, so make sure Y is
     125             :     // enabled if Z is.
     126       16653 :     if (WorkItemIDZ)
     127          77 :       WorkItemIDY = true;
     128             : 
     129       16653 :     if (HasStackObjects || MaySpill) {
     130       15430 :       PrivateSegmentWaveByteOffset = true;
     131             : 
     132             :     // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
     133       17965 :     if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
     134        2535 :         (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
     135             :       ArgInfo.PrivateSegmentWaveByteOffset
     136           5 :         = ArgDescriptor::createRegister(AMDGPU::SGPR5);
     137             :     }
     138             :   }
     139             : 
     140       18696 :   bool IsCOV2 = ST.isAmdCodeObjectV2(F);
     141       18696 :   if (IsCOV2) {
     142        2825 :     if (HasStackObjects || MaySpill)
     143        2822 :       PrivateSegmentBuffer = true;
     144             : 
     145        2825 :     if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
     146          53 :       DispatchPtr = true;
     147             : 
     148        2825 :     if (F.hasFnAttribute("amdgpu-queue-ptr"))
     149          68 :       QueuePtr = true;
     150             : 
     151        2825 :     if (F.hasFnAttribute("amdgpu-dispatch-id"))
     152          15 :       DispatchID = true;
     153             :   } else if (ST.isMesaGfxShader(F)) {
     154          11 :     if (HasStackObjects || MaySpill)
     155           2 :       ImplicitBufferPtr = true;
     156             :   }
     157             : 
     158       18696 :   if (F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
     159       14087 :     KernargSegmentPtr = true;
     160             : 
     161       18696 :   if (ST.hasFlatAddressSpace() && isEntryFunction() && IsCOV2) {
     162             :     // TODO: This could be refined a lot. The attribute is a poor way of
     163             :     // detecting calls that may require it before argument lowering.
     164        4700 :     if (HasStackObjects || F.hasFnAttribute("amdgpu-flat-scratch"))
     165         360 :       FlatScratchInit = true;
     166             :   }
     167             : 
     168       18696 :   Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
     169       18696 :   StringRef S = A.getValueAsString();
     170       18696 :   if (!S.empty())
     171           2 :     S.consumeInteger(0, GITPtrHigh);
     172             : 
     173       18696 :   A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
     174       18696 :   S = A.getValueAsString();
     175       18696 :   if (!S.empty())
     176          16 :     S.consumeInteger(0, HighBitsOf32BitAddress);
     177       18696 : }
     178             : 
     179       36585 : void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
     180       36585 :   limitOccupancy(getMaxWavesPerEU());
     181       36585 :   const SISubtarget& ST = MF.getSubtarget<SISubtarget>();
     182       36585 :   limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
     183             :                  MF.getFunction()));
     184       36585 : }
     185             : 
     186        2436 : unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
     187             :   const SIRegisterInfo &TRI) {
     188        2436 :   ArgInfo.PrivateSegmentBuffer =
     189        4872 :     ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
     190        2436 :     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass));
     191        2436 :   NumUserSGPRs += 4;
     192        2436 :   return ArgInfo.PrivateSegmentBuffer.getRegister();
     193             : }
     194             : 
     195          42 : unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
     196          42 :   ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
     197          42 :     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
     198          42 :   NumUserSGPRs += 2;
     199          42 :   return ArgInfo.DispatchPtr.getRegister();
     200             : }
     201             : 
     202          57 : unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
     203          57 :   ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
     204          57 :     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
     205          57 :   NumUserSGPRs += 2;
     206          57 :   return ArgInfo.QueuePtr.getRegister();
     207             : }
     208             : 
     209       14312 : unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
     210             :   ArgInfo.KernargSegmentPtr
     211       14312 :     = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
     212       14312 :     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
     213       14312 :   NumUserSGPRs += 2;
     214       14312 :   return ArgInfo.KernargSegmentPtr.getRegister();
     215             : }
     216             : 
     217           5 : unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
     218           5 :   ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
     219           5 :     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
     220           5 :   NumUserSGPRs += 2;
     221           5 :   return ArgInfo.DispatchID.getRegister();
     222             : }
     223             : 
     224         357 : unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
     225         357 :   ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
     226         357 :     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
     227         357 :   NumUserSGPRs += 2;
     228         357 :   return ArgInfo.FlatScratchInit.getRegister();
     229             : }
     230             : 
     231           2 : unsigned SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
     232           2 :   ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
     233           2 :     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
     234           2 :   NumUserSGPRs += 2;
     235           2 :   return ArgInfo.ImplicitBufferPtr.getRegister();
     236             : }
     237             : 
     238             : static bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) {
     239        5455 :   for (unsigned I = 0; CSRegs[I]; ++I) {
     240        2763 :     if (CSRegs[I] == Reg)
     241             :       return true;
     242             :   }
     243             : 
     244             :   return false;
     245             : }
     246             : 
/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
///
/// Each 4-byte piece of the SGPR spill is assigned one lane of a VGPR.
/// Returns true on success (or if lanes were already allocated for \p FI);
/// returns false if no VGPR is available, in which case any partial
/// allocation for \p FI is rolled back so the spill falls back to memory.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  // Note: operator[] creates the (empty) entry if FI is new.
  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();

  unsigned Size = FrameInfo.getObjectSize(FI);
  assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

  // One lane per 32-bit piece of the spilled object.
  int NumLanes = Size / 4;

  const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);

  // Make sure to handle the case where a wide SGPR spill may span between two
  // VGPRs.
  for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    unsigned LaneVGPR;
    // NumVGPRSpillLanes counts lanes across ALL spills; wrapping past the
    // wavefront size means the current VGPR is full and a new one is needed.
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

    if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // We have no VGPRs left for spilling SGPRs. Reset because we will not
        // partially spill the SGPR to VGPRs.
        SGPRToVGPRSpills.erase(FI);
        NumVGPRSpillLanes -= I;
        return false;
      }

      // If the chosen VGPR is callee-saved, reserve a stack slot so its
      // original value can be preserved across the call boundary.
      Optional<int> CSRSpillFI;
      if ((FrameInfo.hasCalls() || !isEntryFunction()) && CSRegs &&
          isCalleeSavedReg(CSRegs, LaneVGPR)) {
        CSRSpillFI = FrameInfo.CreateSpillStackObject(4, 4);
      }

      SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, CSRSpillFI));

      // Add this register as live-in to all blocks to avoid machine verifer
      // complaining about use of an undefined physical register.
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      // Continue filling lanes of the VGPR most recently allocated above.
      LaneVGPR = SpillVGPRs.back().VGPR;
    }

    SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}
     307             : 
     308         133 : void SIMachineFunctionInfo::removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI) {
     309         861 :   for (auto &R : SGPRToVGPRSpills)
     310         595 :     MFI.RemoveStackObject(R.first);
     311         133 : }
     312             : 
     313             : 
     314             : /// \returns VGPR used for \p Dim' work item ID.
     315          12 : unsigned SIMachineFunctionInfo::getWorkItemIDVGPR(unsigned Dim) const {
     316          12 :   switch (Dim) {
     317             :   case 0:
     318             :     assert(hasWorkItemIDX());
     319             :     return AMDGPU::VGPR0;
     320           4 :   case 1:
     321             :     assert(hasWorkItemIDY());
     322           4 :     return AMDGPU::VGPR1;
     323           4 :   case 2:
     324             :     assert(hasWorkItemIDZ());
     325           4 :     return AMDGPU::VGPR2;
     326             :   }
     327           0 :   llvm_unreachable("unexpected dimension");
     328             : }
     329             : 
     330       17211 : MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
     331             :   assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
     332       17211 :   return AMDGPU::SGPR0 + NumUserSGPRs;
     333             : }
     334             : 
     335       30438 : MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
     336       30438 :   return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
     337             : }

Generated by: LCOV version 1.13