LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - SIMachineFunctionInfo.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 166 167 99.4 %
Date: 2018-06-17 00:07:59 Functions: 14 14 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : 
      10             : #include "SIMachineFunctionInfo.h"
      11             : #include "AMDGPUArgumentUsageInfo.h"
      12             : #include "AMDGPUSubtarget.h"
      13             : #include "SIRegisterInfo.h"
      14             : #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
      15             : #include "Utils/AMDGPUBaseInfo.h"
      16             : #include "llvm/ADT/Optional.h"
      17             : #include "llvm/CodeGen/MachineBasicBlock.h"
      18             : #include "llvm/CodeGen/MachineFrameInfo.h"
      19             : #include "llvm/CodeGen/MachineFunction.h"
      20             : #include "llvm/CodeGen/MachineRegisterInfo.h"
      21             : #include "llvm/IR/CallingConv.h"
      22             : #include "llvm/IR/Function.h"
      23             : #include <cassert>
      24             : #include <vector>
      25             : 
      26             : #define MAX_LANES 64
      27             : 
      28             : using namespace llvm;
      29             : 
// Build the SI-specific per-function state: decide which implicit inputs
// (user/system SGPR arguments and work-item ID VGPRs) this function needs,
// based on its calling convention, function attributes, and the subtarget.
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    GridWorkgroupCountX(false),
    GridWorkgroupCountY(false),
    GridWorkgroupCountZ(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false),
    ImplicitBufferPtr(false),
    ImplicitArgPtr(false),
    // All-ones sentinel: only overridden below if the "amdgpu-git-ptr-high"
    // attribute supplies a value.
    GITPtrHigh(0xffffffff),
    HighBitsOf32BitAddress(0) {
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const Function &F = MF.getFunction();
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  WavesPerEU = ST.getWavesPerEU(F);

  // Start from the theoretical maximum occupancy; limitOccupancy() clamps it
  // by the subtarget limits and this function's LDS usage.
  Occupancy = getMaxWavesPerEU();
  limitOccupancy(MF);

  if (!isEntryFunction()) {
    // Non-entry functions have no special inputs for now; they only get the
    // fixed registers required for scratch access.
    ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
    ScratchWaveOffsetReg = AMDGPU::SGPR4;
    FrameOffsetReg = AMDGPU::SGPR5;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    ArgInfo.PrivateSegmentBuffer =
      ArgDescriptor::createRegister(ScratchRSrcReg);
    ArgInfo.PrivateSegmentWaveByteOffset =
      ArgDescriptor::createRegister(ScratchWaveOffsetReg);

    if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    // Entry functions reach the implicit arguments through the kernarg
    // segment pointer instead of a dedicated implicit-arg pointer.
    if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) {
      KernargSegmentPtr = true;
      assert(MaxKernArgAlign == 0);
      MaxKernArgAlign =  ST.getAlignmentForImplicitArgPtr();
    }
  }

  CallingConv::ID CC = F.getCallingConv();
  if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
    // Kernels with no formal arguments still may use implicit inputs, but do
    // not need the kernarg segment pointer itself.
    if (!F.arg_empty())
      KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  }

  if (ST.debuggerEmitPrologue()) {
    // Debugger prologue needs every workgroup/workitem ID: enable everything.
    WorkGroupIDX = true;
    WorkGroupIDY = true;
    WorkGroupIDZ = true;
    WorkItemIDX = true;
    WorkItemIDY = true;
    WorkItemIDZ = true;
  } else {
    // Otherwise, honor the per-ID attributes placed by argument lowering.
    if (F.hasFnAttribute("amdgpu-work-group-id-x"))
      WorkGroupIDX = true;

    if (F.hasFnAttribute("amdgpu-work-group-id-y"))
      WorkGroupIDY = true;

    if (F.hasFnAttribute("amdgpu-work-group-id-z"))
      WorkGroupIDZ = true;

    if (F.hasFnAttribute("amdgpu-work-item-id-x"))
      WorkItemIDX = true;

    if (F.hasFnAttribute("amdgpu-work-item-id-y"))
      WorkItemIDY = true;

    if (F.hasFnAttribute("amdgpu-work-item-id-z"))
      WorkItemIDZ = true;
  }

  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  bool MaySpill = ST.isVGPRSpillingEnabled(F);
  bool HasStackObjects = FrameInfo.hasStackObjects();

  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    if (HasStackObjects || MaySpill) {
      PrivateSegmentWaveByteOffset = true;

      // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
      // NOTE(review): this override is nested inside the
      // (HasStackObjects || MaySpill) guard; the original indentation
      // suggested it might have been intended unconditionally -- confirm.
      if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
          (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
        ArgInfo.PrivateSegmentWaveByteOffset
          = ArgDescriptor::createRegister(AMDGPU::SGPR5);
    }
  }

  // Inputs below are only available under the AMD code-object-v2 ABI.
  bool IsCOV2 = ST.isAmdCodeObjectV2(F);
  if (IsCOV2) {
    if (HasStackObjects || MaySpill)
      PrivateSegmentBuffer = true;

    if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
      DispatchPtr = true;

    if (F.hasFnAttribute("amdgpu-queue-ptr"))
      QueuePtr = true;

    if (F.hasFnAttribute("amdgpu-dispatch-id"))
      DispatchID = true;
  } else if (ST.isMesaGfxShader(F)) {
    if (HasStackObjects || MaySpill)
      ImplicitBufferPtr = true;
  }

  if (F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
    KernargSegmentPtr = true;

  if (ST.hasFlatAddressSpace() && isEntryFunction() && IsCOV2) {
    // TODO: This could be refined a lot. The attribute is a poor way of
    // detecting calls that may require it before argument lowering.
    if (HasStackObjects || F.hasFnAttribute("amdgpu-flat-scratch"))
      FlatScratchInit = true;
  }

  // Optional attribute-provided constants; left at their defaults when the
  // attribute string is absent or empty.
  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
  StringRef S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GITPtrHigh);

  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
  S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, HighBitsOf32BitAddress);
}
     181             : 
     182       36474 : void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
     183       36474 :   limitOccupancy(getMaxWavesPerEU());
     184       36474 :   const SISubtarget& ST = MF.getSubtarget<SISubtarget>();
     185       36474 :   limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
     186             :                  MF.getFunction()));
     187       36474 : }
     188             : 
     189        2051 : unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
     190             :   const SIRegisterInfo &TRI) {
     191        2051 :   ArgInfo.PrivateSegmentBuffer =
     192        4102 :     ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
     193        2051 :     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass));
     194        2051 :   NumUserSGPRs += 4;
     195        2051 :   return ArgInfo.PrivateSegmentBuffer.getRegister();
     196             : }
     197             : 
     198          42 : unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
     199          42 :   ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
     200          42 :     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
     201          42 :   NumUserSGPRs += 2;
     202          42 :   return ArgInfo.DispatchPtr.getRegister();
     203             : }
     204             : 
     205          57 : unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
     206          57 :   ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
     207          57 :     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
     208          57 :   NumUserSGPRs += 2;
     209          57 :   return ArgInfo.QueuePtr.getRegister();
     210             : }
     211             : 
     212       14448 : unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
     213             :   ArgInfo.KernargSegmentPtr
     214       14448 :     = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
     215       14448 :     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
     216       14448 :   NumUserSGPRs += 2;
     217       14448 :   return ArgInfo.KernargSegmentPtr.getRegister();
     218             : }
     219             : 
     220           5 : unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
     221           5 :   ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
     222           5 :     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
     223           5 :   NumUserSGPRs += 2;
     224           5 :   return ArgInfo.DispatchID.getRegister();
     225             : }
     226             : 
     227         359 : unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
     228         359 :   ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
     229         359 :     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
     230         359 :   NumUserSGPRs += 2;
     231         359 :   return ArgInfo.FlatScratchInit.getRegister();
     232             : }
     233             : 
     234           2 : unsigned SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
     235           2 :   ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
     236           2 :     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
     237           2 :   NumUserSGPRs += 2;
     238           2 :   return ArgInfo.ImplicitBufferPtr.getRegister();
     239             : }
     240             : 
     241             : static bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) {
     242        5455 :   for (unsigned I = 0; CSRegs[I]; ++I) {
     243        2763 :     if (CSRegs[I] == Reg)
     244             :       return true;
     245             :   }
     246             : 
     247             :   return false;
     248             : }
     249             : 
/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
/// Returns true on success; returns false (and removes the map entry) if no
/// VGPR is available, so the SGPR is never partially spilled to VGPRs.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();

  unsigned Size = FrameInfo.getObjectSize(FI);
  assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

  // One VGPR lane holds one 32-bit sub-register of the spilled SGPR.
  int NumLanes = Size / 4;

  const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);

  // Make sure to handle the case where a wide SGPR spill may span between two
  // VGPRs.
  for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    unsigned LaneVGPR;
    // Lane index within the current spill VGPR; wrapping to 0 means the
    // previous VGPR is full and a fresh one must be found.
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

    if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // We have no VGPRs left for spilling SGPRs. Reset because we will not
        // partially spill the SGPR to VGPRs.
        SGPRToVGPRSpills.erase(FI);
        NumVGPRSpillLanes -= I;  // Roll back the lanes consumed so far.
        return false;
      }

      // If the chosen VGPR is callee-saved and this function can observe the
      // CSR contract (it makes calls or is not an entry function), reserve a
      // stack slot so the VGPR's original value can be preserved.
      Optional<int> CSRSpillFI;
      if ((FrameInfo.hasCalls() || !isEntryFunction()) && CSRegs &&
          isCalleeSavedReg(CSRegs, LaneVGPR)) {
        CSRSpillFI = FrameInfo.CreateSpillStackObject(4, 4);
      }

      SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, CSRSpillFI));

      // Add this register as live-in to all blocks to avoid machine verifer
      // complaining about use of an undefined physical register.
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      // Continue filling the most recently allocated spill VGPR.
      LaneVGPR = SpillVGPRs.back().VGPR;
    }

    SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}
     310             : 
     311         132 : void SIMachineFunctionInfo::removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI) {
     312         928 :   for (auto &R : SGPRToVGPRSpills)
     313         664 :     MFI.RemoveStackObject(R.first);
     314         132 : }
     315             : 
     316             : 
     317             : /// \returns VGPR used for \p Dim' work item ID.
     318          12 : unsigned SIMachineFunctionInfo::getWorkItemIDVGPR(unsigned Dim) const {
     319          12 :   switch (Dim) {
     320             :   case 0:
     321             :     assert(hasWorkItemIDX());
     322             :     return AMDGPU::VGPR0;
     323           4 :   case 1:
     324             :     assert(hasWorkItemIDY());
     325           4 :     return AMDGPU::VGPR1;
     326           4 :   case 2:
     327             :     assert(hasWorkItemIDZ());
     328           4 :     return AMDGPU::VGPR2;
     329             :   }
     330           0 :   llvm_unreachable("unexpected dimension");
     331             : }
     332             : 
     333       16964 : MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
     334             :   assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
     335       16964 :   return AMDGPU::SGPR0 + NumUserSGPRs;
     336             : }
     337             : 
     338       30774 : MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
     339       30774 :   return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
     340             : }

Generated by: LCOV version 1.13