LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - AMDGPUSubtarget.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 191 198 96.5 %
Date: 2017-09-14 15:23:50 Functions: 17 17 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : /// \file
      11             : /// \brief Implements the AMDGPU specific subclass of TargetSubtarget.
      12             : //
      13             : //===----------------------------------------------------------------------===//
      14             : 
      15             : #include "AMDGPUSubtarget.h"
      16             : #include "AMDGPU.h"
      17             : #include "AMDGPUTargetMachine.h"
      18             : #include "AMDGPUCallLowering.h"
      19             : #include "AMDGPUInstructionSelector.h"
      20             : #include "AMDGPULegalizerInfo.h"
      21             : #include "AMDGPURegisterBankInfo.h"
      22             : #include "SIMachineFunctionInfo.h"
      23             : #include "llvm/ADT/SmallString.h"
      24             : #include "llvm/CodeGen/MachineScheduler.h"
      25             : #include "llvm/IR/MDBuilder.h"
      26             : #include "llvm/Target/TargetFrameLowering.h"
      27             : #include <algorithm>
      28             : 
      29             : using namespace llvm;
      30             : 
      31             : #define DEBUG_TYPE "amdgpu-subtarget"
      32             : 
      33             : #define GET_SUBTARGETINFO_TARGET_DESC
      34             : #define GET_SUBTARGETINFO_CTOR
      35             : #include "AMDGPUGenSubtargetInfo.inc"
      36             : 
      37             : AMDGPUSubtarget::~AMDGPUSubtarget() = default;
      38             : 
      39             : AMDGPUSubtarget &
      40        2049 : AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
      41             :                                                  StringRef GPU, StringRef FS) {
      42             :   // Determine default and user-specified characteristics
      43             :   // On SI+, we want FP64 denormals to be on by default. FP32 denormals can be
      44             :   // enabled, but some instructions do not respect them and they run at the
      45             :   // double precision rate, so don't enable by default.
      46             :   //
      47             :   // We want to be able to turn these off, but making this a subtarget feature
      48             :   // for SI has the unhelpful behavior that it unsets everything else if you
      49             :   // disable it.
      50             : 
      51        6147 :   SmallString<256> FullFS("+promote-alloca,+fp64-fp16-denormals,+dx10-clamp,+load-store-opt,");
      52        2049 :   if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
      53         696 :     FullFS += "+flat-address-space,+flat-for-global,+unaligned-buffer-access,+trap-handler,";
      54             : 
      55        2049 :   FullFS += FS;
      56             : 
      57        2049 :   ParseSubtargetFeatures(GPU, FullFS);
      58             : 
      59             :   // Unless +-flat-for-global is specified, turn on FlatForGlobal for all OS-es
      60             :   // on VI and newer hardware to avoid assertion failures due to missing ADDR64
      61             :   // variants of MUBUF instructions.
      62        2820 :   if (!hasAddr64() && !FS.contains("flat-for-global")) {
      63         435 :     FlatForGlobal = true;
      64             :   }
      65             : 
      66             :   // FIXME: I don't think think Evergreen has any useful support for
      67             :   // denormals, but should be checked. Should we issue a warning somewhere
      68             :   // if someone tries to enable these?
      69        2049 :   if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
      70         271 :     FP64FP16Denormals = false;
      71         271 :     FP32Denormals = false;
      72             :   }
      73             : 
      74             :   // Set defaults if needed.
      75        2049 :   if (MaxPrivateElementSize == 0)
      76        2028 :     MaxPrivateElementSize = 4;
      77             : 
      78        2049 :   if (LDSBankCount == 0)
      79         861 :     LDSBankCount = 32;
      80             : 
      81        2049 :   if (TT.getArch() == Triple::amdgcn) {
      82        1778 :     if (LocalMemorySize == 0)
      83         590 :       LocalMemorySize = 32768;
      84             : 
      85             :     // Do something sensible for unspecified target.
      86        1778 :     if (!HasMovrel && !HasVGPRIndexMode)
      87         590 :       HasMovrel = true;
      88             :   }
      89             : 
      90        4098 :   return *this;
      91             : }
      92             : 
      93        2049 : AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
      94        2049 :                                  const TargetMachine &TM)
      95             :   : AMDGPUGenSubtargetInfo(TT, GPU, FS),
      96             :     TargetTriple(TT),
      97        2049 :     Gen(TT.getArch() == Triple::amdgcn ? SOUTHERN_ISLANDS : R600),
      98             :     IsaVersion(ISAVersion0_0_0),
      99             :     WavefrontSize(64),
     100             :     LocalMemorySize(0),
     101             :     LDSBankCount(0),
     102             :     MaxPrivateElementSize(0),
     103             : 
     104             :     FastFMAF32(false),
     105             :     HalfRate64Ops(false),
     106             : 
     107             :     FP32Denormals(false),
     108             :     FP64FP16Denormals(false),
     109             :     FPExceptions(false),
     110             :     DX10Clamp(false),
     111             :     FlatForGlobal(false),
     112             :     AutoWaitcntBeforeBarrier(false),
     113             :     UnalignedScratchAccess(false),
     114             :     UnalignedBufferAccess(false),
     115             : 
     116             :     HasApertureRegs(false),
     117             :     EnableXNACK(false),
     118             :     TrapHandler(false),
     119             :     DebuggerInsertNops(false),
     120             :     DebuggerReserveRegs(false),
     121             :     DebuggerEmitPrologue(false),
     122             : 
     123             :     EnableVGPRSpilling(false),
     124             :     EnablePromoteAlloca(false),
     125             :     EnableLoadStoreOpt(false),
     126             :     EnableUnsafeDSOffsetFolding(false),
     127             :     EnableSIScheduler(false),
     128             :     DumpCode(false),
     129             : 
     130             :     FP64(false),
     131             :     IsGCN(false),
     132             :     GCN3Encoding(false),
     133             :     CIInsts(false),
     134             :     GFX9Insts(false),
     135             :     SGPRInitBug(false),
     136             :     HasSMemRealTime(false),
     137             :     Has16BitInsts(false),
     138             :     HasIntClamp(false),
     139             :     HasVOP3PInsts(false),
     140             :     HasMovrel(false),
     141             :     HasVGPRIndexMode(false),
     142             :     HasScalarStores(false),
     143             :     HasInv2PiInlineImm(false),
     144             :     HasSDWA(false),
     145             :     HasSDWAOmod(false),
     146             :     HasSDWAScalar(false),
     147             :     HasSDWASdst(false),
     148             :     HasSDWAMac(false),
     149             :     HasSDWAOutModsVOPC(false),
     150             :     HasDPP(false),
     151             :     FlatAddressSpace(false),
     152             :     FlatInstOffsets(false),
     153             :     FlatGlobalInsts(false),
     154             :     FlatScratchInsts(false),
     155             :     AddNoCarryInsts(false),
     156             : 
     157             :     R600ALUInst(false),
     158             :     CaymanISA(false),
     159             :     CFALUBug(false),
     160             :     HasVertexCache(false),
     161             :     TexVTXClauseSize(0),
     162             :     ScalarizeGlobal(false),
     163             : 
     164             :     FeatureDisable(false),
     165        6147 :     InstrItins(getInstrItineraryForCPU(GPU)) {
     166        4098 :   AS = AMDGPU::getAMDGPUAS(TT);
     167        2049 :   initializeSubtargetDependencies(TT, GPU, FS);
     168        2049 : }
     169             : 
     170       15071 : unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves,
     171             :   const Function &F) const {
     172       15071 :   if (NWaves == 1)
     173          26 :     return getLocalMemorySize();
     174       15045 :   unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second;
     175       15045 :   unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize);
     176       15045 :   unsigned MaxWaves = getMaxWavesPerEU();
     177       15045 :   return getLocalMemorySize() * MaxWaves / WorkGroupsPerCu / NWaves;
     178             : }
     179             : 
     180      123408 : unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes,
     181             :   const Function &F) const {
     182      123408 :   unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second;
     183      123408 :   unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize);
     184      123408 :   unsigned MaxWaves = getMaxWavesPerEU();
     185      123408 :   unsigned Limit = getLocalMemorySize() * MaxWaves / WorkGroupsPerCu;
     186      123408 :   unsigned NumWaves = Limit / (Bytes ? Bytes : 1u);
     187      123408 :   NumWaves = std::min(NumWaves, MaxWaves);
     188      246816 :   NumWaves = std::max(NumWaves, 1u);
     189      123408 :   return NumWaves;
     190             : }
     191             : 
     192      187391 : std::pair<unsigned, unsigned> AMDGPUSubtarget::getFlatWorkGroupSizes(
     193             :   const Function &F) const {
     194             :   // Default minimum/maximum flat work group sizes.
     195             :   std::pair<unsigned, unsigned> Default =
     196      187391 :     AMDGPU::isCompute(F.getCallingConv()) ?
     197      179830 :       std::pair<unsigned, unsigned>(getWavefrontSize() * 2,
     198      179830 :                                     getWavefrontSize() * 4) :
     199      194952 :       std::pair<unsigned, unsigned>(1, getWavefrontSize());
     200             : 
     201             :   // TODO: Do not process "amdgpu-max-work-group-size" attribute once mesa
     202             :   // starts using "amdgpu-flat-work-group-size" attribute.
     203      374782 :   Default.second = AMDGPU::getIntegerAttribute(
     204      187391 :     F, "amdgpu-max-work-group-size", Default.second);
     205      187391 :   Default.first = std::min(Default.first, Default.second);
     206             : 
     207             :   // Requested minimum/maximum flat work group sizes.
     208      374782 :   std::pair<unsigned, unsigned> Requested = AMDGPU::getIntegerPairAttribute(
     209      749564 :     F, "amdgpu-flat-work-group-size", Default);
     210             : 
     211             :   // Make sure requested minimum is less than requested maximum.
     212      187391 :   if (Requested.first > Requested.second)
     213           0 :     return Default;
     214             : 
     215             :   // Make sure requested values do not violate subtarget's specifications.
     216      374782 :   if (Requested.first < getMinFlatWorkGroupSize())
     217           0 :     return Default;
     218      374782 :   if (Requested.second > getMaxFlatWorkGroupSize())
     219           0 :     return Default;
     220             : 
     221      187391 :   return Requested;
     222             : }
     223             : 
     224       30179 : std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(
     225             :   const Function &F) const {
     226             :   // Default minimum/maximum number of waves per execution unit.
     227       60358 :   std::pair<unsigned, unsigned> Default(1, getMaxWavesPerEU());
     228             : 
     229             :   // Default/requested minimum/maximum flat work group sizes.
     230       30179 :   std::pair<unsigned, unsigned> FlatWorkGroupSizes = getFlatWorkGroupSizes(F);
     231             : 
     232             :   // If minimum/maximum flat work group sizes were explicitly requested using
     233             :   // "amdgpu-flat-work-group-size" attribute, then set default minimum/maximum
     234             :   // number of waves per execution unit to values implied by requested
     235             :   // minimum/maximum flat work group sizes.
     236             :   unsigned MinImpliedByFlatWorkGroupSize =
     237       60358 :     getMaxWavesPerEU(FlatWorkGroupSizes.second);
     238       30179 :   bool RequestedFlatWorkGroupSize = false;
     239             : 
     240             :   // TODO: Do not process "amdgpu-max-work-group-size" attribute once mesa
     241             :   // starts using "amdgpu-flat-work-group-size" attribute.
     242       90523 :   if (F.hasFnAttribute("amdgpu-max-work-group-size") ||
     243       60330 :       F.hasFnAttribute("amdgpu-flat-work-group-size")) {
     244             :     Default.first = MinImpliedByFlatWorkGroupSize;
     245             :     RequestedFlatWorkGroupSize = true;
     246             :   }
     247             : 
     248             :   // Requested minimum/maximum number of waves per execution unit.
     249       60358 :   std::pair<unsigned, unsigned> Requested = AMDGPU::getIntegerPairAttribute(
     250      120716 :     F, "amdgpu-waves-per-eu", Default, true);
     251             : 
     252             :   // Make sure requested minimum is less than requested maximum.
     253       30179 :   if (Requested.second && Requested.first > Requested.second)
     254           0 :     return Default;
     255             : 
     256             :   // Make sure requested values do not violate subtarget's specifications.
     257       90537 :   if (Requested.first < getMinWavesPerEU() ||
     258       60358 :       Requested.first > getMaxWavesPerEU())
     259           0 :     return Default;
     260       60358 :   if (Requested.second > getMaxWavesPerEU())
     261           0 :     return Default;
     262             : 
     263             :   // Make sure requested values are compatible with values implied by requested
     264             :   // minimum/maximum flat work group sizes.
     265       30179 :   if (RequestedFlatWorkGroupSize &&
     266             :       Requested.first < MinImpliedByFlatWorkGroupSize)
     267           2 :     return Default;
     268             : 
     269       30177 :   return Requested;
     270             : }
     271             : 
     272        3255 : bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const {
     273        3255 :   Function *Kernel = I->getParent()->getParent();
     274        3255 :   unsigned MinSize = 0;
     275        3255 :   unsigned MaxSize = getFlatWorkGroupSizes(*Kernel).second;
     276        3255 :   bool IdQuery = false;
     277             : 
     278             :   // If reqd_work_group_size is present it narrows value down.
     279        3214 :   if (auto *CI = dyn_cast<CallInst>(I)) {
     280        3214 :     const Function *F = CI->getCalledFunction();
     281             :     if (F) {
     282        3214 :       unsigned Dim = UINT_MAX;
     283        3214 :       switch (F->getIntrinsicID()) {
     284        2718 :       case Intrinsic::amdgcn_workitem_id_x:
     285             :       case Intrinsic::r600_read_tidig_x:
     286        2718 :         IdQuery = true;
     287             :         LLVM_FALLTHROUGH;
     288             :       case Intrinsic::r600_read_local_size_x:
     289             :         Dim = 0;
     290             :         break;
     291         171 :       case Intrinsic::amdgcn_workitem_id_y:
     292             :       case Intrinsic::r600_read_tidig_y:
     293         171 :         IdQuery = true;
     294             :         LLVM_FALLTHROUGH;
     295             :       case Intrinsic::r600_read_local_size_y:
     296             :         Dim = 1;
     297             :         break;
     298         132 :       case Intrinsic::amdgcn_workitem_id_z:
     299             :       case Intrinsic::r600_read_tidig_z:
     300         132 :         IdQuery = true;
     301             :         LLVM_FALLTHROUGH;
     302             :       case Intrinsic::r600_read_local_size_z:
     303             :         Dim = 2;
     304             :         break;
     305             :       default:
     306             :         break;
     307             :       }
     308             :       if (Dim <= 3) {
     309        6428 :         if (auto Node = Kernel->getMetadata("reqd_work_group_size"))
     310           6 :           if (Node->getNumOperands() == 3)
     311           6 :             MinSize = MaxSize = mdconst::extract<ConstantInt>(
     312          18 :                                   Node->getOperand(Dim))->getZExtValue();
     313             :       }
     314             :     }
     315             :   }
     316             : 
     317        3255 :   if (!MaxSize)
     318             :     return false;
     319             : 
     320             :   // Range metadata is [Lo, Hi). For ID query we need to pass max size
     321             :   // as Hi. For size query we need to pass Hi + 1.
     322        3255 :   if (IdQuery)
     323             :     MinSize = 0;
     324             :   else
     325         234 :     ++MaxSize;
     326             : 
     327        6510 :   MDBuilder MDB(I->getContext());
     328        9765 :   MDNode *MaxWorkGroupSizeRange = MDB.createRange(APInt(32, MinSize),
     329       13020 :                                                   APInt(32, MaxSize));
     330        3255 :   I->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange);
     331        3255 :   return true;
     332             : }
     333             : 
     334         253 : R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
     335         253 :                              const TargetMachine &TM) :
     336             :   AMDGPUSubtarget(TT, GPU, FS, TM),
     337             :   InstrInfo(*this),
     338             :   FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
     339         506 :   TLInfo(TM, *this) {}
     340             : 
     341        1796 : SISubtarget::SISubtarget(const Triple &TT, StringRef GPU, StringRef FS,
     342        1796 :                          const TargetMachine &TM)
     343             :     : AMDGPUSubtarget(TT, GPU, FS, TM), InstrInfo(*this),
     344             :       FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
     345       10776 :       TLInfo(TM, *this) {
     346        3592 :   CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering()));
     347        3592 :   Legalizer.reset(new AMDGPULegalizerInfo());
     348             : 
     349        5388 :   RegBankInfo.reset(new AMDGPURegisterBankInfo(*getRegisterInfo()));
     350        1796 :   InstSelector.reset(new AMDGPUInstructionSelector(
     351        3592 :       *this, *static_cast<AMDGPURegisterBankInfo *>(RegBankInfo.get())));
     352        1796 : }
     353             : 
     354       35457 : void SISubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
     355             :                                       unsigned NumRegionInstrs) const {
     356             :   // Track register pressure so the scheduler can try to decrease
     357             :   // pressure once register usage is above the threshold defined by
     358             :   // SIRegisterInfo::getRegPressureSetLimit()
     359       35457 :   Policy.ShouldTrackPressure = true;
     360             : 
     361             :   // Enabling both top down and bottom up scheduling seems to give us less
     362             :   // register spills than just using one of these approaches on its own.
     363       35457 :   Policy.OnlyTopDown = false;
     364       35457 :   Policy.OnlyBottomUp = false;
     365             : 
     366             :   // Enabling ShouldTrackLaneMasks crashes the SI Machine Scheduler.
     367       35457 :   if (!enableSIScheduler())
     368       35454 :     Policy.ShouldTrackLaneMasks = true;
     369       35457 : }
     370             : 
     371       17733 : bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const {
     372       35074 :   return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv());
     373             : }
     374             : 
     375        1745 : unsigned SISubtarget::getKernArgSegmentSize(const MachineFunction &MF,
     376             :                                             unsigned ExplicitArgBytes) const {
     377        1745 :   unsigned ImplicitBytes = getImplicitArgNumBytes(MF);
     378        1745 :   if (ImplicitBytes == 0)
     379             :     return ExplicitArgBytes;
     380             : 
     381         190 :   unsigned Alignment = getAlignmentForImplicitArgPtr();
     382         190 :   return alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes;
     383             : }
     384             : 
     385         979 : unsigned SISubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
     386         979 :   if (getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
     387         495 :     if (SGPRs <= 80)
     388             :       return 10;
     389          83 :     if (SGPRs <= 88)
     390             :       return 9;
     391          83 :     if (SGPRs <= 100)
     392             :       return 8;
     393          72 :     return 7;
     394             :   }
     395         484 :   if (SGPRs <= 48)
     396             :     return 10;
     397         128 :   if (SGPRs <= 56)
     398             :     return 9;
     399         118 :   if (SGPRs <= 64)
     400             :     return 8;
     401         114 :   if (SGPRs <= 72)
     402             :     return 7;
     403          85 :   if (SGPRs <= 80)
     404             :     return 6;
     405          85 :   return 5;
     406             : }
     407             : 
     408         979 : unsigned SISubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const {
     409         979 :   if (VGPRs <= 24)
     410             :     return 10;
     411         859 :   if (VGPRs <= 28)
     412             :     return 9;
     413         840 :   if (VGPRs <= 32)
     414             :     return 8;
     415         623 :   if (VGPRs <= 36)
     416             :     return 7;
     417         353 :   if (VGPRs <= 40)
     418             :     return 6;
     419         318 :   if (VGPRs <= 48)
     420             :     return 5;
     421         302 :   if (VGPRs <= 64)
     422             :     return 4;
     423         265 :   if (VGPRs <= 84)
     424             :     return 3;
     425         162 :   if (VGPRs <= 128)
     426             :     return 2;
     427         136 :   return 1;
     428             : }
     429             : 
     430      106301 : unsigned SISubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
     431      106301 :   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
     432      106301 :   if (MFI.hasFlatScratchInit()) {
     433        2773 :     if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
     434             :       return 6; // FLAT_SCRATCH, XNACK, VCC (in that order).
     435        1179 :     if (getGeneration() == AMDGPUSubtarget::SEA_ISLANDS)
     436             :       return 4; // FLAT_SCRATCH, VCC (in that order).
     437             :   }
     438             : 
     439      103797 :   if (isXNACKEnabled())
     440             :     return 4; // XNACK, VCC (in that order).
     441       99105 :   return 2; // VCC.
     442             : }
     443             : 
     444      106261 : unsigned SISubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
     445      106261 :   const Function &F = *MF.getFunction();
     446      106261 :   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
     447             : 
     448             :   // Compute maximum number of SGPRs function can use using default/requested
     449             :   // minimum number of waves per execution unit.
     450             :   std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
     451      212522 :   unsigned MaxNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, false);
     452      212522 :   unsigned MaxAddressableNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, true);
     453             : 
     454             :   // Check if maximum number of SGPRs was explicitly requested using
     455             :   // "amdgpu-num-sgpr" attribute.
     456      212522 :   if (F.hasFnAttribute("amdgpu-num-sgpr")) {
     457         120 :     unsigned Requested = AMDGPU::getIntegerAttribute(
     458          40 :       F, "amdgpu-num-sgpr", MaxNumSGPRs);
     459             : 
     460             :     // Make sure requested value does not violate subtarget's specifications.
     461          40 :     if (Requested && (Requested <= getReservedNumSGPRs(MF)))
     462             :       Requested = 0;
     463             : 
     464             :     // If more SGPRs are required to support the input user/system SGPRs,
     465             :     // increase to accommodate them.
     466             :     //
     467             :     // FIXME: This really ends up using the requested number of SGPRs + number
     468             :     // of reserved special registers in total. Theoretically you could re-use
     469             :     // the last input registers for these special registers, but this would
     470             :     // require a lot of complexity to deal with the weird aliasing.
     471          40 :     unsigned InputNumSGPRs = MFI.getNumPreloadedSGPRs();
     472          40 :     if (Requested && Requested < InputNumSGPRs)
     473           0 :       Requested = InputNumSGPRs;
     474             : 
     475             :     // Make sure requested value is compatible with values implied by
     476             :     // default/requested minimum/maximum number of waves per execution unit.
     477          80 :     if (Requested && Requested > getMaxNumSGPRs(WavesPerEU.first, false))
     478             :       Requested = 0;
     479          40 :     if (WavesPerEU.second &&
     480         120 :         Requested && Requested < getMinNumSGPRs(WavesPerEU.second))
     481             :       Requested = 0;
     482             : 
     483          40 :     if (Requested)
     484          40 :       MaxNumSGPRs = Requested;
     485             :   }
     486             : 
     487      106261 :   if (hasSGPRInitBug())
     488       29759 :     MaxNumSGPRs = AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
     489             : 
     490      212522 :   return std::min(MaxNumSGPRs - getReservedNumSGPRs(MF),
     491      212522 :                   MaxAddressableNumSGPRs);
     492             : }
     493             : 
     494       76332 : unsigned SISubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
     495       76332 :   const Function &F = *MF.getFunction();
     496       76332 :   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
     497             : 
     498             :   // Compute maximum number of VGPRs function can use using default/requested
     499             :   // minimum number of waves per execution unit.
     500             :   std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
     501      152664 :   unsigned MaxNumVGPRs = getMaxNumVGPRs(WavesPerEU.first);
     502             : 
     503             :   // Check if maximum number of VGPRs was explicitly requested using
     504             :   // "amdgpu-num-vgpr" attribute.
     505      152664 :   if (F.hasFnAttribute("amdgpu-num-vgpr")) {
     506          18 :     unsigned Requested = AMDGPU::getIntegerAttribute(
     507           6 :       F, "amdgpu-num-vgpr", MaxNumVGPRs);
     508             : 
     509             :     // Make sure requested value does not violate subtarget's specifications.
     510          12 :     if (Requested && Requested <= getReservedNumVGPRs(MF))
     511             :       Requested = 0;
     512             : 
     513             :     // Make sure requested value is compatible with values implied by
     514             :     // default/requested minimum/maximum number of waves per execution unit.
     515          12 :     if (Requested && Requested > getMaxNumVGPRs(WavesPerEU.first))
     516             :       Requested = 0;
     517           6 :     if (WavesPerEU.second &&
     518          18 :         Requested && Requested < getMinNumVGPRs(WavesPerEU.second))
     519             :       Requested = 0;
     520             : 
     521           6 :     if (Requested)
     522           6 :       MaxNumVGPRs = Requested;
     523             :   }
     524             : 
     525      152664 :   return MaxNumVGPRs - getReservedNumVGPRs(MF);
     526             : }

Generated by: LCOV version 1.13