LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - SIMachineFunctionInfo.h (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 112 119 94.1 %
Date: 2018-06-17 00:07:59 Functions: 16 20 80.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : /// \file
      11             : //
      12             : //===----------------------------------------------------------------------===//
      13             : 
      14             : #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
      15             : #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
      16             : 
      17             : #include "AMDGPUArgumentUsageInfo.h"
      18             : #include "AMDGPUMachineFunction.h"
      19             : #include "SIInstrInfo.h"
      20             : #include "SIRegisterInfo.h"
      21             : #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
      22             : #include "llvm/ADT/ArrayRef.h"
      23             : #include "llvm/ADT/DenseMap.h"
      24             : #include "llvm/ADT/Optional.h"
      25             : #include "llvm/ADT/SmallVector.h"
      26             : #include "llvm/CodeGen/PseudoSourceValue.h"
      27             : #include "llvm/CodeGen/TargetInstrInfo.h"
      28             : #include "llvm/MC/MCRegisterInfo.h"
      29             : #include "llvm/Support/ErrorHandling.h"
      30             : #include <array>
      31             : #include <cassert>
      32             : #include <utility>
      33             : #include <vector>
      34             : 
      35             : namespace llvm {
      36             : 
      37             : class MachineFrameInfo;
      38             : class MachineFunction;
      39             : class TargetRegisterClass;
      40             : 
      41         759 : class AMDGPUImagePseudoSourceValue : public PseudoSourceValue {
      42             : public:
      43             :   // TODO: Is the img rsrc useful?
      44         759 :   explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII) :
      45         759 :     PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) {}
      46             : 
      47        4350 :   bool isConstant(const MachineFrameInfo *) const override {
      48             :     // This should probably be true for most images, but we will start by being
      49             :     // conservative.
      50        4350 :     return false;
      51             :   }
      52             : 
      53        1265 :   bool isAliased(const MachineFrameInfo *) const override {
      54        1265 :     return true;
      55             :   }
      56             : 
      57         387 :   bool mayAlias(const MachineFrameInfo *) const override {
      58         387 :     return true;
      59             :   }
      60             : };
      61             : 
      62         467 : class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue {
      63             : public:
      64         467 :   explicit AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII) :
      65         467 :     PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) { }
      66             : 
      67        1722 :   bool isConstant(const MachineFrameInfo *) const override {
      68             :     // This should probably be true for most images, but we will start by being
      69             :     // conservative.
      70        1722 :     return false;
      71             :   }
      72             : 
      73         714 :   bool isAliased(const MachineFrameInfo *) const override {
      74         714 :     return true;
      75             :   }
      76             : 
      77           8 :   bool mayAlias(const MachineFrameInfo *) const override {
      78           8 :     return true;
      79             :   }
      80             : };
      81             : 
      82             : /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
      83             : /// tells the hardware which interpolation parameters to load.
      84       93020 : class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
      85             :   unsigned TIDReg = AMDGPU::NoRegister;
      86             : 
      87             :   // Registers that may be reserved for spilling purposes. These may be the same
      88             :   // as the input registers.
      89             :   unsigned ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;
      90             :   unsigned ScratchWaveOffsetReg = AMDGPU::SCRATCH_WAVE_OFFSET_REG;
      91             : 
      92             :   // This is the current function's incremented size from the kernel's scratch
      93             :   // wave offset register. For an entry function, this is exactly the same as
      94             :   // the ScratchWaveOffsetReg.
      95             :   unsigned FrameOffsetReg = AMDGPU::FP_REG;
      96             : 
      97             :   // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg.
      98             :   unsigned StackPtrOffsetReg = AMDGPU::SP_REG;
      99             : 
     100             :   AMDGPUFunctionArgInfo ArgInfo;
     101             : 
     102             :   // Graphics info.
     103             :   unsigned PSInputAddr = 0;
     104             :   unsigned PSInputEnable = 0;
     105             : 
     106             :   /// Number of bytes of arguments this function has on the stack. If the callee
     107             :   /// is expected to restore the argument stack this should be a multiple of 16,
     108             :   /// all usable during a tail call.
     109             :   ///
     110             :   /// The alternative would forbid tail call optimisation in some cases: if we
     111             :   /// want to transfer control from a function with 8-bytes of stack-argument
     112             :   /// space to a function with 16-bytes then misalignment of this value would
     113             :   /// make a stack adjustment necessary, which could not be undone by the
     114             :   /// callee.
     115             :   unsigned BytesInStackArgArea = 0;
     116             : 
     117             :   bool ReturnsVoid = true;
     118             : 
     119             :   // A pair of default/requested minimum/maximum flat work group sizes.
     120             :   // Minimum - first, maximum - second.
     121             :   std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0};
     122             : 
     123             :   // A pair of default/requested minimum/maximum number of waves per execution
     124             :   // unit. Minimum - first, maximum - second.
     125             :   std::pair<unsigned, unsigned> WavesPerEU = {0, 0};
     126             : 
     127             :   // Stack object indices for work group IDs.
     128             :   std::array<int, 3> DebuggerWorkGroupIDStackObjectIndices = {{0, 0, 0}};
     129             : 
     130             :   // Stack object indices for work item IDs.
     131             :   std::array<int, 3> DebuggerWorkItemIDStackObjectIndices = {{0, 0, 0}};
     132             : 
     133             :   DenseMap<const Value *,
     134             :            std::unique_ptr<const AMDGPUBufferPseudoSourceValue>> BufferPSVs;
     135             :   DenseMap<const Value *,
     136             :            std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs;
     137             : 
     138             : private:
     139             :   unsigned LDSWaveSpillSize = 0;
     140             :   unsigned NumUserSGPRs = 0;
     141             :   unsigned NumSystemSGPRs = 0;
     142             : 
     143             :   bool HasSpilledSGPRs = false;
     144             :   bool HasSpilledVGPRs = false;
     145             :   bool HasNonSpillStackObjects = false;
     146             :   bool IsStackRealigned = false;
     147             : 
     148             :   unsigned NumSpilledSGPRs = 0;
     149             :   unsigned NumSpilledVGPRs = 0;
     150             : 
     151             :   // Feature bits required for inputs passed in user SGPRs.
     152             :   bool PrivateSegmentBuffer : 1;
     153             :   bool DispatchPtr : 1;
     154             :   bool QueuePtr : 1;
     155             :   bool KernargSegmentPtr : 1;
     156             :   bool DispatchID : 1;
     157             :   bool FlatScratchInit : 1;
     158             :   bool GridWorkgroupCountX : 1;
     159             :   bool GridWorkgroupCountY : 1;
     160             :   bool GridWorkgroupCountZ : 1;
     161             : 
     162             :   // Feature bits required for inputs passed in system SGPRs.
     163             :   bool WorkGroupIDX : 1; // Always initialized.
     164             :   bool WorkGroupIDY : 1;
     165             :   bool WorkGroupIDZ : 1;
     166             :   bool WorkGroupInfo : 1;
     167             :   bool PrivateSegmentWaveByteOffset : 1;
     168             : 
     169             :   bool WorkItemIDX : 1; // Always initialized.
     170             :   bool WorkItemIDY : 1;
     171             :   bool WorkItemIDZ : 1;
     172             : 
     173             :   // Private memory buffer
     174             :   // Compute directly in sgpr[0:1]
     175             :   // Other shaders indirect 64-bits at sgpr[0:1]
     176             :   bool ImplicitBufferPtr : 1;
     177             : 
     178             :   // Pointer to where the ABI inserts special kernel arguments separate from the
     179             :   // user arguments. This is an offset from the KernargSegmentPtr.
     180             :   bool ImplicitArgPtr : 1;
     181             : 
     182             :   // The hard-wired high half of the address of the global information table
     183             :   // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since
     184             :   // current hardware only allows a 16 bit value.
     185             :   unsigned GITPtrHigh;
     186             : 
     187             :   unsigned HighBitsOf32BitAddress;
     188             : 
     189             :   // Current recorded maximum possible occupancy.
     190             :   unsigned Occupancy;
     191             : 
     192             :   MCPhysReg getNextUserSGPR() const;
     193             : 
     194             :   MCPhysReg getNextSystemSGPR() const;
     195             : 
     196             : public:
     197             :   struct SpilledReg {
     198             :     unsigned VGPR = 0;
     199             :     int Lane = -1;
     200             : 
     201             :     SpilledReg() = default;
     202        1207 :     SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) {}
     203             : 
     204             :     bool hasLane() { return Lane != -1;}
     205             :     bool hasReg() { return VGPR != 0;}
     206             :   };
     207             : 
     208         138 :   struct SGPRSpillVGPRCSR {
     209             :     // VGPR used for SGPR spills
     210             :     unsigned VGPR;
     211             : 
     212             :     // If the VGPR is a CSR, the stack slot used to save/restore it in the
     213             :     // prolog/epilog.
     214             :     Optional<int> FI;
     215             : 
     216         136 :     SGPRSpillVGPRCSR(unsigned V, Optional<int> F) : VGPR(V), FI(F) {}
     217             :   };
     218             : 
     219             : private:
     220             :   // SGPR->VGPR spilling support.
     221             :   using SpillRegMask = std::pair<unsigned, unsigned>;
     222             : 
     223             :   // Track VGPR + wave index for each subregister of the SGPR spilled to
     224             :   // frameindex key.
     225             :   DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
     226             :   unsigned NumVGPRSpillLanes = 0;
     227             :   SmallVector<SGPRSpillVGPRCSR, 2> SpillVGPRs;
     228             : 
     229             : public:
     230             :   SIMachineFunctionInfo(const MachineFunction &MF);
     231             : 
     232        1433 :   ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
     233        1433 :     auto I = SGPRToVGPRSpills.find(FrameIndex);
     234             :     return (I == SGPRToVGPRSpills.end()) ?
     235        2774 :       ArrayRef<SpilledReg>() : makeArrayRef(I->second);
     236             :   }
     237             : 
     238             :   ArrayRef<SGPRSpillVGPRCSR> getSGPRSpillVGPRs() const {
     239             :     return SpillVGPRs;
     240             :   }
     241             : 
     242             :   bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
     243             :   void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI);
     244             : 
     245             :   bool hasCalculatedTID() const { return TIDReg != 0; };
     246             :   unsigned getTIDReg() const { return TIDReg; };
     247           0 :   void setTIDReg(unsigned Reg) { TIDReg = Reg; }
     248             : 
     249             :   unsigned getBytesInStackArgArea() const {
     250             :     return BytesInStackArgArea;
     251             :   }
     252             : 
     253             :   void setBytesInStackArgArea(unsigned Bytes) {
     254       17837 :     BytesInStackArgArea = Bytes;
     255             :   }
     256             : 
     257             :   // Add user SGPRs.
     258             :   unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
     259             :   unsigned addDispatchPtr(const SIRegisterInfo &TRI);
     260             :   unsigned addQueuePtr(const SIRegisterInfo &TRI);
     261             :   unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
     262             :   unsigned addDispatchID(const SIRegisterInfo &TRI);
     263             :   unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
     264             :   unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI);
     265             : 
     266             :   // Add system SGPRs.
     267       15363 :   unsigned addWorkGroupIDX() {
     268       15363 :     ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR());
     269       15363 :     NumSystemSGPRs += 1;
     270       15363 :     return ArgInfo.WorkGroupIDX.getRegister();
     271             :   }
     272             : 
     273          24 :   unsigned addWorkGroupIDY() {
     274          24 :     ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR());
     275          24 :     NumSystemSGPRs += 1;
     276          24 :     return ArgInfo.WorkGroupIDY.getRegister();
     277             :   }
     278             : 
     279          24 :   unsigned addWorkGroupIDZ() {
     280          24 :     ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR());
     281          24 :     NumSystemSGPRs += 1;
     282          24 :     return ArgInfo.WorkGroupIDZ.getRegister();
     283             :   }
     284             : 
     285           0 :   unsigned addWorkGroupInfo() {
     286           0 :     ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR());
     287           0 :     NumSystemSGPRs += 1;
     288           0 :     return ArgInfo.WorkGroupInfo.getRegister();
     289             :   }
     290             : 
     291             :   // Add special VGPR inputs
     292             :   void setWorkItemIDX(ArgDescriptor Arg) {
     293       15381 :     ArgInfo.WorkItemIDX = Arg;
     294             :   }
     295             : 
     296             :   void setWorkItemIDY(ArgDescriptor Arg) {
     297         147 :     ArgInfo.WorkItemIDY = Arg;
     298             :   }
     299             : 
     300             :   void setWorkItemIDZ(ArgDescriptor Arg) {
     301          84 :     ArgInfo.WorkItemIDZ = Arg;
     302             :   }
     303             : 
     304       15363 :   unsigned addPrivateSegmentWaveByteOffset() {
     305             :     ArgInfo.PrivateSegmentWaveByteOffset
     306       15363 :       = ArgDescriptor::createRegister(getNextSystemSGPR());
     307       15363 :     NumSystemSGPRs += 1;
     308       15363 :     return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
     309             :   }
     310             : 
     311             :   void setPrivateSegmentWaveByteOffset(unsigned Reg) {
     312          45 :     ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg);
     313             :   }
     314             : 
     315             :   bool hasPrivateSegmentBuffer() const {
     316       18514 :     return PrivateSegmentBuffer;
     317             :   }
     318             : 
     319             :   bool hasDispatchPtr() const {
     320       21963 :     return DispatchPtr;
     321             :   }
     322             : 
     323             :   bool hasQueuePtr() const {
     324       19913 :     return QueuePtr;
     325             :   }
     326             : 
     327             :   bool hasKernargSegmentPtr() const {
     328       19913 :     return KernargSegmentPtr;
     329             :   }
     330             : 
     331             :   bool hasDispatchID() const {
     332       19913 :     return DispatchID;
     333             :   }
     334             : 
     335             :   bool hasFlatScratchInit() const {
     336      166846 :     return FlatScratchInit;
     337             :   }
     338             : 
     339             :   bool hasGridWorkgroupCountX() const {
     340        2054 :     return GridWorkgroupCountX;
     341             :   }
     342             : 
     343             :   bool hasGridWorkgroupCountY() const {
     344        2054 :     return GridWorkgroupCountY;
     345             :   }
     346             : 
     347             :   bool hasGridWorkgroupCountZ() const {
     348        2054 :     return GridWorkgroupCountZ;
     349             :   }
     350             : 
     351             :   bool hasWorkGroupIDX() const {
     352       34285 :     return WorkGroupIDX;
     353             :   }
     354             : 
     355             :   bool hasWorkGroupIDY() const {
     356       34285 :     return WorkGroupIDY;
     357             :   }
     358             : 
     359             :   bool hasWorkGroupIDZ() const {
     360       34285 :     return WorkGroupIDZ;
     361             :   }
     362             : 
     363             :   bool hasWorkGroupInfo() const {
     364       32886 :     return WorkGroupInfo;
     365             :   }
     366             : 
     367             :   bool hasPrivateSegmentWaveByteOffset() const {
     368       16438 :     return PrivateSegmentWaveByteOffset;
     369             :   }
     370             : 
     371             :   bool hasWorkItemIDX() const {
     372       17837 :     return WorkItemIDX;
     373             :   }
     374             : 
     375             :   bool hasWorkItemIDY() const {
     376       34209 :     return WorkItemIDY;
     377             :   }
     378             : 
     379             :   bool hasWorkItemIDZ() const {
     380       34285 :     return WorkItemIDZ;
     381             :   }
     382             : 
     383             :   bool hasImplicitArgPtr() const {
     384        1399 :     return ImplicitArgPtr;
     385             :   }
     386             : 
     387             :   bool hasImplicitBufferPtr() const {
     388       16883 :     return ImplicitBufferPtr;
     389             :   }
     390             : 
     391             :   AMDGPUFunctionArgInfo &getArgInfo() {
     392       17837 :     return ArgInfo;
     393             :   }
     394             : 
     395             :   const AMDGPUFunctionArgInfo &getArgInfo() const {
     396         487 :     return ArgInfo;
     397             :   }
     398             : 
     399             :   std::pair<const ArgDescriptor *, const TargetRegisterClass *>
     400             :   getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
     401       35915 :     return ArgInfo.getPreloadedValue(Value);
     402             :   }
     403             : 
     404             :   unsigned getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
     405        2589 :     return ArgInfo.getPreloadedValue(Value).first->getRegister();
     406             :   }
     407             : 
     408             :   unsigned getGITPtrHigh() const {
     409             :     return GITPtrHigh;
     410             :   }
     411             : 
     412             :   unsigned get32BitAddressHighBits() const {
     413             :     return HighBitsOf32BitAddress;
     414             :   }
     415             : 
     416             :   unsigned getNumUserSGPRs() const {
     417             :     return NumUserSGPRs;
     418             :   }
     419             : 
     420             :   unsigned getNumPreloadedSGPRs() const {
     421        1180 :     return NumUserSGPRs + NumSystemSGPRs;
     422             :   }
     423             : 
     424             :   unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const {
     425          50 :     return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
     426             :   }
     427             : 
     428             :   /// Returns the physical register reserved for use as the resource
     429             :   /// descriptor for scratch accesses.
     430             :   unsigned getScratchRSrcReg() const {
     431             :     return ScratchRSrcReg;
     432             :   }
     433             : 
     434             :   void setScratchRSrcReg(unsigned Reg) {
     435             :     assert(Reg != 0 && "Should never be unset");
     436       16846 :     ScratchRSrcReg = Reg;
     437             :   }
     438             : 
     439             :   unsigned getScratchWaveOffsetReg() const {
     440             :     return ScratchWaveOffsetReg;
     441             :   }
     442             : 
     443             :   unsigned getFrameOffsetReg() const {
     444             :     return FrameOffsetReg;
     445             :   }
     446             : 
     447             :   void setStackPtrOffsetReg(unsigned Reg) {
     448             :     assert(Reg != 0 && "Should never be unset");
     449        1748 :     StackPtrOffsetReg = Reg;
     450             :   }
     451             : 
     452             :   // Note the unset value for this is AMDGPU::SP_REG rather than
     453             :   // NoRegister. This is mostly a workaround for MIR tests where state that
     454             :   // can't be directly computed from the function is not preserved in serialized
     455             :   // MIR.
     456             :   unsigned getStackPtrOffsetReg() const {
     457             :     return StackPtrOffsetReg;
     458             :   }
     459             : 
     460             :   void setScratchWaveOffsetReg(unsigned Reg) {
     461             :     assert(Reg != 0 && "Should never be unset");
     462       16916 :     ScratchWaveOffsetReg = Reg;
     463       16916 :     if (isEntryFunction())
     464        2775 :       FrameOffsetReg = ScratchWaveOffsetReg;
     465             :   }
     466             : 
     467             :   unsigned getQueuePtrUserSGPR() const {
     468          26 :     return ArgInfo.QueuePtr.getRegister();
     469             :   }
     470             : 
     471             :   unsigned getImplicitBufferPtrUserSGPR() const {
     472           2 :     return ArgInfo.ImplicitBufferPtr.getRegister();
     473             :   }
     474             : 
     475             :   bool hasSpilledSGPRs() const {
     476             :     return HasSpilledSGPRs;
     477             :   }
     478             : 
     479             :   void setHasSpilledSGPRs(bool Spill = true) {
     480         729 :     HasSpilledSGPRs = Spill;
     481             :   }
     482             : 
     483             :   bool hasSpilledVGPRs() const {
     484             :     return HasSpilledVGPRs;
     485             :   }
     486             : 
     487             :   void setHasSpilledVGPRs(bool Spill = true) {
     488        1185 :     HasSpilledVGPRs = Spill;
     489             :   }
     490             : 
     491             :   bool hasNonSpillStackObjects() const {
     492             :     return HasNonSpillStackObjects;
     493             :   }
     494             : 
     495             :   void setHasNonSpillStackObjects(bool StackObject = true) {
     496         347 :     HasNonSpillStackObjects = StackObject;
     497             :   }
     498             : 
     499             :   bool isStackRealigned() const {
     500             :     return IsStackRealigned;
     501             :   }
     502             : 
     503             :   void setIsStackRealigned(bool Realigned = true) {
     504           3 :     IsStackRealigned = Realigned;
     505             :   }
     506             : 
     507             :   unsigned getNumSpilledSGPRs() const {
     508             :     return NumSpilledSGPRs;
     509             :   }
     510             : 
     511             :   unsigned getNumSpilledVGPRs() const {
     512             :     return NumSpilledVGPRs;
     513             :   }
     514             : 
     515             :   void addToSpilledSGPRs(unsigned num) {
     516         723 :     NumSpilledSGPRs += num;
     517             :   }
     518             : 
     519             :   void addToSpilledVGPRs(unsigned num) {
     520        1265 :     NumSpilledVGPRs += num;
     521             :   }
     522             : 
     523             :   unsigned getPSInputAddr() const {
     524             :     return PSInputAddr;
     525             :   }
     526             : 
     527             :   unsigned getPSInputEnable() const {
     528             :     return PSInputEnable;
     529             :   }
     530             : 
     531             :   bool isPSInputAllocated(unsigned Index) const {
     532        1120 :     return PSInputAddr & (1 << Index);
     533             :   }
     534             : 
     535             :   void markPSInputAllocated(unsigned Index) {
     536        1721 :     PSInputAddr |= 1 << Index;
     537             :   }
     538             : 
     539             :   void markPSInputEnabled(unsigned Index) {
     540        1609 :     PSInputEnable |= 1 << Index;
     541             :   }
     542             : 
     543             :   bool returnsVoid() const {
     544             :     return ReturnsVoid;
     545             :   }
     546             : 
     547             :   void setIfReturnsVoid(bool Value) {
     548        2436 :     ReturnsVoid = Value;
     549             :   }
     550             : 
     551             :   /// \returns A pair of default/requested minimum/maximum flat work group sizes
     552             :   /// for this function.
     553             :   std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
     554             :     return FlatWorkGroupSizes;
     555             :   }
     556             : 
     557             :   /// \returns Default/requested minimum flat work group size for this function.
     558             :   unsigned getMinFlatWorkGroupSize() const {
     559             :     return FlatWorkGroupSizes.first;
     560             :   }
     561             : 
     562             :   /// \returns Default/requested maximum flat work group size for this function.
     563             :   unsigned getMaxFlatWorkGroupSize() const {
     564             :     return FlatWorkGroupSizes.second;
     565             :   }
     566             : 
     567             :   /// \returns A pair of default/requested minimum/maximum number of waves per
     568             :   /// execution unit.
     569             :   std::pair<unsigned, unsigned> getWavesPerEU() const {
     570             :     return WavesPerEU;
     571             :   }
     572             : 
     573             :   /// \returns Default/requested minimum number of waves per execution unit.
     574             :   unsigned getMinWavesPerEU() const {
     575             :     return WavesPerEU.first;
     576             :   }
     577             : 
     578             :   /// \returns Default/requested maximum number of waves per execution unit.
     579             :   unsigned getMaxWavesPerEU() const {
     580             :     return WavesPerEU.second;
     581             :   }
     582             : 
     583             :   /// \returns Stack object index for \p Dim's work group ID.
     584             :   int getDebuggerWorkGroupIDStackObjectIndex(unsigned Dim) const {
     585             :     assert(Dim < 3);
     586          12 :     return DebuggerWorkGroupIDStackObjectIndices[Dim];
     587             :   }
     588             : 
     589             :   /// Sets stack object index for \p Dim's work group ID to \p ObjectIdx.
     590             :   void setDebuggerWorkGroupIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
     591             :     assert(Dim < 3);
     592          12 :     DebuggerWorkGroupIDStackObjectIndices[Dim] = ObjectIdx;
     593             :   }
     594             : 
     595             :   /// \returns Stack object index for \p Dim's work item ID.
     596             :   int getDebuggerWorkItemIDStackObjectIndex(unsigned Dim) const {
     597             :     assert(Dim < 3);
     598          12 :     return DebuggerWorkItemIDStackObjectIndices[Dim];
     599             :   }
     600             : 
     601             :   /// Sets stack object index for \p Dim's work item ID to \p ObjectIdx.
     602             :   void setDebuggerWorkItemIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
     603             :     assert(Dim < 3);
     604          12 :     DebuggerWorkItemIDStackObjectIndices[Dim] = ObjectIdx;
     605             :   }
     606             : 
     607             :   /// \returns SGPR used for \p Dim's work group ID.
     608             :   unsigned getWorkGroupIDSGPR(unsigned Dim) const {
     609          12 :     switch (Dim) {
     610           4 :     case 0:
     611             :       assert(hasWorkGroupIDX());
     612           4 :       return ArgInfo.WorkGroupIDX.getRegister();
     613           4 :     case 1:
     614             :       assert(hasWorkGroupIDY());
     615           4 :       return ArgInfo.WorkGroupIDY.getRegister();
     616           4 :     case 2:
     617             :       assert(hasWorkGroupIDZ());
     618           4 :       return ArgInfo.WorkGroupIDZ.getRegister();
     619             :     }
     620           0 :     llvm_unreachable("unexpected dimension");
     621             :   }
     622             : 
     623             :   /// \returns VGPR used for \p Dim' work item ID.
     624             :   unsigned getWorkItemIDVGPR(unsigned Dim) const;
     625             : 
     626             :   unsigned getLDSWaveSpillSize() const {
     627             :     return LDSWaveSpillSize;
     628             :   }
     629             : 
     630         467 :   const AMDGPUBufferPseudoSourceValue *getBufferPSV(const SIInstrInfo &TII,
     631             :                                                     const Value *BufferRsrc) {
     632             :     assert(BufferRsrc);
     633             :     auto PSV = BufferPSVs.try_emplace(
     634             :       BufferRsrc,
     635         934 :       llvm::make_unique<AMDGPUBufferPseudoSourceValue>(TII));
     636         934 :     return PSV.first->second.get();
     637             :   }
     638             : 
     639         759 :   const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII,
     640             :                                                   const Value *ImgRsrc) {
     641             :     assert(ImgRsrc);
     642             :     auto PSV = ImagePSVs.try_emplace(
     643             :       ImgRsrc,
     644        1518 :       llvm::make_unique<AMDGPUImagePseudoSourceValue>(TII));
     645        1518 :     return PSV.first->second.get();
     646             :   }
     647             : 
     648             :   unsigned getOccupancy() const {
     649             :     return Occupancy;
     650             :   }
     651             : 
     652             :   unsigned getMinAllowedOccupancy() const {
     653         304 :     if (!isMemoryBound() && !needsWaveLimiter())
     654         267 :       return Occupancy;
     655          37 :     return (Occupancy < 4) ? Occupancy : 4;
     656             :   }
     657             : 
     658             :   void limitOccupancy(const MachineFunction &MF);
     659             : 
     660             :   void limitOccupancy(unsigned Limit) {
     661       73235 :     if (Occupancy > Limit)
     662         276 :       Occupancy = Limit;
     663             :   }
     664             : 
     665             :   void increaseOccupancy(const MachineFunction &MF, unsigned Limit) {
     666           2 :     if (Occupancy < Limit)
     667           0 :       Occupancy = Limit;
     668           2 :     limitOccupancy(MF);
     669             :   }
     670             : };
     671             : 
     672             : } // end namespace llvm
     673             : 
     674             : #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H

Generated by: LCOV version 1.13