LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - SIMachineFunctionInfo.h (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 95 169 56.2 %
Date: 2018-10-20 13:21:21 Functions: 9 44 20.5 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : /// \file
      11             : //
      12             : //===----------------------------------------------------------------------===//
      13             : 
      14             : #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
      15             : #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
      16             : 
      17             : #include "AMDGPUArgumentUsageInfo.h"
      18             : #include "AMDGPUMachineFunction.h"
      19             : #include "SIInstrInfo.h"
      20             : #include "SIRegisterInfo.h"
      21             : #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
      22             : #include "llvm/ADT/ArrayRef.h"
      23             : #include "llvm/ADT/DenseMap.h"
      24             : #include "llvm/ADT/Optional.h"
      25             : #include "llvm/ADT/SmallVector.h"
      26             : #include "llvm/CodeGen/PseudoSourceValue.h"
      27             : #include "llvm/CodeGen/TargetInstrInfo.h"
      28             : #include "llvm/MC/MCRegisterInfo.h"
      29             : #include "llvm/Support/ErrorHandling.h"
      30             : #include <array>
      31             : #include <cassert>
      32             : #include <utility>
      33             : #include <vector>
      34             : 
      35             : namespace llvm {
      36             : 
      37             : class MachineFrameInfo;
      38             : class MachineFunction;
      39             : class TargetRegisterClass;
      40             : 
      41             : class AMDGPUImagePseudoSourceValue : public PseudoSourceValue {
      42             : public:
      43             :   // TODO: Is the img rsrc useful?
      44         709 :   explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII) :
      45         709 :     PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) {}
      46             : 
      47        4683 :   bool isConstant(const MachineFrameInfo *) const override {
      48             :     // This should probably be true for most images, but we will start by being
      49             :     // conservative.
      50        4683 :     return false;
      51             :   }
      52             : 
      53        1305 :   bool isAliased(const MachineFrameInfo *) const override {
      54        1305 :     return true;
      55             :   }
      56             : 
      57           3 :   bool mayAlias(const MachineFrameInfo *) const override {
      58           3 :     return true;
      59             :   }
      60             : };
      61             : 
      62             : class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue {
      63             : public:
      64        1113 :   explicit AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII) :
      65        1113 :     PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) { }
      66             : 
      67        3710 :   bool isConstant(const MachineFrameInfo *) const override {
      68             :     // This should probably be true for most images, but we will start by being
      69             :     // conservative.
      70        3710 :     return false;
      71             :   }
      72             : 
      73        1563 :   bool isAliased(const MachineFrameInfo *) const override {
      74        1563 :     return true;
      75             :   }
      76             : 
      77           8 :   bool mayAlias(const MachineFrameInfo *) const override {
      78           8 :     return true;
      79             :   }
      80             : };
      81             : 
      82             : /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
      83             : /// tells the hardware which interpolation parameters to load.
      84             : class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
      85             :   unsigned TIDReg = AMDGPU::NoRegister;
      86             : 
      87             :   // Registers that may be reserved for spilling purposes. These may be the same
      88             :   // as the input registers.
      89             :   unsigned ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;
      90             :   unsigned ScratchWaveOffsetReg = AMDGPU::SCRATCH_WAVE_OFFSET_REG;
      91             : 
      92             :   // This is the current function's incremented size from the kernel's scratch
      93             :   // wave offset register. For an entry function, this is exactly the same as
      94             :   // the ScratchWaveOffsetReg.
      95             :   unsigned FrameOffsetReg = AMDGPU::FP_REG;
      96             : 
      97             :   // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg.
      98             :   unsigned StackPtrOffsetReg = AMDGPU::SP_REG;
      99             : 
     100             :   AMDGPUFunctionArgInfo ArgInfo;
     101             : 
     102             :   // Graphics info.
     103             :   unsigned PSInputAddr = 0;
     104             :   unsigned PSInputEnable = 0;
     105             : 
     106             :   /// Number of bytes of arguments this function has on the stack. If the callee
     107             :   /// is expected to restore the argument stack this should be a multiple of 16,
     108             :   /// all usable during a tail call.
     109             :   ///
     110             :   /// The alternative would forbid tail call optimisation in some cases: if we
     111             :   /// want to transfer control from a function with 8-bytes of stack-argument
     112             :   /// space to a function with 16-bytes then misalignment of this value would
     113             :   /// make a stack adjustment necessary, which could not be undone by the
     114             :   /// callee.
     115             :   unsigned BytesInStackArgArea = 0;
     116             : 
     117             :   bool ReturnsVoid = true;
     118             : 
     119             :   // A pair of default/requested minimum/maximum flat work group sizes.
     120             :   // Minimum - first, maximum - second.
     121             :   std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0};
     122             : 
     123             :   // A pair of default/requested minimum/maximum number of waves per execution
     124             :   // unit. Minimum - first, maximum - second.
     125             :   std::pair<unsigned, unsigned> WavesPerEU = {0, 0};
     126             : 
     127             :   // Stack object indices for work group IDs.
     128             :   std::array<int, 3> DebuggerWorkGroupIDStackObjectIndices = {{0, 0, 0}};
     129             : 
     130             :   // Stack object indices for work item IDs.
     131             :   std::array<int, 3> DebuggerWorkItemIDStackObjectIndices = {{0, 0, 0}};
     132             : 
     133             :   DenseMap<const Value *,
     134             :            std::unique_ptr<const AMDGPUBufferPseudoSourceValue>> BufferPSVs;
     135             :   DenseMap<const Value *,
     136             :            std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs;
     137             : 
     138             : private:
     139             :   unsigned LDSWaveSpillSize = 0;
     140             :   unsigned NumUserSGPRs = 0;
     141             :   unsigned NumSystemSGPRs = 0;
     142             : 
     143             :   bool HasSpilledSGPRs = false;
     144             :   bool HasSpilledVGPRs = false;
     145             :   bool HasNonSpillStackObjects = false;
     146             :   bool IsStackRealigned = false;
     147             : 
     148             :   unsigned NumSpilledSGPRs = 0;
     149             :   unsigned NumSpilledVGPRs = 0;
     150             : 
     151             :   // Feature bits required for inputs passed in user SGPRs.
     152             :   bool PrivateSegmentBuffer : 1;
     153             :   bool DispatchPtr : 1;
     154             :   bool QueuePtr : 1;
     155             :   bool KernargSegmentPtr : 1;
     156             :   bool DispatchID : 1;
     157             :   bool FlatScratchInit : 1;
     158             : 
     159             :   // Feature bits required for inputs passed in system SGPRs.
     160             :   bool WorkGroupIDX : 1; // Always initialized.
     161             :   bool WorkGroupIDY : 1;
     162             :   bool WorkGroupIDZ : 1;
     163             :   bool WorkGroupInfo : 1;
     164             :   bool PrivateSegmentWaveByteOffset : 1;
     165             : 
     166             :   bool WorkItemIDX : 1; // Always initialized.
     167             :   bool WorkItemIDY : 1;
     168             :   bool WorkItemIDZ : 1;
     169             : 
     170             :   // Private memory buffer
     171             :   // Compute directly in sgpr[0:1]
     172             :   // Other shaders indirect 64-bits at sgpr[0:1]
     173             :   bool ImplicitBufferPtr : 1;
     174             : 
     175             :   // Pointer to where the ABI inserts special kernel arguments separate from the
     176             :   // user arguments. This is an offset from the KernargSegmentPtr.
     177             :   bool ImplicitArgPtr : 1;
     178             : 
     179             :   // The hard-wired high half of the address of the global information table
     180             :   // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since
     181             :   // current hardware only allows a 16 bit value.
     182             :   unsigned GITPtrHigh;
     183             : 
     184             :   unsigned HighBitsOf32BitAddress;
     185             : 
     186             :   // Current recorded maximum possible occupancy.
     187             :   unsigned Occupancy;
     188             : 
     189             :   MCPhysReg getNextUserSGPR() const;
     190             : 
     191             :   MCPhysReg getNextSystemSGPR() const;
     192             : 
     193             : public:
     194             :   struct SpilledReg {
     195             :     unsigned VGPR = 0;
     196             :     int Lane = -1;
     197             : 
     198             :     SpilledReg() = default;
     199        1171 :     SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) {}
     200             : 
     201             :     bool hasLane() { return Lane != -1;}
     202             :     bool hasReg() { return VGPR != 0;}
     203             :   };
     204             : 
     205         148 :   struct SGPRSpillVGPRCSR {
     206             :     // VGPR used for SGPR spills
     207             :     unsigned VGPR;
     208             : 
     209             :     // If the VGPR is a CSR, the stack slot used to save/restore it in the
     210             :     // prolog/epilog.
     211             :     Optional<int> FI;
     212             : 
     213         146 :     SGPRSpillVGPRCSR(unsigned V, Optional<int> F) : VGPR(V), FI(F) {}
     214             :   };
     215             : 
     216             : private:
     217             :   // SGPR->VGPR spilling support.
     218             :   using SpillRegMask = std::pair<unsigned, unsigned>;
     219             : 
     220             :   // Track VGPR + wave index for each subregister of the SGPR spilled to
     221             :   // frameindex key.
     222             :   DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
     223             :   unsigned NumVGPRSpillLanes = 0;
     224             :   SmallVector<SGPRSpillVGPRCSR, 2> SpillVGPRs;
     225             : 
     226             : public:
     227             :   SIMachineFunctionInfo(const MachineFunction &MF);
     228             : 
     229        1370 :   ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
     230        1370 :     auto I = SGPRToVGPRSpills.find(FrameIndex);
     231             :     return (I == SGPRToVGPRSpills.end()) ?
     232        1370 :       ArrayRef<SpilledReg>() : makeArrayRef(I->second);
     233             :   }
     234             : 
     235             :   ArrayRef<SGPRSpillVGPRCSR> getSGPRSpillVGPRs() const {
     236             :     return SpillVGPRs;
     237             :   }
     238             : 
     239             :   bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
     240             :   void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI);
     241             : 
     242           0 :   bool hasCalculatedTID() const { return TIDReg != 0; };
     243           0 :   unsigned getTIDReg() const { return TIDReg; };
     244           0 :   void setTIDReg(unsigned Reg) { TIDReg = Reg; }
     245             : 
     246           0 :   unsigned getBytesInStackArgArea() const {
     247           0 :     return BytesInStackArgArea;
     248             :   }
     249             : 
     250           0 :   void setBytesInStackArgArea(unsigned Bytes) {
     251       19709 :     BytesInStackArgArea = Bytes;
     252           0 :   }
     253             : 
     254             :   // Add user SGPRs.
     255             :   unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
     256             :   unsigned addDispatchPtr(const SIRegisterInfo &TRI);
     257             :   unsigned addQueuePtr(const SIRegisterInfo &TRI);
     258             :   unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
     259             :   unsigned addDispatchID(const SIRegisterInfo &TRI);
     260             :   unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
     261             :   unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI);
     262             : 
     263             :   // Add system SGPRs.
     264             :   unsigned addWorkGroupIDX() {
     265       16213 :     ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR());
     266       16213 :     NumSystemSGPRs += 1;
     267             :     return ArgInfo.WorkGroupIDX.getRegister();
     268             :   }
     269             : 
     270             :   unsigned addWorkGroupIDY() {
     271          24 :     ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR());
     272          24 :     NumSystemSGPRs += 1;
     273             :     return ArgInfo.WorkGroupIDY.getRegister();
     274             :   }
     275             : 
     276             :   unsigned addWorkGroupIDZ() {
     277          24 :     ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR());
     278          24 :     NumSystemSGPRs += 1;
     279             :     return ArgInfo.WorkGroupIDZ.getRegister();
     280             :   }
     281             : 
     282             :   unsigned addWorkGroupInfo() {
     283           0 :     ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR());
     284           0 :     NumSystemSGPRs += 1;
     285             :     return ArgInfo.WorkGroupInfo.getRegister();
     286             :   }
     287             : 
     288             :   // Add special VGPR inputs
     289             :   void setWorkItemIDX(ArgDescriptor Arg) {
     290           0 :     ArgInfo.WorkItemIDX = Arg;
     291             :   }
     292             : 
     293             :   void setWorkItemIDY(ArgDescriptor Arg) {
     294           0 :     ArgInfo.WorkItemIDY = Arg;
     295             :   }
     296             : 
     297             :   void setWorkItemIDZ(ArgDescriptor Arg) {
     298           0 :     ArgInfo.WorkItemIDZ = Arg;
     299             :   }
     300             : 
     301             :   unsigned addPrivateSegmentWaveByteOffset() {
     302             :     ArgInfo.PrivateSegmentWaveByteOffset
     303       16213 :       = ArgDescriptor::createRegister(getNextSystemSGPR());
     304       16213 :     NumSystemSGPRs += 1;
     305             :     return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
     306             :   }
     307             : 
     308             :   void setPrivateSegmentWaveByteOffset(unsigned Reg) {
     309          45 :     ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg);
     310             :   }
     311             : 
     312             :   bool hasPrivateSegmentBuffer() const {
     313       20574 :     return PrivateSegmentBuffer;
     314             :   }
     315             : 
     316             :   bool hasDispatchPtr() const {
     317       23100 :     return DispatchPtr;
     318             :   }
     319             : 
     320             :   bool hasQueuePtr() const {
     321       20574 :     return QueuePtr;
     322             :   }
     323             : 
     324             :   bool hasKernargSegmentPtr() const {
     325       20574 :     return KernargSegmentPtr;
     326             :   }
     327             : 
     328             :   bool hasDispatchID() const {
     329       20574 :     return DispatchID;
     330             :   }
     331             : 
     332             :   bool hasFlatScratchInit() const {
     333      184810 :     return FlatScratchInit;
     334             :   }
     335             : 
     336             :   bool hasWorkGroupIDX() const {
     337       35910 :     return WorkGroupIDX;
     338             :   }
     339             : 
     340             :   bool hasWorkGroupIDY() const {
     341       35910 :     return WorkGroupIDY;
     342             :   }
     343             : 
     344             :   bool hasWorkGroupIDZ() const {
     345       35910 :     return WorkGroupIDZ;
     346             :   }
     347             : 
     348             :   bool hasWorkGroupInfo() const {
     349       35910 :     return WorkGroupInfo;
     350             :   }
     351             : 
     352             :   bool hasPrivateSegmentWaveByteOffset() const {
     353       17950 :     return PrivateSegmentWaveByteOffset;
     354             :   }
     355             : 
     356             :   bool hasWorkItemIDX() const {
     357           0 :     return WorkItemIDX;
     358             :   }
     359             : 
     360             :   bool hasWorkItemIDY() const {
     361       17884 :     return WorkItemIDY;
     362             :   }
     363             : 
     364             :   bool hasWorkItemIDZ() const {
     365       17960 :     return WorkItemIDZ;
     366             :   }
     367             : 
     368             :   bool hasImplicitArgPtr() const {
     369           0 :     return ImplicitArgPtr;
     370             :   }
     371             : 
     372             :   bool hasImplicitBufferPtr() const {
     373       18435 :     return ImplicitBufferPtr;
     374             :   }
     375             : 
     376             :   AMDGPUFunctionArgInfo &getArgInfo() {
     377       19709 :     return ArgInfo;
     378             :   }
     379             : 
     380             :   const AMDGPUFunctionArgInfo &getArgInfo() const {
     381         575 :     return ArgInfo;
     382             :   }
     383             : 
     384             :   std::pair<const ArgDescriptor *, const TargetRegisterClass *>
     385             :   getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
     386       54901 :     return ArgInfo.getPreloadedValue(Value);
     387             :   }
     388             : 
     389             :   unsigned getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
     390        2436 :     return ArgInfo.getPreloadedValue(Value).first->getRegister();
     391             :   }
     392             : 
     393           0 :   unsigned getGITPtrHigh() const {
     394           0 :     return GITPtrHigh;
     395             :   }
     396             : 
     397           0 :   unsigned get32BitAddressHighBits() const {
     398           0 :     return HighBitsOf32BitAddress;
     399             :   }
     400             : 
     401           0 :   unsigned getNumUserSGPRs() const {
     402           0 :     return NumUserSGPRs;
     403             :   }
     404             : 
     405           0 :   unsigned getNumPreloadedSGPRs() const {
     406        1308 :     return NumUserSGPRs + NumSystemSGPRs;
     407             :   }
     408             : 
     409             :   unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const {
     410          50 :     return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
     411             :   }
     412             : 
     413             :   /// Returns the physical register reserved for use as the resource
     414             :   /// descriptor for scratch accesses.
     415           0 :   unsigned getScratchRSrcReg() const {
     416           0 :     return ScratchRSrcReg;
     417             :   }
     418             : 
     419           0 :   void setScratchRSrcReg(unsigned Reg) {
     420             :     assert(Reg != 0 && "Should never be unset");
     421       16330 :     ScratchRSrcReg = Reg;
     422           0 :   }
     423             : 
     424           0 :   unsigned getScratchWaveOffsetReg() const {
     425           0 :     return ScratchWaveOffsetReg;
     426             :   }
     427             : 
     428           0 :   unsigned getFrameOffsetReg() const {
     429           0 :     return FrameOffsetReg;
     430             :   }
     431             : 
     432           0 :   void setStackPtrOffsetReg(unsigned Reg) {
     433             :     assert(Reg != 0 && "Should never be unset");
     434        2193 :     StackPtrOffsetReg = Reg;
     435           0 :   }
     436             : 
     437             :   // Note the unset value for this is AMDGPU::SP_REG rather than
     438             :   // NoRegister. This is mostly a workaround for MIR tests where state that
     439             :   // can't be directly computed from the function is not preserved in serialized
     440             :   // MIR.
     441           0 :   unsigned getStackPtrOffsetReg() const {
     442           0 :     return StackPtrOffsetReg;
     443             :   }
     444             : 
     445             :   void setScratchWaveOffsetReg(unsigned Reg) {
     446             :     assert(Reg != 0 && "Should never be unset");
     447       18512 :     ScratchWaveOffsetReg = Reg;
     448       18512 :     if (isEntryFunction())
     449       18512 :       FrameOffsetReg = ScratchWaveOffsetReg;
     450             :   }
     451             : 
     452             :   unsigned getQueuePtrUserSGPR() const {
     453          26 :     return ArgInfo.QueuePtr.getRegister();
     454             :   }
     455             : 
     456             :   unsigned getImplicitBufferPtrUserSGPR() const {
     457           2 :     return ArgInfo.ImplicitBufferPtr.getRegister();
     458             :   }
     459             : 
     460           0 :   bool hasSpilledSGPRs() const {
     461           0 :     return HasSpilledSGPRs;
     462             :   }
     463             : 
     464             :   void setHasSpilledSGPRs(bool Spill = true) {
     465         702 :     HasSpilledSGPRs = Spill;
     466             :   }
     467             : 
     468           0 :   bool hasSpilledVGPRs() const {
     469           0 :     return HasSpilledVGPRs;
     470             :   }
     471             : 
     472             :   void setHasSpilledVGPRs(bool Spill = true) {
     473        1240 :     HasSpilledVGPRs = Spill;
     474             :   }
     475             : 
     476           0 :   bool hasNonSpillStackObjects() const {
     477           0 :     return HasNonSpillStackObjects;
     478             :   }
     479             : 
     480             :   void setHasNonSpillStackObjects(bool StackObject = true) {
     481         418 :     HasNonSpillStackObjects = StackObject;
     482             :   }
     483             : 
     484           0 :   bool isStackRealigned() const {
     485           0 :     return IsStackRealigned;
     486             :   }
     487             : 
     488             :   void setIsStackRealigned(bool Realigned = true) {
     489           3 :     IsStackRealigned = Realigned;
     490             :   }
     491             : 
     492           0 :   unsigned getNumSpilledSGPRs() const {
     493           0 :     return NumSpilledSGPRs;
     494             :   }
     495             : 
     496           0 :   unsigned getNumSpilledVGPRs() const {
     497           0 :     return NumSpilledVGPRs;
     498             :   }
     499             : 
     500           0 :   void addToSpilledSGPRs(unsigned num) {
     501         690 :     NumSpilledSGPRs += num;
     502           0 :   }
     503             : 
     504           0 :   void addToSpilledVGPRs(unsigned num) {
     505        1316 :     NumSpilledVGPRs += num;
     506           0 :   }
     507             : 
     508           0 :   unsigned getPSInputAddr() const {
     509           0 :     return PSInputAddr;
     510             :   }
     511             : 
     512           0 :   unsigned getPSInputEnable() const {
     513           0 :     return PSInputEnable;
     514             :   }
     515             : 
     516           0 :   bool isPSInputAllocated(unsigned Index) const {
     517           4 :     return PSInputAddr & (1 << Index);
     518             :   }
     519             : 
     520           0 :   void markPSInputAllocated(unsigned Index) {
     521           1 :     PSInputAddr |= 1 << Index;
     522           0 :   }
     523             : 
     524           0 :   void markPSInputEnabled(unsigned Index) {
     525         303 :     PSInputEnable |= 1 << Index;
     526           0 :   }
     527             : 
     528           0 :   bool returnsVoid() const {
     529           0 :     return ReturnsVoid;
     530             :   }
     531             : 
     532             :   void setIfReturnsVoid(bool Value) {
     533        3450 :     ReturnsVoid = Value;
     534             :   }
     535             : 
     536             :   /// \returns A pair of default/requested minimum/maximum flat work group sizes
     537             :   /// for this function.
     538             :   std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
     539             :     return FlatWorkGroupSizes;
     540             :   }
     541             : 
     542             :   /// \returns Default/requested minimum flat work group size for this function.
     543             :   unsigned getMinFlatWorkGroupSize() const {
     544             :     return FlatWorkGroupSizes.first;
     545             :   }
     546             : 
     547             :   /// \returns Default/requested maximum flat work group size for this function.
     548           0 :   unsigned getMaxFlatWorkGroupSize() const {
     549           0 :     return FlatWorkGroupSizes.second;
     550             :   }
     551             : 
     552             :   /// \returns A pair of default/requested minimum/maximum number of waves per
     553             :   /// execution unit.
     554           0 :   std::pair<unsigned, unsigned> getWavesPerEU() const {
     555           0 :     return WavesPerEU;
     556             :   }
     557             : 
     558             :   /// \returns Default/requested minimum number of waves per execution unit.
     559             :   unsigned getMinWavesPerEU() const {
     560             :     return WavesPerEU.first;
     561             :   }
     562             : 
     563             :   /// \returns Default/requested maximum number of waves per execution unit.
     564           0 :   unsigned getMaxWavesPerEU() const {
     565           0 :     return WavesPerEU.second;
     566             :   }
     567             : 
     568             :   /// \returns Stack object index for \p Dim's work group ID.
     569             :   int getDebuggerWorkGroupIDStackObjectIndex(unsigned Dim) const {
     570             :     assert(Dim < 3);
     571          12 :     return DebuggerWorkGroupIDStackObjectIndices[Dim];
     572             :   }
     573             : 
     574             :   /// Sets stack object index for \p Dim's work group ID to \p ObjectIdx.
     575             :   void setDebuggerWorkGroupIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
     576             :     assert(Dim < 3);
     577          12 :     DebuggerWorkGroupIDStackObjectIndices[Dim] = ObjectIdx;
     578             :   }
     579             : 
     580             :   /// \returns Stack object index for \p Dim's work item ID.
     581             :   int getDebuggerWorkItemIDStackObjectIndex(unsigned Dim) const {
     582             :     assert(Dim < 3);
     583          12 :     return DebuggerWorkItemIDStackObjectIndices[Dim];
     584             :   }
     585             : 
     586             :   /// Sets stack object index for \p Dim's work item ID to \p ObjectIdx.
     587             :   void setDebuggerWorkItemIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
     588             :     assert(Dim < 3);
     589          12 :     DebuggerWorkItemIDStackObjectIndices[Dim] = ObjectIdx;
     590             :   }
     591             : 
     592             :   /// \returns SGPR used for \p Dim's work group ID.
     593             :   unsigned getWorkGroupIDSGPR(unsigned Dim) const {
     594          12 :     switch (Dim) {
     595           4 :     case 0:
     596             :       assert(hasWorkGroupIDX());
     597           4 :       return ArgInfo.WorkGroupIDX.getRegister();
     598           4 :     case 1:
     599             :       assert(hasWorkGroupIDY());
     600           4 :       return ArgInfo.WorkGroupIDY.getRegister();
     601           4 :     case 2:
     602             :       assert(hasWorkGroupIDZ());
     603           4 :       return ArgInfo.WorkGroupIDZ.getRegister();
     604             :     }
     605           0 :     llvm_unreachable("unexpected dimension");
     606             :   }
     607             : 
     608             :   /// \returns VGPR used for \p Dim' work item ID.
     609             :   unsigned getWorkItemIDVGPR(unsigned Dim) const;
     610             : 
     611           0 :   unsigned getLDSWaveSpillSize() const {
     612           0 :     return LDSWaveSpillSize;
     613             :   }
     614             : 
     615        1113 :   const AMDGPUBufferPseudoSourceValue *getBufferPSV(const SIInstrInfo &TII,
     616             :                                                     const Value *BufferRsrc) {
     617             :     assert(BufferRsrc);
     618        1113 :     auto PSV = BufferPSVs.try_emplace(
     619             :       BufferRsrc,
     620        1113 :       llvm::make_unique<AMDGPUBufferPseudoSourceValue>(TII));
     621        2226 :     return PSV.first->second.get();
     622             :   }
     623             : 
     624         709 :   const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII,
     625             :                                                   const Value *ImgRsrc) {
     626             :     assert(ImgRsrc);
     627         709 :     auto PSV = ImagePSVs.try_emplace(
     628             :       ImgRsrc,
     629         709 :       llvm::make_unique<AMDGPUImagePseudoSourceValue>(TII));
     630        1418 :     return PSV.first->second.get();
     631             :   }
     632             : 
     633           0 :   unsigned getOccupancy() const {
     634           0 :     return Occupancy;
     635             :   }
     636             : 
     637             :   unsigned getMinAllowedOccupancy() const {
     638          41 :     if (!isMemoryBound() && !needsWaveLimiter())
     639           9 :       return Occupancy;
     640          32 :     return (Occupancy < 4) ? Occupancy : 4;
     641             :   }
     642             : 
     643             :   void limitOccupancy(const MachineFunction &MF);
     644             : 
     645           0 :   void limitOccupancy(unsigned Limit) {
     646       81145 :     if (Occupancy > Limit)
     647         300 :       Occupancy = Limit;
     648           0 :   }
     649             : 
     650             :   void increaseOccupancy(const MachineFunction &MF, unsigned Limit) {
     651           2 :     if (Occupancy < Limit)
     652           0 :       Occupancy = Limit;
     653           2 :     limitOccupancy(MF);
     654             :   }
     655             : };
     656             : 
     657             : } // end namespace llvm
     658             : 
     659             : #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H

Generated by: LCOV version 1.13