LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - SIMachineFunctionInfo.h (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 99 106 93.4 %
Date: 2018-02-23 15:42:53 Functions: 11 15 73.3 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : /// \file
      11             : //
      12             : //===----------------------------------------------------------------------===//
      13             : 
      14             : #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
      15             : #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
      16             : 
      17             : #include "AMDGPUArgumentUsageInfo.h"
      18             : #include "AMDGPUMachineFunction.h"
      19             : #include "SIRegisterInfo.h"
      20             : #include "llvm/ADT/ArrayRef.h"
      21             : #include "llvm/ADT/DenseMap.h"
      22             : #include "llvm/ADT/Optional.h"
      23             : #include "llvm/ADT/SmallVector.h"
      24             : #include "llvm/CodeGen/PseudoSourceValue.h"
      25             : #include "llvm/CodeGen/TargetInstrInfo.h"
      26             : #include "llvm/MC/MCRegisterInfo.h"
      27             : #include "llvm/Support/ErrorHandling.h"
      28             : #include <array>
      29             : #include <cassert>
      30             : #include <utility>
      31             : #include <vector>
      32             : 
      33             : namespace llvm {
      34             : 
      35             : class MachineFrameInfo;
      36             : class MachineFunction;
      37             : class SIInstrInfo;
      38             : class TargetRegisterClass;
      39             : 
      40         524 : class AMDGPUImagePseudoSourceValue : public PseudoSourceValue {
      41             : public:
      42             :   // TODO: Is the img rsrc useful?
      43         524 :   explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII) :
      44         524 :     PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) {}
      45             : 
      46        3185 :   bool isConstant(const MachineFrameInfo *) const override {
      47             :     // This should probably be true for most images, but we will start by being
      48             :     // conservative.
      49        3185 :     return false;
      50             :   }
      51             : 
      52         836 :   bool isAliased(const MachineFrameInfo *) const override {
      53         836 :     return true;
      54             :   }
      55             : 
      56         409 :   bool mayAlias(const MachineFrameInfo *) const override {
      57         409 :     return true;
      58             :   }
      59             : };
      60             : 
      61         461 : class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue {
      62             : public:
      63         461 :   explicit AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII) :
      64         461 :     PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) { }
      65             : 
      66        1695 :   bool isConstant(const MachineFrameInfo *) const override {
      67             :     // This should probably be true for most buffers, but we will start by being
      68             :     // conservative.
      69        1695 :     return false;
      70             :   }
      71             : 
      72         705 :   bool isAliased(const MachineFrameInfo *) const override {
      73         705 :     return true;
      74             :   }
      75             : 
      76           0 :   bool mayAlias(const MachineFrameInfo *) const override {
      77           0 :     return true;
      78             :   }
      79             : };
      80             : 
      81             : /// This class keeps track of the SPI_PS_INPUT_ADDR config register, which
      82             : /// tells the hardware which interpolation parameters to load.
      83       84930 : class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
      84             :   unsigned TIDReg = AMDGPU::NoRegister;
      85             : 
      86             :   // Registers that may be reserved for spilling purposes. These may be the same
      87             :   // as the input registers.
      88             :   unsigned ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;
      89             :   unsigned ScratchWaveOffsetReg = AMDGPU::SCRATCH_WAVE_OFFSET_REG;
      90             : 
      91             :   // This is the current function's incremented size from the kernel's scratch
      92             :   // wave offset register. For an entry function, this is exactly the same as
      93             :   // the ScratchWaveOffsetReg.
      94             :   unsigned FrameOffsetReg = AMDGPU::FP_REG;
      95             : 
      96             :   // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg.
      97             :   unsigned StackPtrOffsetReg = AMDGPU::SP_REG;
      98             : 
      99             :   AMDGPUFunctionArgInfo ArgInfo;
     100             : 
     101             :   // Graphics info.
     102             :   unsigned PSInputAddr = 0;
     103             :   unsigned PSInputEnable = 0;
     104             : 
     105             :   /// Number of bytes of arguments this function has on the stack. If the callee
     106             :   /// is expected to restore the argument stack this should be a multiple of 16,
     107             :   /// all usable during a tail call.
     108             :   ///
     109             :   /// The alternative would forbid tail call optimisation in some cases: if we
     110             :   /// want to transfer control from a function with 8-bytes of stack-argument
     111             :   /// space to a function with 16-bytes then misalignment of this value would
     112             :   /// make a stack adjustment necessary, which could not be undone by the
     113             :   /// callee.
     114             :   unsigned BytesInStackArgArea = 0;
     115             : 
     116             :   bool ReturnsVoid = true;
     117             : 
     118             :   // A pair of default/requested minimum/maximum flat work group sizes.
     119             :   // Minimum - first, maximum - second.
     120             :   std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0};
     121             : 
     122             :   // A pair of default/requested minimum/maximum number of waves per execution
     123             :   // unit. Minimum - first, maximum - second.
     124             :   std::pair<unsigned, unsigned> WavesPerEU = {0, 0};
     125             : 
     126             :   // Stack object indices for work group IDs.
     127             :   std::array<int, 3> DebuggerWorkGroupIDStackObjectIndices = {{0, 0, 0}};
     128             : 
     129             :   // Stack object indices for work item IDs.
     130             :   std::array<int, 3> DebuggerWorkItemIDStackObjectIndices = {{0, 0, 0}};
     131             : 
     132             :   DenseMap<const Value *,
     133             :            std::unique_ptr<const AMDGPUBufferPseudoSourceValue>> BufferPSVs;
     134             :   DenseMap<const Value *,
     135             :            std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs;
     136             : 
     137             : private:
     138             :   unsigned LDSWaveSpillSize = 0;
     139             :   unsigned NumUserSGPRs = 0;
     140             :   unsigned NumSystemSGPRs = 0;
     141             : 
     142             :   bool HasSpilledSGPRs = false;
     143             :   bool HasSpilledVGPRs = false;
     144             :   bool HasNonSpillStackObjects = false;
     145             : 
     146             :   unsigned NumSpilledSGPRs = 0;
     147             :   unsigned NumSpilledVGPRs = 0;
     148             : 
     149             :   // Feature bits required for inputs passed in user SGPRs.
     150             :   bool PrivateSegmentBuffer : 1;
     151             :   bool DispatchPtr : 1;
     152             :   bool QueuePtr : 1;
     153             :   bool KernargSegmentPtr : 1;
     154             :   bool DispatchID : 1;
     155             :   bool FlatScratchInit : 1;
     156             :   bool GridWorkgroupCountX : 1;
     157             :   bool GridWorkgroupCountY : 1;
     158             :   bool GridWorkgroupCountZ : 1;
     159             : 
     160             :   // Feature bits required for inputs passed in system SGPRs.
     161             :   bool WorkGroupIDX : 1; // Always initialized.
     162             :   bool WorkGroupIDY : 1;
     163             :   bool WorkGroupIDZ : 1;
     164             :   bool WorkGroupInfo : 1;
     165             :   bool PrivateSegmentWaveByteOffset : 1;
     166             : 
     167             :   bool WorkItemIDX : 1; // Always initialized.
     168             :   bool WorkItemIDY : 1;
     169             :   bool WorkItemIDZ : 1;
     170             : 
     171             :   // Private memory buffer
     172             :   // Compute directly in sgpr[0:1]
     173             :   // Other shaders indirect 64-bits at sgpr[0:1]
     174             :   bool ImplicitBufferPtr : 1;
     175             : 
     176             :   // Pointer to where the ABI inserts special kernel arguments separate from the
     177             :   // user arguments. This is an offset from the KernargSegmentPtr.
     178             :   bool ImplicitArgPtr : 1;
     179             : 
     180             :   // The hard-wired high half of the address of the global information table
     181             :   // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since
     182             :   // current hardware only allows a 16 bit value.
     183             :   unsigned GITPtrHigh;
     184             : 
     185             :   unsigned HighBitsOf32BitAddress;
     186             : 
     187             :   MCPhysReg getNextUserSGPR() const {
     188             :     assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
     189       16001 :     return AMDGPU::SGPR0 + NumUserSGPRs;
     190             :   }
     191             : 
     192             :   MCPhysReg getNextSystemSGPR() const {
     193       29234 :     return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
     194             :   }
     195             : 
     196             : public:
     197             :   struct SpilledReg {
     198             :     unsigned VGPR = AMDGPU::NoRegister;
     199             :     int Lane = -1;
     200             : 
     201             :     SpilledReg() = default;
     202        1090 :     SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) {}
     203             : 
     204             :     bool hasLane() { return Lane != -1;}
     205             :     bool hasReg() { return VGPR != AMDGPU::NoRegister;}
     206             :   };
     207             : 
     208         133 :   struct SGPRSpillVGPRCSR {
     209             :     // VGPR used for SGPR spills
     210             :     unsigned VGPR;
     211             : 
     212             :     // If the VGPR is a CSR, the stack slot used to save/restore it in the
     213             :     // prolog/epilog.
     214             :     Optional<int> FI;
     215             : 
     216         131 :     SGPRSpillVGPRCSR(unsigned V, Optional<int> F) : VGPR(V), FI(F) {}
     217             :   };
     218             : 
     219             : private:
     220             :   // SGPR->VGPR spilling support.
     221             :   using SpillRegMask = std::pair<unsigned, unsigned>;
     222             : 
     223             :   // Track VGPR + wave index for each subregister of the SGPR spilled to
     224             :   // frameindex key.
     225             :   DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
     226             :   unsigned NumVGPRSpillLanes = 0;
     227             :   SmallVector<SGPRSpillVGPRCSR, 2> SpillVGPRs;
     228             : 
     229             : public:
     230             :   SIMachineFunctionInfo(const MachineFunction &MF);
     231             : 
     232        1209 :   ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
     233        1209 :     auto I = SGPRToVGPRSpills.find(FrameIndex);
     234             :     return (I == SGPRToVGPRSpills.end()) ?
     235        2323 :       ArrayRef<SpilledReg>() : makeArrayRef(I->second);
     236             :   }
     237             : 
     238             :   ArrayRef<SGPRSpillVGPRCSR> getSGPRSpillVGPRs() const {
     239             :     return SpillVGPRs;
     240             :   }
     241             : 
     242             :   bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
     243             :   void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI);
     244             : 
     245             :   bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; }
     246             :   unsigned getTIDReg() const { return TIDReg; }
     247           0 :   void setTIDReg(unsigned Reg) { TIDReg = Reg; }
     248             : 
     249             :   unsigned getBytesInStackArgArea() const {
     250             :     return BytesInStackArgArea;
     251             :   }
     252             : 
     253             :   void setBytesInStackArgArea(unsigned Bytes) {
     254       16561 :     BytesInStackArgArea = Bytes;
     255             :   }
     256             : 
     257             :   // Add user SGPRs.
     258             :   unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
     259             :   unsigned addDispatchPtr(const SIRegisterInfo &TRI);
     260             :   unsigned addQueuePtr(const SIRegisterInfo &TRI);
     261             :   unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
     262             :   unsigned addDispatchID(const SIRegisterInfo &TRI);
     263             :   unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
     264             :   unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI);
     265             : 
     266             :   // Add system SGPRs.
     267             :   unsigned addWorkGroupIDX() {
     268       29186 :     ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR());
     269       14593 :     NumSystemSGPRs += 1;
     270             :     return ArgInfo.WorkGroupIDX.getRegister();
     271             :   }
     272             : 
     273             :   unsigned addWorkGroupIDY() {
     274          48 :     ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR());
     275          24 :     NumSystemSGPRs += 1;
     276             :     return ArgInfo.WorkGroupIDY.getRegister();
     277             :   }
     278             : 
     279             :   unsigned addWorkGroupIDZ() {
     280          48 :     ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR());
     281          24 :     NumSystemSGPRs += 1;
     282             :     return ArgInfo.WorkGroupIDZ.getRegister();
     283             :   }
     284             : 
     285             :   unsigned addWorkGroupInfo() {
     286           0 :     ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR());
     287           0 :     NumSystemSGPRs += 1;
     288             :     return ArgInfo.WorkGroupInfo.getRegister();
     289             :   }
     290             : 
     291             :   // Add special VGPR inputs
     292             :   void setWorkItemIDX(ArgDescriptor Arg) {
     293       14608 :     ArgInfo.WorkItemIDX = Arg;
     294             :   }
     295             : 
     296             :   void setWorkItemIDY(ArgDescriptor Arg) {
     297         138 :     ArgInfo.WorkItemIDY = Arg;
     298             :   }
     299             : 
     300             :   void setWorkItemIDZ(ArgDescriptor Arg) {
     301          84 :     ArgInfo.WorkItemIDZ = Arg;
     302             :   }
     303             : 
     304             :   unsigned addPrivateSegmentWaveByteOffset() {
     305             :     ArgInfo.PrivateSegmentWaveByteOffset
     306       29186 :       = ArgDescriptor::createRegister(getNextSystemSGPR());
     307       14593 :     NumSystemSGPRs += 1;
     308             :     return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
     309             :   }
     310             : 
     311             :   void setPrivateSegmentWaveByteOffset(unsigned Reg) {
     312          44 :     ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg);
     313             :   }
     314             : 
     315             :   bool hasPrivateSegmentBuffer() const {
     316       17285 :     return PrivateSegmentBuffer;
     317             :   }
     318             : 
     319             :   bool hasDispatchPtr() const {
     320       20323 :     return DispatchPtr;
     321             :   }
     322             : 
     323             :   bool hasQueuePtr() const {
     324       18452 :     return QueuePtr;
     325             :   }
     326             : 
     327             :   bool hasKernargSegmentPtr() const {
     328       18452 :     return KernargSegmentPtr;
     329             :   }
     330             : 
     331             :   bool hasDispatchID() const {
     332       18452 :     return DispatchID;
     333             :   }
     334             : 
     335             :   bool hasFlatScratchInit() const {
     336      154980 :     return FlatScratchInit;
     337             :   }
     338             : 
     339             :   bool hasGridWorkgroupCountX() const {
     340        1871 :     return GridWorkgroupCountX;
     341             :   }
     342             : 
     343             :   bool hasGridWorkgroupCountY() const {
     344        1871 :     return GridWorkgroupCountY;
     345             :   }
     346             : 
     347             :   bool hasGridWorkgroupCountZ() const {
     348        1871 :     return GridWorkgroupCountZ;
     349             :   }
     350             : 
     351             :   bool hasWorkGroupIDX() const {
     352       31965 :     return WorkGroupIDX;
     353             :   }
     354             : 
     355             :   bool hasWorkGroupIDY() const {
     356       31965 :     return WorkGroupIDY;
     357             :   }
     358             : 
     359             :   bool hasWorkGroupIDZ() const {
     360       31965 :     return WorkGroupIDZ;
     361             :   }
     362             : 
     363             :   bool hasWorkGroupInfo() const {
     364       30798 :     return WorkGroupInfo;
     365             :   }
     366             : 
     367             :   bool hasPrivateSegmentWaveByteOffset() const {
     368       15394 :     return PrivateSegmentWaveByteOffset;
     369             :   }
     370             : 
     371             :   bool hasWorkItemIDX() const {
     372       16561 :     return WorkItemIDX;
     373             :   }
     374             : 
     375             :   bool hasWorkItemIDY() const {
     376       31889 :     return WorkItemIDY;
     377             :   }
     378             : 
     379             :   bool hasWorkItemIDZ() const {
     380       31965 :     return WorkItemIDZ;
     381             :   }
     382             : 
     383             :   bool hasImplicitArgPtr() const {
     384        1167 :     return ImplicitArgPtr;
     385             :   }
     386             : 
     387             :   bool hasImplicitBufferPtr() const {
     388       15865 :     return ImplicitBufferPtr;
     389             :   }
     390             : 
     391             :   AMDGPUFunctionArgInfo &getArgInfo() {
     392       16561 :     return ArgInfo;
     393             :   }
     394             : 
     395             :   const AMDGPUFunctionArgInfo &getArgInfo() const {
     396         473 :     return ArgInfo;
     397             :   }
     398             : 
     399             :   std::pair<const ArgDescriptor *, const TargetRegisterClass *>
     400             :   getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
     401       34540 :     return ArgInfo.getPreloadedValue(Value);
     402             :   }
     403             : 
     404             :   unsigned getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
     405        2594 :     return ArgInfo.getPreloadedValue(Value).first->getRegister();
     406             :   }
     407             : 
     408             :   unsigned getGITPtrHigh() const {
     409             :     return GITPtrHigh;
     410             :   }
     411             : 
     412             :   unsigned get32BitAddressHighBits() const {
     413             :     return HighBitsOf32BitAddress;
     414             :   }
     415             : 
     416             :   unsigned getNumUserSGPRs() const {
     417             :     return NumUserSGPRs;
     418             :   }
     419             : 
     420             :   unsigned getNumPreloadedSGPRs() const {
     421        1198 :     return NumUserSGPRs + NumSystemSGPRs;
     422             :   }
     423             : 
     424             :   unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const {
     425          48 :     return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
     426             :   }
     427             : 
     428             :   /// \brief Returns the physical register reserved for use as the resource
     429             :   /// descriptor for scratch accesses.
     430             :   unsigned getScratchRSrcReg() const {
     431             :     return ScratchRSrcReg;
     432             :   }
     433             : 
     434             :   void setScratchRSrcReg(unsigned Reg) {
     435             :     assert(Reg != AMDGPU::NoRegister && "Should never be unset");
     436       15809 :     ScratchRSrcReg = Reg;
     437             :   }
     438             : 
     439             :   unsigned getScratchWaveOffsetReg() const {
     440             :     return ScratchWaveOffsetReg;
     441             :   }
     442             : 
     443             :   unsigned getFrameOffsetReg() const {
     444             :     return FrameOffsetReg;
     445             :   }
     446             : 
     447             :   void setStackPtrOffsetReg(unsigned Reg) {
     448        1502 :     StackPtrOffsetReg = Reg;
     449             :   }
     450             : 
     451             :   // Note the unset value for this is AMDGPU::SP_REG rather than
     452             :   // NoRegister. This is mostly a workaround for MIR tests where state that
     453             :   // can't be directly computed from the function is not preserved in serialized
     454             :   // MIR.
     455             :   unsigned getStackPtrOffsetReg() const {
     456             :     return StackPtrOffsetReg;
     457             :   }
     458             : 
     459             :   void setScratchWaveOffsetReg(unsigned Reg) {
     460             :     assert(Reg != AMDGPU::NoRegister && "Should never be unset");
     461       15855 :     ScratchWaveOffsetReg = Reg;
     462       15855 :     if (isEntryFunction())
     463        2599 :       FrameOffsetReg = ScratchWaveOffsetReg;
     464             :   }
     465             : 
     466             :   unsigned getQueuePtrUserSGPR() const {
     467          28 :     return ArgInfo.QueuePtr.getRegister();
     468             :   }
     469             : 
     470             :   unsigned getImplicitBufferPtrUserSGPR() const {
     471           2 :     return ArgInfo.ImplicitBufferPtr.getRegister();
     472             :   }
     473             : 
     474             :   bool hasSpilledSGPRs() const {
     475             :     return HasSpilledSGPRs;
     476             :   }
     477             : 
     478             :   void setHasSpilledSGPRs(bool Spill = true) {
     479         618 :     HasSpilledSGPRs = Spill;
     480             :   }
     481             : 
     482             :   bool hasSpilledVGPRs() const {
     483             :     return HasSpilledVGPRs;
     484             :   }
     485             : 
     486             :   void setHasSpilledVGPRs(bool Spill = true) {
     487        1178 :     HasSpilledVGPRs = Spill;
     488             :   }
     489             : 
     490             :   bool hasNonSpillStackObjects() const {
     491             :     return HasNonSpillStackObjects;
     492             :   }
     493             : 
     494             :   void setHasNonSpillStackObjects(bool StackObject = true) {
     495         370 :     HasNonSpillStackObjects = StackObject;
     496             :   }
     497             : 
     498             :   unsigned getNumSpilledSGPRs() const {
     499             :     return NumSpilledSGPRs;
     500             :   }
     501             : 
     502             :   unsigned getNumSpilledVGPRs() const {
     503             :     return NumSpilledVGPRs;
     504             :   }
     505             : 
     506             :   void addToSpilledSGPRs(unsigned num) {
     507         612 :     NumSpilledSGPRs += num;
     508             :   }
     509             : 
     510             :   void addToSpilledVGPRs(unsigned num) {
     511        1260 :     NumSpilledVGPRs += num;
     512             :   }
     513             : 
     514             :   unsigned getPSInputAddr() const {
     515             :     return PSInputAddr;
     516             :   }
     517             : 
     518             :   unsigned getPSInputEnable() const {
     519             :     return PSInputEnable;
     520             :   }
     521             : 
     522             :   bool isPSInputAllocated(unsigned Index) const {
     523        1101 :     return PSInputAddr & (1 << Index);
     524             :   }
     525             : 
     526             :   void markPSInputAllocated(unsigned Index) {
     527         956 :     PSInputAddr |= 1 << Index;
     528             :   }
     529             : 
     530             :   void markPSInputEnabled(unsigned Index) {
     531         844 :     PSInputEnable |= 1 << Index;
     532             :   }
     533             : 
     534             :   bool returnsVoid() const {
     535             :     return ReturnsVoid;
     536             :   }
     537             : 
     538             :   void setIfReturnsVoid(bool Value) {
     539        1932 :     ReturnsVoid = Value;
     540             :   }
     541             : 
     542             :   /// \returns A pair of default/requested minimum/maximum flat work group sizes
     543             :   /// for this function.
     544             :   std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
     545             :     return FlatWorkGroupSizes;
     546             :   }
     547             : 
     548             :   /// \returns Default/requested minimum flat work group size for this function.
     549             :   unsigned getMinFlatWorkGroupSize() const {
     550             :     return FlatWorkGroupSizes.first;
     551             :   }
     552             : 
     553             :   /// \returns Default/requested maximum flat work group size for this function.
     554             :   unsigned getMaxFlatWorkGroupSize() const {
     555             :     return FlatWorkGroupSizes.second;
     556             :   }
     557             : 
     558             :   /// \returns A pair of default/requested minimum/maximum number of waves per
     559             :   /// execution unit.
     560             :   std::pair<unsigned, unsigned> getWavesPerEU() const {
     561             :     return WavesPerEU;
     562             :   }
     563             : 
     564             :   /// \returns Default/requested minimum number of waves per execution unit.
     565             :   unsigned getMinWavesPerEU() const {
     566             :     return WavesPerEU.first;
     567             :   }
     568             : 
     569             :   /// \returns Default/requested maximum number of waves per execution unit.
     570             :   unsigned getMaxWavesPerEU() const {
     571             :     return WavesPerEU.second;
     572             :   }
     573             : 
     574             :   /// \returns Stack object index for \p Dim's work group ID.
     575             :   int getDebuggerWorkGroupIDStackObjectIndex(unsigned Dim) const {
     576             :     assert(Dim < 3);
     577          12 :     return DebuggerWorkGroupIDStackObjectIndices[Dim];
     578             :   }
     579             : 
     580             :   /// \brief Sets stack object index for \p Dim's work group ID to \p ObjectIdx.
     581             :   void setDebuggerWorkGroupIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
     582             :     assert(Dim < 3);
     583          12 :     DebuggerWorkGroupIDStackObjectIndices[Dim] = ObjectIdx;
     584             :   }
     585             : 
     586             :   /// \returns Stack object index for \p Dim's work item ID.
     587             :   int getDebuggerWorkItemIDStackObjectIndex(unsigned Dim) const {
     588             :     assert(Dim < 3);
     589          12 :     return DebuggerWorkItemIDStackObjectIndices[Dim];
     590             :   }
     591             : 
     592             :   /// \brief Sets stack object index for \p Dim's work item ID to \p ObjectIdx.
     593             :   void setDebuggerWorkItemIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
     594             :     assert(Dim < 3);
     595          12 :     DebuggerWorkItemIDStackObjectIndices[Dim] = ObjectIdx;
     596             :   }
     597             : 
     598             :   /// \returns SGPR used for \p Dim's work group ID.
     599             :   unsigned getWorkGroupIDSGPR(unsigned Dim) const {
     600          12 :     switch (Dim) {
     601           4 :     case 0:
     602             :       assert(hasWorkGroupIDX());
     603           4 :       return ArgInfo.WorkGroupIDX.getRegister();
     604           4 :     case 1:
     605             :       assert(hasWorkGroupIDY());
     606           4 :       return ArgInfo.WorkGroupIDY.getRegister();
     607           4 :     case 2:
     608             :       assert(hasWorkGroupIDZ());
     609           4 :       return ArgInfo.WorkGroupIDZ.getRegister();
     610             :     }
     611           0 :     llvm_unreachable("unexpected dimension");
     612             :   }
     613             : 
     614             :   /// \returns VGPR used for \p Dim's work item ID.
     615             :   unsigned getWorkItemIDVGPR(unsigned Dim) const {
     616          12 :     switch (Dim) {
     617             :     case 0:
     618             :       assert(hasWorkItemIDX());
     619             :       return AMDGPU::VGPR0;
     620           4 :     case 1:
     621             :       assert(hasWorkItemIDY());
     622             :       return AMDGPU::VGPR1;
     623           4 :     case 2:
     624             :       assert(hasWorkItemIDZ());
     625             :       return AMDGPU::VGPR2;
     626             :     }
     627           0 :     llvm_unreachable("unexpected dimension");
     628             :   }
     629             : 
     630             :   unsigned getLDSWaveSpillSize() const {
     631             :     return LDSWaveSpillSize;
     632             :   }
     633             : 
     634         461 :   const AMDGPUBufferPseudoSourceValue *getBufferPSV(const SIInstrInfo &TII,
     635             :                                                     const Value *BufferRsrc) {
     636             :     assert(BufferRsrc);
     637             :     auto PSV = BufferPSVs.try_emplace(
     638             :       BufferRsrc,
     639         922 :       llvm::make_unique<AMDGPUBufferPseudoSourceValue>(TII));
     640         922 :     return PSV.first->second.get();
     641             :   }
     642             : 
     643         524 :   const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII,
     644             :                                                   const Value *ImgRsrc) {
     645             :     assert(ImgRsrc);
     646             :     auto PSV = ImagePSVs.try_emplace(
     647             :       ImgRsrc,
     648        1048 :       llvm::make_unique<AMDGPUImagePseudoSourceValue>(TII));
     649        1048 :     return PSV.first->second.get();
     650             :   }
     651             : };
     652             : 
     653             : } // end namespace llvm
     654             : 
     655             : #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H

Generated by: LCOV version 1.13