LLVM 9.0.0svn
SIMachineFunctionInfo.cpp
//===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
#include <cassert>
#include <vector>

#define MAX_LANES 64

using namespace llvm;

SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    Mode(MF.getFunction()),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false),
    ImplicitBufferPtr(false),
    ImplicitArgPtr(false),
    GITPtrHigh(0xffffffff),
    HighBitsOf32BitAddress(0) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const Function &F = MF.getFunction();
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  WavesPerEU = ST.getWavesPerEU(F);

  Occupancy = getMaxWavesPerEU();
  limitOccupancy(MF);
  CallingConv::ID CC = F.getCallingConv();

  if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
    if (!F.arg_empty())
      KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  }
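
  // Non-entry (callable) functions use a fixed assignment for the scratch
  // resource descriptor, scratch wave offset, frame offset, and stack pointer
  // below; entry functions have these chosen later in compilation. Only the
  // implicit-argument pointer is attribute-gated in both cases.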

  if (!isEntryFunction()) {
    // Non-entry functions have no special inputs for now, other than the
    // registers required for scratch access.
    ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
    ScratchWaveOffsetReg = AMDGPU::SGPR4;
    FrameOffsetReg = AMDGPU::SGPR5;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    ArgInfo.PrivateSegmentBuffer =
      ArgDescriptor::createRegister(ScratchRSrcReg);
    ArgInfo.PrivateSegmentWaveByteOffset =
      ArgDescriptor::createRegister(ScratchWaveOffsetReg);

    if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) {
      KernargSegmentPtr = true;
      MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
                                 MaxKernArgAlign);
    }
  }
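
  // The "amdgpu-work-group-id-*" and "amdgpu-work-item-id-*" attributes are
  // normally attached by earlier AMDGPU attribute-propagation passes when the
  // corresponding ID intrinsics are reachable from this function; each
  // attribute simply enables the matching preloaded input here.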

  if (F.hasFnAttribute("amdgpu-work-group-id-x"))
    WorkGroupIDX = true;

  if (F.hasFnAttribute("amdgpu-work-group-id-y"))
    WorkGroupIDY = true;

  if (F.hasFnAttribute("amdgpu-work-group-id-z"))
    WorkGroupIDZ = true;

  if (F.hasFnAttribute("amdgpu-work-item-id-x"))
    WorkItemIDX = true;

  if (F.hasFnAttribute("amdgpu-work-item-id-y"))
    WorkItemIDY = true;

  if (F.hasFnAttribute("amdgpu-work-item-id-z"))
    WorkItemIDZ = true;

  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  bool HasStackObjects = FrameInfo.hasStackObjects();

  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    PrivateSegmentWaveByteOffset = true;

    // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
    if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
        (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
      ArgInfo.PrivateSegmentWaveByteOffset =
          ArgDescriptor::createRegister(AMDGPU::SGPR5);
  }

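  // Under the AMD HSA and Mesa3D ABIs the private segment (scratch) buffer
  // descriptor is always preloaded, while the dispatch pointer, queue pointer,
  // and dispatch ID are only preloaded when requested via attribute. Other
  // Mesa graphics shaders instead receive an implicit buffer pointer.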
  bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
  if (isAmdHsaOrMesa) {
    PrivateSegmentBuffer = true;

    if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
      DispatchPtr = true;

    if (F.hasFnAttribute("amdgpu-queue-ptr"))
      QueuePtr = true;

    if (F.hasFnAttribute("amdgpu-dispatch-id"))
      DispatchID = true;
  } else if (ST.isMesaGfxShader(F)) {
    ImplicitBufferPtr = true;
  }

  if (F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
    KernargSegmentPtr = true;

  if (ST.hasFlatAddressSpace() && isEntryFunction() && isAmdHsaOrMesa) {
    // TODO: This could be refined a lot. The attribute is a poor way of
    // detecting calls that may require it before argument lowering.
    if (HasStackObjects || F.hasFnAttribute("amdgpu-flat-scratch"))
      FlatScratchInit = true;
  }

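  // "amdgpu-git-ptr-high" and "amdgpu-32bit-address-high-bits" carry their
  // values as decimal or "0x"-prefixed strings; consumeInteger with radix 0
  // auto-detects the base. As a hypothetical example, a front end could set
  //   F.addFnAttr("amdgpu-git-ptr-high", "0x10000");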
  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
  StringRef S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GITPtrHigh);

  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
  S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, HighBitsOf32BitAddress);
}

void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
  limitOccupancy(getMaxWavesPerEU());
  const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();
  limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
                 MF.getFunction()));
}
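
// Each add* helper below reserves the next run of user SGPRs for a preloaded
// argument: getNextUserSGPR() is the first free SGPR, getMatchingSuperReg()
// widens it to the 64-bit (or, for the private segment buffer, 128-bit)
// register starting there, and NumUserSGPRs advances by the width in
// 32-bit registers.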

unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
  const SIRegisterInfo &TRI) {
  ArgInfo.PrivateSegmentBuffer =
    ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass));
  NumUserSGPRs += 4;
  return ArgInfo.PrivateSegmentBuffer.getRegister();
}

unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchPtr.getRegister();
}

unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.QueuePtr.getRegister();
}

unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  ArgInfo.KernargSegmentPtr
    = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.KernargSegmentPtr.getRegister();
}

unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchID.getRegister();
}

unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.FlatScratchInit.getRegister();
}

unsigned SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
  ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.ImplicitBufferPtr.getRegister();
}

static bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) {
  for (unsigned I = 0; CSRegs[I]; ++I) {
    if (CSRegs[I] == Reg)
      return true;
  }

  return false;
}

/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();

  unsigned Size = FrameInfo.getObjectSize(FI);
  assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

  int NumLanes = Size / 4;

  const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);

  // Make sure to handle the case where a wide SGPR spill may span between two
  // VGPRs.
  for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    unsigned LaneVGPR;
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

    if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // We have no VGPRs left for spilling SGPRs. Reset because we will not
        // partially spill the SGPR to VGPRs.
        SGPRToVGPRSpills.erase(FI);
        NumVGPRSpillLanes -= I;
        return false;
      }

      Optional<int> CSRSpillFI;
      if ((FrameInfo.hasCalls() || !isEntryFunction()) && CSRegs &&
          isCalleeSavedReg(CSRegs, LaneVGPR)) {
        CSRSpillFI = FrameInfo.CreateSpillStackObject(4, 4);
      }

      SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, CSRSpillFI));

      // Add this register as live-in to all blocks to avoid machine verifier
      // complaining about use of an undefined physical register.
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      LaneVGPR = SpillVGPRs.back().VGPR;
    }

    SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}
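
// Worked example for the loop above (64-lane wave): spilling a 16-byte frame
// index (4 SGPRs) when NumVGPRSpillLanes is 62 places two values in lanes 62
// and 63 of the current spill VGPR and the remaining two in lanes 0 and 1 of
// a newly allocated VGPR, i.e. a single wide SGPR spill can span two VGPRs.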

void SIMachineFunctionInfo::removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI) {
  for (auto &R : SGPRToVGPRSpills)
    MFI.RemoveStackObject(R.first);
  // All other SGPRs must be allocated on the default stack, so reset
  // the stack ID.
  for (unsigned i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd();
       i != e; ++i)
    MFI.setStackID(i, 0);
}

/// \returns VGPR used for \p Dim's work item ID.
unsigned SIMachineFunctionInfo::getWorkItemIDVGPR(unsigned Dim) const {
  switch (Dim) {
  case 0:
    assert(hasWorkItemIDX());
    return AMDGPU::VGPR0;
  case 1:
    assert(hasWorkItemIDY());
    return AMDGPU::VGPR1;
  case 2:
    assert(hasWorkItemIDZ());
    return AMDGPU::VGPR2;
  }
  llvm_unreachable("unexpected dimension");
}

MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
  assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
  return AMDGPU::SGPR0 + NumUserSGPRs;
}

MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
  return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
}

static yaml::StringValue regToString(unsigned Reg,
                                     const TargetRegisterInfo &TRI) {
  yaml::StringValue Dest;
  {
    raw_string_ostream OS(Dest.Value);
    OS << printReg(Reg, &TRI);
  }
  return Dest;
}
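
// regToString renders registers the way MIR prints them (e.g. a physical SGPR
// tuple appears as '$sgpr0_sgpr1_sgpr2_sgpr3'), so the string fields below
// can be parsed back when loading .mir files.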

yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
  const llvm::SIMachineFunctionInfo& MFI,
  const TargetRegisterInfo &TRI)
  : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
    MaxKernArgAlign(MFI.getMaxKernArgAlign()),
    LDSSize(MFI.getLDSSize()),
    IsEntryFunction(MFI.isEntryFunction()),
    NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
    MemoryBound(MFI.isMemoryBound()),
    WaveLimiter(MFI.needsWaveLimiter()),
    ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
    ScratchWaveOffsetReg(regToString(MFI.getScratchWaveOffsetReg(), TRI)),
    FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
    StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)) {}

void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
  MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);
}

bool SIMachineFunctionInfo::initializeBaseYamlFields(
  const yaml::SIMachineFunctionInfo &YamlMFI) {
  ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
  MaxKernArgAlign = YamlMFI.MaxKernArgAlign;
  LDSSize = YamlMFI.LDSSize;
  IsEntryFunction = YamlMFI.IsEntryFunction;
  NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
  MemoryBound = YamlMFI.MemoryBound;
  WaveLimiter = YamlMFI.WaveLimiter;
  return false;
}
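
// The fields handled above round-trip through the "machineFunctionInfo:" block
// of MIR output. A minimal sketch of the serialized form (assuming the key
// names defined by the YAML mapping in SIMachineFunctionInfo.h; register names
// and values are illustrative only):
//
//   machineFunctionInfo:
//     explicitKernArgSize: 0
//     maxKernArgAlign: 0
//     ldsSize: 0
//     isEntryFunction: true
//     noSignedZerosFPMath: false
//     memoryBound: false
//     waveLimiter: false
//     scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
//     scratchWaveOffsetReg: '$sgpr4'
//     frameOffsetReg: '$sgpr5'
//     stackPtrOffsetReg: '$sgpr32'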