LLVM  9.0.0svn
SIMachineFunctionInfo.cpp
Go to the documentation of this file.
1 //===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
11 #include "AMDGPUSubtarget.h"
12 #include "SIRegisterInfo.h"
14 #include "Utils/AMDGPUBaseInfo.h"
15 #include "llvm/ADT/Optional.h"
20 #include "llvm/IR/CallingConv.h"
21 #include "llvm/IR/Function.h"
22 #include <cassert>
23 #include <vector>
24 
25 #define MAX_LANES 64
26 
27 using namespace llvm;
28 
31  PrivateSegmentBuffer(false),
32  DispatchPtr(false),
33  QueuePtr(false),
34  KernargSegmentPtr(false),
35  DispatchID(false),
36  FlatScratchInit(false),
37  WorkGroupIDX(false),
38  WorkGroupIDY(false),
39  WorkGroupIDZ(false),
40  WorkGroupInfo(false),
41  PrivateSegmentWaveByteOffset(false),
42  WorkItemIDX(false),
43  WorkItemIDY(false),
44  WorkItemIDZ(false),
45  ImplicitBufferPtr(false),
46  ImplicitArgPtr(false),
47  GITPtrHigh(0xffffffff),
48  HighBitsOf32BitAddress(0) {
50  const Function &F = MF.getFunction();
51  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
52  WavesPerEU = ST.getWavesPerEU(F);
53 
54  Occupancy = getMaxWavesPerEU();
55  limitOccupancy(MF);
56  CallingConv::ID CC = F.getCallingConv();
57 
59  if (!F.arg_empty())
60  KernargSegmentPtr = true;
61  WorkGroupIDX = true;
62  WorkItemIDX = true;
63  } else if (CC == CallingConv::AMDGPU_PS) {
64  PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
65  }
66 
67  if (!isEntryFunction()) {
68  // Non-entry functions have no special inputs for now, other than the
69  // registers required for scratch access.
70  ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
71  ScratchWaveOffsetReg = AMDGPU::SGPR4;
72  FrameOffsetReg = AMDGPU::SGPR5;
73  StackPtrOffsetReg = AMDGPU::SGPR32;
74 
75  ArgInfo.PrivateSegmentBuffer =
76  ArgDescriptor::createRegister(ScratchRSrcReg);
78  ArgDescriptor::createRegister(ScratchWaveOffsetReg);
79 
80  if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
81  ImplicitArgPtr = true;
82  } else {
83  if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) {
84  KernargSegmentPtr = true;
87  }
88  }
89 
90  if (F.hasFnAttribute("amdgpu-work-group-id-x"))
91  WorkGroupIDX = true;
92 
93  if (F.hasFnAttribute("amdgpu-work-group-id-y"))
94  WorkGroupIDY = true;
95 
96  if (F.hasFnAttribute("amdgpu-work-group-id-z"))
97  WorkGroupIDZ = true;
98 
99  if (F.hasFnAttribute("amdgpu-work-item-id-x"))
100  WorkItemIDX = true;
101 
102  if (F.hasFnAttribute("amdgpu-work-item-id-y"))
103  WorkItemIDY = true;
104 
105  if (F.hasFnAttribute("amdgpu-work-item-id-z"))
106  WorkItemIDZ = true;
107 
108  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
109  bool HasStackObjects = FrameInfo.hasStackObjects();
110 
111  if (isEntryFunction()) {
112  // X, XY, and XYZ are the only supported combinations, so make sure Y is
113  // enabled if Z is.
114  if (WorkItemIDZ)
115  WorkItemIDY = true;
116 
117  PrivateSegmentWaveByteOffset = true;
118 
119  // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
120  if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
123  ArgDescriptor::createRegister(AMDGPU::SGPR5);
124  }
125 
126  bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
127  if (isAmdHsaOrMesa) {
128  PrivateSegmentBuffer = true;
129 
130  if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
131  DispatchPtr = true;
132 
133  if (F.hasFnAttribute("amdgpu-queue-ptr"))
134  QueuePtr = true;
135 
136  if (F.hasFnAttribute("amdgpu-dispatch-id"))
137  DispatchID = true;
138  } else if (ST.isMesaGfxShader(F)) {
139  ImplicitBufferPtr = true;
140  }
141 
142  if (F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
143  KernargSegmentPtr = true;
144 
145  if (ST.hasFlatAddressSpace() && isEntryFunction() && isAmdHsaOrMesa) {
146  // TODO: This could be refined a lot. The attribute is a poor way of
147  // detecting calls that may require it before argument lowering.
148  if (HasStackObjects || F.hasFnAttribute("amdgpu-flat-scratch"))
149  FlatScratchInit = true;
150  }
151 
152  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
153  StringRef S = A.getValueAsString();
154  if (!S.empty())
155  S.consumeInteger(0, GITPtrHigh);
156 
157  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
158  S = A.getValueAsString();
159  if (!S.empty())
160  S.consumeInteger(0, HighBitsOf32BitAddress);
161 }
162 
165  const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();
167  MF.getFunction()));
168 }
169 
171  const SIRegisterInfo &TRI) {
172  ArgInfo.PrivateSegmentBuffer =
173  ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
174  getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass));
175  NumUserSGPRs += 4;
176  return ArgInfo.PrivateSegmentBuffer.getRegister();
177 }
178 
180  ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
181  getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
182  NumUserSGPRs += 2;
183  return ArgInfo.DispatchPtr.getRegister();
184 }
185 
187  ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
188  getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
189  NumUserSGPRs += 2;
190  return ArgInfo.QueuePtr.getRegister();
191 }
192 
194  ArgInfo.KernargSegmentPtr
195  = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
196  getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
197  NumUserSGPRs += 2;
198  return ArgInfo.KernargSegmentPtr.getRegister();
199 }
200 
202  ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
203  getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
204  NumUserSGPRs += 2;
205  return ArgInfo.DispatchID.getRegister();
206 }
207 
209  ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
210  getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
211  NumUserSGPRs += 2;
212  return ArgInfo.FlatScratchInit.getRegister();
213 }
214 
216  ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
217  getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
218  NumUserSGPRs += 2;
219  return ArgInfo.ImplicitBufferPtr.getRegister();
220 }
221 
222 static bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) {
223  for (unsigned I = 0; CSRegs[I]; ++I) {
224  if (CSRegs[I] == Reg)
225  return true;
226  }
227 
228  return false;
229 }
230 
231 /// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
233  int FI) {
234  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];
235 
236  // This has already been allocated.
237  if (!SpillLanes.empty())
238  return true;
239 
240  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
241  const SIRegisterInfo *TRI = ST.getRegisterInfo();
242  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
244  unsigned WaveSize = ST.getWavefrontSize();
245 
246  unsigned Size = FrameInfo.getObjectSize(FI);
247  assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
248  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");
249 
250  int NumLanes = Size / 4;
251 
252  const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
253 
254  // Make sure to handle the case where a wide SGPR spill may span between two
255  // VGPRs.
256  for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
257  unsigned LaneVGPR;
258  unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);
259 
260  if (VGPRIndex == 0) {
261  LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
262  if (LaneVGPR == AMDGPU::NoRegister) {
263  // We have no VGPRs left for spilling SGPRs. Reset because we will not
264  // partially spill the SGPR to VGPRs.
265  SGPRToVGPRSpills.erase(FI);
266  NumVGPRSpillLanes -= I;
267  return false;
268  }
269 
270  Optional<int> CSRSpillFI;
271  if ((FrameInfo.hasCalls() || !isEntryFunction()) && CSRegs &&
272  isCalleeSavedReg(CSRegs, LaneVGPR)) {
273  CSRSpillFI = FrameInfo.CreateSpillStackObject(4, 4);
274  }
275 
276  SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, CSRSpillFI));
277 
278  // Add this register as live-in to all blocks to avoid machine verifier
279  // complaining about use of an undefined physical register.
280  for (MachineBasicBlock &BB : MF)
281  BB.addLiveIn(LaneVGPR);
282  } else {
283  LaneVGPR = SpillVGPRs.back().VGPR;
284  }
285 
286  SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
287  }
288 
289  return true;
290 }
291 
293  for (auto &R : SGPRToVGPRSpills)
294  MFI.RemoveStackObject(R.first);
295 }
296 
297 
298 /// \returns VGPR used for \p Dim's work item ID.
299 unsigned SIMachineFunctionInfo::getWorkItemIDVGPR(unsigned Dim) const {
300  switch (Dim) {
301  case 0:
303  return AMDGPU::VGPR0;
304  case 1:
306  return AMDGPU::VGPR1;
307  case 2:
309  return AMDGPU::VGPR2;
310  }
311  llvm_unreachable("unexpected dimension");
312 }
313 
314 MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
315  assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
316  return AMDGPU::SGPR0 + NumUserSGPRs;
317 }
318 
319 MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
320  return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
321 }
322 
324  const TargetRegisterInfo &TRI) {
325  yaml::StringValue Dest;
326  {
327  raw_string_ostream OS(Dest.Value);
328  OS << printReg(Reg, &TRI);
329  }
330  return Dest;
331 }
332 
334  const llvm::SIMachineFunctionInfo& MFI,
335  const TargetRegisterInfo &TRI)
338  LDSSize(MFI.getLDSSize()),
341  MemoryBound(MFI.isMemoryBound()),
343  ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
344  ScratchWaveOffsetReg(regToString(MFI.getScratchWaveOffsetReg(), TRI)),
345  FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
346  StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)) {}
347 
350 }
351 
353  const yaml::SIMachineFunctionInfo &YamlMFI) {
356  LDSSize = YamlMFI.LDSSize;
359  MemoryBound = YamlMFI.MemoryBound;
360  WaveLimiter = YamlMFI.WaveLimiter;
361  return false;
362 }
unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI)
Interface definition for SIRegisterInfo.
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
AMDGPU specific subclass of TargetSubtarget.
This class represents lattice values for constants.
Definition: AllocatorList.h:23
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const
Inverse of getMaxLocalMemWithWaveCount.
bool hasStackObjects() const
Return true if there are any stack objects in this function.
void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI)
unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI)
unsigned Reg
unsigned const TargetRegisterInfo * TRI
F(f)
SIMachineFunctionInfo(const MachineFunction &MF)
SPIR_KERNEL - Calling convention for SPIR kernel functions.
Definition: CallingConv.h:136
bool isMesaGfxShader(const Function &F) const
Printable printReg(unsigned Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
unsigned addDispatchID(const SIRegisterInfo &TRI)
static yaml::StringValue regToString(unsigned Reg, const TargetRegisterInfo &TRI)
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
LLVM_NODISCARD bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:126
bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI)
void limitOccupancy(const MachineFunction &MF)
std::enable_if< std::numeric_limits< T >::is_signed, bool >::type consumeInteger(unsigned Radix, T &Result)
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:512
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI)
Reserve a slice of a VGPR to support spilling for FrameIndex FI.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
std::pair< unsigned, unsigned > getWavesPerEU(const Function &F) const
Calling convention used for Mesa/AMDPAL pixel shaders.
Definition: CallingConv.h:194
unsigned const MachineRegisterInfo * MRI
Calling convention for AMDGPU code object kernels.
Definition: CallingConv.h:200
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void RemoveStackObject(int ObjectIdx)
Remove or mark dead a statically sized stack object.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
static ArgDescriptor createRegister(unsigned Reg)
unsigned addQueuePtr(const SIRegisterInfo &TRI)
Generation getGeneration() const
int CreateSpillStackObject(uint64_t Size, unsigned Alignment)
Create a new statically sized stack object that represents a spill slot, returning a nonnegative iden...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned addDispatchPtr(const SIRegisterInfo &TRI)
bool hasFlatAddressSpace() const
unsigned getWavefrontSize() const
Calling convention used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
Definition: CallingConv.h:207
unsigned LDSSize
Number of bytes in the LDS that are being used.
unsigned getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses...
unsigned getAlignmentForImplicitArgPtr() const
const Function & getFunction() const
Return the LLVM function that this machine code represents.
A wrapper around std::string which contains a source range that&#39;s being set during parsing...
unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI)
unsigned addFlatScratchInit(const SIRegisterInfo &TRI)
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
unsigned getInitialPSInputAddr(const Function &F)
Provides AMDGPU specific target descriptions.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
void mappingImpl(yaml::IO &YamlIO) override
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
StringRef getValueAsString() const
Return the attribute&#39;s value as a string.
Definition: Attributes.cpp:194
#define I(x, y, z)
Definition: MD5.cpp:58
bool isAmdHsaOrMesa(const Function &F) const
uint32_t Size
Definition: Profile.cpp:46
static bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg)
unsigned getRegister() const
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:482
unsigned getWorkItemIDVGPR(unsigned Dim) const
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
std::pair< unsigned, unsigned > getFlatWorkGroupSizes(const Function &F) const
Calling convention used for Mesa/AMDPAL geometry shaders.
Definition: CallingConv.h:191
bool hasCalls() const
Return true if the current function has any function calls.
const SIRegisterInfo * getRegisterInfo() const override