LLVM  9.0.0svn
SIMachineFunctionInfo.cpp
Go to the documentation of this file.
1 //===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
#include "SIMachineFunctionInfo.h"
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
#include <algorithm>
#include <cassert>
#include <vector>
24 
25 #define MAX_LANES 64
26 
27 using namespace llvm;
28 
31  PrivateSegmentBuffer(false),
32  DispatchPtr(false),
33  QueuePtr(false),
34  KernargSegmentPtr(false),
35  DispatchID(false),
36  FlatScratchInit(false),
37  WorkGroupIDX(false),
38  WorkGroupIDY(false),
39  WorkGroupIDZ(false),
40  WorkGroupInfo(false),
41  PrivateSegmentWaveByteOffset(false),
42  WorkItemIDX(false),
43  WorkItemIDY(false),
44  WorkItemIDZ(false),
45  ImplicitBufferPtr(false),
46  ImplicitArgPtr(false),
47  GITPtrHigh(0xffffffff),
48  HighBitsOf32BitAddress(0) {
50  const Function &F = MF.getFunction();
51  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
52  WavesPerEU = ST.getWavesPerEU(F);
53 
54  Occupancy = getMaxWavesPerEU();
55  limitOccupancy(MF);
56  CallingConv::ID CC = F.getCallingConv();
57 
59  if (!F.arg_empty())
60  KernargSegmentPtr = true;
61  WorkGroupIDX = true;
62  WorkItemIDX = true;
63  } else if (CC == CallingConv::AMDGPU_PS) {
64  PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
65  }
66 
67  if (!isEntryFunction()) {
68  // Non-entry functions have no special inputs for now, other registers
69  // required for scratch access.
70  ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
71  ScratchWaveOffsetReg = AMDGPU::SGPR4;
72  FrameOffsetReg = AMDGPU::SGPR5;
73  StackPtrOffsetReg = AMDGPU::SGPR32;
74 
75  ArgInfo.PrivateSegmentBuffer =
76  ArgDescriptor::createRegister(ScratchRSrcReg);
78  ArgDescriptor::createRegister(ScratchWaveOffsetReg);
79 
80  if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
81  ImplicitArgPtr = true;
82  } else {
83  if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) {
84  KernargSegmentPtr = true;
87  }
88  }
89 
90  if (ST.debuggerEmitPrologue()) {
91  // Enable everything.
92  WorkGroupIDX = true;
93  WorkGroupIDY = true;
94  WorkGroupIDZ = true;
95  WorkItemIDX = true;
96  WorkItemIDY = true;
97  WorkItemIDZ = true;
98  } else {
99  if (F.hasFnAttribute("amdgpu-work-group-id-x"))
100  WorkGroupIDX = true;
101 
102  if (F.hasFnAttribute("amdgpu-work-group-id-y"))
103  WorkGroupIDY = true;
104 
105  if (F.hasFnAttribute("amdgpu-work-group-id-z"))
106  WorkGroupIDZ = true;
107 
108  if (F.hasFnAttribute("amdgpu-work-item-id-x"))
109  WorkItemIDX = true;
110 
111  if (F.hasFnAttribute("amdgpu-work-item-id-y"))
112  WorkItemIDY = true;
113 
114  if (F.hasFnAttribute("amdgpu-work-item-id-z"))
115  WorkItemIDZ = true;
116  }
117 
118  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
119  bool HasStackObjects = FrameInfo.hasStackObjects();
120 
121  if (isEntryFunction()) {
122  // X, XY, and XYZ are the only supported combinations, so make sure Y is
123  // enabled if Z is.
124  if (WorkItemIDZ)
125  WorkItemIDY = true;
126 
127  PrivateSegmentWaveByteOffset = true;
128 
129  // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
130  if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
133  ArgDescriptor::createRegister(AMDGPU::SGPR5);
134  }
135 
136  bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
137  if (isAmdHsaOrMesa) {
138  PrivateSegmentBuffer = true;
139 
140  if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
141  DispatchPtr = true;
142 
143  if (F.hasFnAttribute("amdgpu-queue-ptr"))
144  QueuePtr = true;
145 
146  if (F.hasFnAttribute("amdgpu-dispatch-id"))
147  DispatchID = true;
148  } else if (ST.isMesaGfxShader(F)) {
149  ImplicitBufferPtr = true;
150  }
151 
152  if (F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
153  KernargSegmentPtr = true;
154 
155  if (ST.hasFlatAddressSpace() && isEntryFunction() && isAmdHsaOrMesa) {
156  // TODO: This could be refined a lot. The attribute is a poor way of
157  // detecting calls that may require it before argument lowering.
158  if (HasStackObjects || F.hasFnAttribute("amdgpu-flat-scratch"))
159  FlatScratchInit = true;
160  }
161 
162  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
163  StringRef S = A.getValueAsString();
164  if (!S.empty())
165  S.consumeInteger(0, GITPtrHigh);
166 
167  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
168  S = A.getValueAsString();
169  if (!S.empty())
170  S.consumeInteger(0, HighBitsOf32BitAddress);
171 }
172 
175  const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();
177  MF.getFunction()));
178 }
179 
181  const SIRegisterInfo &TRI) {
182  ArgInfo.PrivateSegmentBuffer =
183  ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
184  getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass));
185  NumUserSGPRs += 4;
186  return ArgInfo.PrivateSegmentBuffer.getRegister();
187 }
188 
190  ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
191  getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
192  NumUserSGPRs += 2;
193  return ArgInfo.DispatchPtr.getRegister();
194 }
195 
197  ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
198  getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
199  NumUserSGPRs += 2;
200  return ArgInfo.QueuePtr.getRegister();
201 }
202 
204  ArgInfo.KernargSegmentPtr
205  = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
206  getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
207  NumUserSGPRs += 2;
208  return ArgInfo.KernargSegmentPtr.getRegister();
209 }
210 
212  ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
213  getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
214  NumUserSGPRs += 2;
215  return ArgInfo.DispatchID.getRegister();
216 }
217 
219  ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
220  getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
221  NumUserSGPRs += 2;
222  return ArgInfo.FlatScratchInit.getRegister();
223 }
224 
226  ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
227  getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
228  NumUserSGPRs += 2;
229  return ArgInfo.ImplicitBufferPtr.getRegister();
230 }
231 
232 static bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) {
233  for (unsigned I = 0; CSRegs[I]; ++I) {
234  if (CSRegs[I] == Reg)
235  return true;
236  }
237 
238  return false;
239 }
240 
241 /// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
243  int FI) {
244  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];
245 
246  // This has already been allocated.
247  if (!SpillLanes.empty())
248  return true;
249 
250  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
251  const SIRegisterInfo *TRI = ST.getRegisterInfo();
252  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
254  unsigned WaveSize = ST.getWavefrontSize();
255 
256  unsigned Size = FrameInfo.getObjectSize(FI);
257  assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
258  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");
259 
260  int NumLanes = Size / 4;
261 
262  const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
263 
264  // Make sure to handle the case where a wide SGPR spill may span between two
265  // VGPRs.
266  for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
267  unsigned LaneVGPR;
268  unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);
269 
270  if (VGPRIndex == 0) {
271  LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
272  if (LaneVGPR == AMDGPU::NoRegister) {
273  // We have no VGPRs left for spilling SGPRs. Reset because we will not
274  // partially spill the SGPR to VGPRs.
275  SGPRToVGPRSpills.erase(FI);
276  NumVGPRSpillLanes -= I;
277  return false;
278  }
279 
280  Optional<int> CSRSpillFI;
281  if ((FrameInfo.hasCalls() || !isEntryFunction()) && CSRegs &&
282  isCalleeSavedReg(CSRegs, LaneVGPR)) {
283  CSRSpillFI = FrameInfo.CreateSpillStackObject(4, 4);
284  }
285 
286  SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, CSRSpillFI));
287 
288  // Add this register as live-in to all blocks to avoid machine verifer
289  // complaining about use of an undefined physical register.
290  for (MachineBasicBlock &BB : MF)
291  BB.addLiveIn(LaneVGPR);
292  } else {
293  LaneVGPR = SpillVGPRs.back().VGPR;
294  }
295 
296  SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
297  }
298 
299  return true;
300 }
301 
303  for (auto &R : SGPRToVGPRSpills)
304  MFI.RemoveStackObject(R.first);
305 }
306 
307 
308 /// \returns VGPR used for \p Dim' work item ID.
309 unsigned SIMachineFunctionInfo::getWorkItemIDVGPR(unsigned Dim) const {
310  switch (Dim) {
311  case 0:
313  return AMDGPU::VGPR0;
314  case 1:
316  return AMDGPU::VGPR1;
317  case 2:
319  return AMDGPU::VGPR2;
320  }
321  llvm_unreachable("unexpected dimension");
322 }
323 
324 MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
325  assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
326  return AMDGPU::SGPR0 + NumUserSGPRs;
327 }
328 
329 MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
330  return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
331 }
unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI)
Interface definition for SIRegisterInfo.
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
AMDGPU specific subclass of TargetSubtarget.
This class represents lattice values for constants.
Definition: AllocatorList.h:23
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const
Inverse of getMaxLocalMemWithWaveCount.
bool hasStackObjects() const
Return true if there are any stack objects in this function.
void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI)
unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI)
unsigned Reg
unsigned const TargetRegisterInfo * TRI
F(f)
SIMachineFunctionInfo(const MachineFunction &MF)
Calling convention used for Mesa/AMDPAL geometry shaders.
Definition: CallingConv.h:191
bool isMesaGfxShader(const Function &F) const
unsigned addDispatchID(const SIRegisterInfo &TRI)
SPIR_KERNEL - Calling convention for SPIR kernel functions.
Definition: CallingConv.h:136
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
void limitOccupancy(const MachineFunction &MF)
std::enable_if< std::numeric_limits< T >::is_signed, bool >::type consumeInteger(unsigned Radix, T &Result)
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:530
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:132
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI)
Reserve a slice of a VGPR to support spilling for FrameIndex FI.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
bool debuggerEmitPrologue() const
std::pair< unsigned, unsigned > getWavesPerEU(const Function &F) const
unsigned const MachineRegisterInfo * MRI
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void RemoveStackObject(int ObjectIdx)
Remove or mark dead a statically sized stack object.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
static ArgDescriptor createRegister(unsigned Reg)
unsigned addQueuePtr(const SIRegisterInfo &TRI)
Generation getGeneration() const
Calling convention used for Mesa/AMDPAL pixel shaders.
Definition: CallingConv.h:194
int CreateSpillStackObject(uint64_t Size, unsigned Alignment)
Create a new statically sized stack object that represents a spill slot, returning a nonnegative iden...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned addDispatchPtr(const SIRegisterInfo &TRI)
bool hasFlatAddressSpace() const
unsigned getWavefrontSize() const
unsigned getAlignmentForImplicitArgPtr() const
const Function & getFunction() const
Return the LLVM function that this machine code represents.
unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI)
unsigned addFlatScratchInit(const SIRegisterInfo &TRI)
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
unsigned getInitialPSInputAddr(const Function &F)
Provides AMDGPU specific target descriptions.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:194
Calling convention used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
Definition: CallingConv.h:207
#define I(x, y, z)
Definition: MD5.cpp:58
bool isAmdHsaOrMesa(const Function &F) const
uint32_t Size
Definition: Profile.cpp:46
static bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg)
unsigned getRegister() const
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
unsigned getWorkItemIDVGPR(unsigned Dim) const
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
std::pair< unsigned, unsigned > getFlatWorkGroupSizes(const Function &F) const
Calling convention for AMDGPU code object kernels.
Definition: CallingConv.h:200
bool hasCalls() const
Return true if the current function has any function calls.
const SIRegisterInfo * getRegisterInfo() const override