LLVM  8.0.0svn
SIMachineFunctionInfo.cpp
Go to the documentation of this file.
1 //===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "SIMachineFunctionInfo.h"
12 #include "AMDGPUSubtarget.h"
13 #include "SIRegisterInfo.h"
15 #include "Utils/AMDGPUBaseInfo.h"
16 #include "llvm/ADT/Optional.h"
21 #include "llvm/IR/CallingConv.h"
22 #include "llvm/IR/Function.h"
23 #include <cassert>
24 #include <vector>
25 
26 #define MAX_LANES 64
27 
28 using namespace llvm;
29 
// NOTE(review): Doxygen-extracted fragment of the SIMachineFunctionInfo
// constructor. The extraction elided the constructor signature (original
// lines 30-31) and interior lines 50, 59, 78, 86-87, and 134-135, so some
// conditions/assignments below are visibly truncated — see the hedged notes
// at each elision point.
// All argument-related flags start disabled; the body below enables them
// based on calling convention and function attributes.
32  PrivateSegmentBuffer(false),
33  DispatchPtr(false),
34  QueuePtr(false),
35  KernargSegmentPtr(false),
36  DispatchID(false),
37  FlatScratchInit(false),
38  WorkGroupIDX(false),
39  WorkGroupIDY(false),
40  WorkGroupIDZ(false),
41  WorkGroupInfo(false),
42  PrivateSegmentWaveByteOffset(false),
43  WorkItemIDX(false),
44  WorkItemIDY(false),
45  WorkItemIDZ(false),
46  ImplicitBufferPtr(false),
47  ImplicitArgPtr(false),
48  GITPtrHigh(0xffffffff),
49  HighBitsOf32BitAddress(0) {
// NOTE(review): line 50 elided — presumably the definition of ST (the
// GCNSubtarget) that is used throughout the body; verify against upstream.
51  const Function &F = MF.getFunction();
// Cache the per-function launch bounds derived from attributes.
52  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
53  WavesPerEU = ST.getWavesPerEU(F);
54 
// Start from the theoretical maximum occupancy, then clamp by function
// properties (LDS usage etc. via limitOccupancy).
55  Occupancy = getMaxWavesPerEU();
56  limitOccupancy(MF);
57  CallingConv::ID CC = F.getCallingConv();
58 
// NOTE(review): line 59 elided — the opening `if` for the kernel calling
// conventions (the cross-reference section mentions AMDGPU kernel and
// SPIR_KERNEL); the `} else if` at line 64 closes it. Verify upstream.
60  if (!F.arg_empty())
61  KernargSegmentPtr = true;
62  WorkGroupIDX = true;
63  WorkItemIDX = true;
64  } else if (CC == CallingConv::AMDGPU_PS) {
65  PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
66  }
67 
68  if (!isEntryFunction()) {
// Non-entry (callable) functions get fixed ABI registers for scratch access
// instead of special argument inputs.
69  // Non-entry functions have no special inputs for now; other registers are
70  // required for scratch access.
71  ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
72  ScratchWaveOffsetReg = AMDGPU::SGPR4;
73  FrameOffsetReg = AMDGPU::SGPR5;
74  StackPtrOffsetReg = AMDGPU::SGPR32;
75 
76  ArgInfo.PrivateSegmentBuffer =
77  ArgDescriptor::createRegister(ScratchRSrcReg);
// NOTE(review): line 78 elided — presumably the left-hand side
// `ArgInfo.PrivateSegmentWaveByteOffset =` of the assignment below; verify.
79  ArgDescriptor::createRegister(ScratchWaveOffsetReg);
80 
81  if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
82  ImplicitArgPtr = true;
83  } else {
84  if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) {
85  KernargSegmentPtr = true;
// NOTE(review): lines 86-87 elided — likely a MaxKernArgAlign update using
// ST.getAlignmentForImplicitArgPtr() (declared in the cross-reference
// section); verify against upstream.
88  }
89  }
90 
// Debugger prologue needs every workgroup/workitem ID available.
91  if (ST.debuggerEmitPrologue()) {
92  // Enable everything.
93  WorkGroupIDX = true;
94  WorkGroupIDY = true;
95  WorkGroupIDZ = true;
96  WorkItemIDX = true;
97  WorkItemIDY = true;
98  WorkItemIDZ = true;
99  } else {
// Otherwise only enable the IDs explicitly requested via attributes.
100  if (F.hasFnAttribute("amdgpu-work-group-id-x"))
101  WorkGroupIDX = true;
102 
103  if (F.hasFnAttribute("amdgpu-work-group-id-y"))
104  WorkGroupIDY = true;
105 
106  if (F.hasFnAttribute("amdgpu-work-group-id-z"))
107  WorkGroupIDZ = true;
108 
109  if (F.hasFnAttribute("amdgpu-work-item-id-x"))
110  WorkItemIDX = true;
111 
112  if (F.hasFnAttribute("amdgpu-work-item-id-y"))
113  WorkItemIDY = true;
114 
115  if (F.hasFnAttribute("amdgpu-work-item-id-z"))
116  WorkItemIDZ = true;
117  }
118 
119  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
120  bool MaySpill = ST.isVGPRSpillingEnabled(F);
121  bool HasStackObjects = FrameInfo.hasStackObjects();
122 
123  if (isEntryFunction()) {
124  // X, XY, and XYZ are the only supported combinations, so make sure Y is
125  // enabled if Z is.
126  if (WorkItemIDZ)
127  WorkItemIDY = true;
128 
// Any scratch use (stack objects or potential spills) needs the wave's
// scratch byte offset input.
129  if (HasStackObjects || MaySpill) {
130  PrivateSegmentWaveByteOffset = true;
131 
132  // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
133  if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
// NOTE(review): lines 134-135 elided — presumably the HS/GS calling
// convention check (the cross-reference mentions the Mesa/AMDPAL hull and
// geometry shader conventions) plus the assignment target
// `ArgInfo.PrivateSegmentWaveByteOffset`; verify against upstream.
136  = ArgDescriptor::createRegister(AMDGPU::SGPR5);
137  }
138  }
139 
// Code-object-v2 kernels get HSA ABI inputs on request; Mesa GFX shaders
// instead use an implicit buffer pointer when they touch scratch.
140  bool IsCOV2 = ST.isAmdCodeObjectV2(F);
141  if (IsCOV2) {
142  if (HasStackObjects || MaySpill)
143  PrivateSegmentBuffer = true;
144 
145  if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
146  DispatchPtr = true;
147 
148  if (F.hasFnAttribute("amdgpu-queue-ptr"))
149  QueuePtr = true;
150 
151  if (F.hasFnAttribute("amdgpu-dispatch-id"))
152  DispatchID = true;
153  } else if (ST.isMesaGfxShader(F)) {
154  if (HasStackObjects || MaySpill)
155  ImplicitBufferPtr = true;
156  }
157 
158  if (F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
159  KernargSegmentPtr = true;
160 
161  if (ST.hasFlatAddressSpace() && isEntryFunction() && IsCOV2) {
162  // TODO: This could be refined a lot. The attribute is a poor way of
163  // detecting calls that may require it before argument lowering.
164  if (HasStackObjects || F.hasFnAttribute("amdgpu-flat-scratch"))
165  FlatScratchInit = true;
166  }
167 
// Parse optional numeric attributes; on absence the defaults set in the
// initializer list (0xffffffff / 0) are kept.
168  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
169  StringRef S = A.getValueAsString();
170  if (!S.empty())
171  S.consumeInteger(0, GITPtrHigh);
172 
173  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
174  S = A.getValueAsString();
175  if (!S.empty())
176  S.consumeInteger(0, HighBitsOf32BitAddress);
177 }
178 
// NOTE(review): Doxygen-extracted fragment of
// SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) — the
// opening lines (original 179-180) and the start of the
// ST.getOccupancyWithLocalMemSize(...) call (original line 182) were elided
// by the extraction; verify against upstream before editing.
181  const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();
183  MF.getFunction()));
184 }
185 
187  const SIRegisterInfo &TRI) {
188  ArgInfo.PrivateSegmentBuffer =
189  ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
190  getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass));
191  NumUserSGPRs += 4;
192  return ArgInfo.PrivateSegmentBuffer.getRegister();
193 }
194 
196  ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
197  getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
198  NumUserSGPRs += 2;
199  return ArgInfo.DispatchPtr.getRegister();
200 }
201 
203  ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
204  getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
205  NumUserSGPRs += 2;
206  return ArgInfo.QueuePtr.getRegister();
207 }
208 
210  ArgInfo.KernargSegmentPtr
211  = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
212  getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
213  NumUserSGPRs += 2;
214  return ArgInfo.KernargSegmentPtr.getRegister();
215 }
216 
218  ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
219  getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
220  NumUserSGPRs += 2;
221  return ArgInfo.DispatchID.getRegister();
222 }
223 
225  ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
226  getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
227  NumUserSGPRs += 2;
228  return ArgInfo.FlatScratchInit.getRegister();
229 }
230 
232  ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
233  getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
234  NumUserSGPRs += 2;
235  return ArgInfo.ImplicitBufferPtr.getRegister();
236 }
237 
238 static bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) {
239  for (unsigned I = 0; CSRegs[I]; ++I) {
240  if (CSRegs[I] == Reg)
241  return true;
242  }
243 
244  return false;
245 }
246 
247 /// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
// NOTE(review): the signature line (original 248) was elided by the Doxygen
// extraction; the cross-reference section declares it as
// `bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI)`. Line 259 was
// also elided — presumably the definition of MRI (the MachineRegisterInfo
// used at line 277, likely from MF.getRegInfo()); verify against upstream.
249  int FI) {
// Look up (or create) the lane list for this frame index; each 4-byte piece
// of the spilled SGPR occupies one VGPR lane.
250  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];
251 
252  // This has already been allocated.
253  if (!SpillLanes.empty())
254  return true;
255 
256  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
257  const SIRegisterInfo *TRI = ST.getRegisterInfo();
258  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
260  unsigned WaveSize = ST.getWavefrontSize();
261 
262  unsigned Size = FrameInfo.getObjectSize(FI);
263  assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
264  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");
265 
// One lane per 32-bit piece of the spilled register.
266  int NumLanes = Size / 4;
267 
268  const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
269 
270  // Make sure to handle the case where a wide SGPR spill may span between two
271  // VGPRs.
272  for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
273  unsigned LaneVGPR;
274  unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);
275 
// A VGPR holds WaveSize lanes; when the previous one is full (index wraps to
// 0), claim a fresh unused VGPR.
276  if (VGPRIndex == 0) {
277  LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
278  if (LaneVGPR == AMDGPU::NoRegister) {
279  // We have no VGPRs left for spilling SGPRs. Reset because we will not
280  // partially spill the SGPR to VGPRs.
281  SGPRToVGPRSpills.erase(FI);
282  NumVGPRSpillLanes -= I;
283  return false;
284  }
285 
// If the chosen VGPR is callee-saved and this function must preserve it
// (it makes calls or is itself a callee), reserve a real stack slot so the
// VGPR can be saved/restored around its use as spill space.
286  Optional<int> CSRSpillFI;
287  if ((FrameInfo.hasCalls() || !isEntryFunction()) && CSRegs &&
288  isCalleeSavedReg(CSRegs, LaneVGPR)) {
289  CSRSpillFI = FrameInfo.CreateSpillStackObject(4, 4);
290  }
291 
292  SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, CSRSpillFI));
293 
294  // Add this register as live-in to all blocks to avoid machine verifier
295  // complaining about use of an undefined physical register.
296  for (MachineBasicBlock &BB : MF)
297  BB.addLiveIn(LaneVGPR);
298  } else {
// Otherwise keep filling lanes of the most recently claimed VGPR.
299  LaneVGPR = SpillVGPRs.back().VGPR;
300  }
301 
302  SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
303  }
304 
305  return true;
306 }
307 
309  for (auto &R : SGPRToVGPRSpills)
310  MFI.RemoveStackObject(R.first);
311 }
312 
313 
314 /// \returns the VGPR used for \p Dim's work item ID (VGPR0/1/2 for X/Y/Z).
// NOTE(review): original lines 318, 321, and 324 were elided by the Doxygen
// extraction — presumably per-case asserts; verify against upstream before
// editing this function.
315 unsigned SIMachineFunctionInfo::getWorkItemIDVGPR(unsigned Dim) const {
316  switch (Dim) {
317  case 0:
319  return AMDGPU::VGPR0;
320  case 1:
322  return AMDGPU::VGPR1;
323  case 2:
325  return AMDGPU::VGPR2;
326  }
327  llvm_unreachable("unexpected dimension");
328 }
329 
330 MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
331  assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
332  return AMDGPU::SGPR0 + NumUserSGPRs;
333 }
334 
335 MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
336  return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
337 }
unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI)
Interface definition for SIRegisterInfo.
SPIR_KERNEL - Calling convention for SPIR kernel functions.
Definition: CallingConv.h:137
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
AMDGPU specific subclass of TargetSubtarget.
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const
Inverse of getMaxLocalMemWithWaveCount.
bool hasStackObjects() const
Return true if there are any stack objects in this function.
void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI)
unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI)
unsigned Reg
bool isAmdCodeObjectV2(const Function &F) const
unsigned const TargetRegisterInfo * TRI
Calling convention used for Mesa/AMDPAL pixel shaders.
Definition: CallingConv.h:195
F(f)
SIMachineFunctionInfo(const MachineFunction &MF)
Calling convention used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
Definition: CallingConv.h:208
bool isMesaGfxShader(const Function &F) const
unsigned addDispatchID(const SIRegisterInfo &TRI)
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
void limitOccupancy(const MachineFunction &MF)
std::enable_if< std::numeric_limits< T >::is_signed, bool >::type consumeInteger(unsigned Radix, T &Result)
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:531
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:133
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI)
Reserve a slice of a VGPR to support spilling for FrameIndex FI.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
bool debuggerEmitPrologue() const
std::pair< unsigned, unsigned > getWavesPerEU(const Function &F) const
unsigned const MachineRegisterInfo * MRI
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void RemoveStackObject(int ObjectIdx)
Remove or mark dead a statically sized stack object.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
static ArgDescriptor createRegister(unsigned Reg)
unsigned addQueuePtr(const SIRegisterInfo &TRI)
Generation getGeneration() const
int CreateSpillStackObject(uint64_t Size, unsigned Alignment)
Create a new statically sized stack object that represents a spill slot, returning a nonnegative iden...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned addDispatchPtr(const SIRegisterInfo &TRI)
Calling convention used for Mesa/AMDPAL geometry shaders.
Definition: CallingConv.h:192
bool hasFlatAddressSpace() const
unsigned getWavefrontSize() const
unsigned getAlignmentForImplicitArgPtr() const
const Function & getFunction() const
Return the LLVM function that this machine code represents.
bool isVGPRSpillingEnabled(const Function &F) const
Calling convention for AMDGPU code object kernels.
Definition: CallingConv.h:201
unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI)
unsigned addFlatScratchInit(const SIRegisterInfo &TRI)
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
unsigned getInitialPSInputAddr(const Function &F)
Provides AMDGPU specific target descriptions.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:195
#define I(x, y, z)
Definition: MD5.cpp:58
uint32_t Size
Definition: Profile.cpp:47
static bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg)
unsigned getRegister() const
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
unsigned getWorkItemIDVGPR(unsigned Dim) const
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
std::pair< unsigned, unsigned > getFlatWorkGroupSizes(const Function &F) const
bool hasCalls() const
Return true if the current function has any function calls.
const SIRegisterInfo * getRegisterInfo() const override