LLVM  7.0.0svn
SIMachineFunctionInfo.cpp
Go to the documentation of this file.
1 //===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "SIMachineFunctionInfo.h"
12 #include "AMDGPUSubtarget.h"
13 #include "SIRegisterInfo.h"
14 #include "Utils/AMDGPUBaseInfo.h"
15 #include "llvm/ADT/Optional.h"
20 #include "llvm/IR/CallingConv.h"
21 #include "llvm/IR/Function.h"
22 #include <cassert>
23 #include <vector>
24 
25 #define MAX_LANES 64
26 
27 using namespace llvm;
28 
31  PrivateSegmentBuffer(false),
32  DispatchPtr(false),
33  QueuePtr(false),
34  KernargSegmentPtr(false),
35  DispatchID(false),
36  FlatScratchInit(false),
37  GridWorkgroupCountX(false),
38  GridWorkgroupCountY(false),
39  GridWorkgroupCountZ(false),
40  WorkGroupIDX(false),
41  WorkGroupIDY(false),
42  WorkGroupIDZ(false),
43  WorkGroupInfo(false),
44  PrivateSegmentWaveByteOffset(false),
45  WorkItemIDX(false),
46  WorkItemIDY(false),
47  WorkItemIDZ(false),
48  ImplicitBufferPtr(false),
49  ImplicitArgPtr(false),
50  GITPtrHigh(0xffffffff) {
51  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
52  const Function &F = MF.getFunction();
53  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
54  WavesPerEU = ST.getWavesPerEU(F);
55 
56  if (!isEntryFunction()) {
57  // Non-entry functions have no special inputs for now, other than the
58  // registers required for scratch access.
59  ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
60  ScratchWaveOffsetReg = AMDGPU::SGPR4;
61  FrameOffsetReg = AMDGPU::SGPR5;
62  StackPtrOffsetReg = AMDGPU::SGPR32;
63 
64  ArgInfo.PrivateSegmentBuffer =
65  ArgDescriptor::createRegister(ScratchRSrcReg);
67  ArgDescriptor::createRegister(ScratchWaveOffsetReg);
68 
69  if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
70  ImplicitArgPtr = true;
71  } else {
72  if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
73  KernargSegmentPtr = true;
74  }
75 
76  CallingConv::ID CC = F.getCallingConv();
78  if (!F.arg_empty())
79  KernargSegmentPtr = true;
80  WorkGroupIDX = true;
81  WorkItemIDX = true;
82  } else if (CC == CallingConv::AMDGPU_PS) {
83  PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
84  }
85 
86  if (ST.debuggerEmitPrologue()) {
87  // Enable everything.
88  WorkGroupIDX = true;
89  WorkGroupIDY = true;
90  WorkGroupIDZ = true;
91  WorkItemIDX = true;
92  WorkItemIDY = true;
93  WorkItemIDZ = true;
94  } else {
95  if (F.hasFnAttribute("amdgpu-work-group-id-x"))
96  WorkGroupIDX = true;
97 
98  if (F.hasFnAttribute("amdgpu-work-group-id-y"))
99  WorkGroupIDY = true;
100 
101  if (F.hasFnAttribute("amdgpu-work-group-id-z"))
102  WorkGroupIDZ = true;
103 
104  if (F.hasFnAttribute("amdgpu-work-item-id-x"))
105  WorkItemIDX = true;
106 
107  if (F.hasFnAttribute("amdgpu-work-item-id-y"))
108  WorkItemIDY = true;
109 
110  if (F.hasFnAttribute("amdgpu-work-item-id-z"))
111  WorkItemIDZ = true;
112  }
113 
114  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
115  bool MaySpill = ST.isVGPRSpillingEnabled(F);
116  bool HasStackObjects = FrameInfo.hasStackObjects();
117 
118  if (isEntryFunction()) {
119  // X, XY, and XYZ are the only supported combinations, so make sure Y is
120  // enabled if Z is.
121  if (WorkItemIDZ)
122  WorkItemIDY = true;
123 
124  if (HasStackObjects || MaySpill) {
125  PrivateSegmentWaveByteOffset = true;
126 
127  // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
128  if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
131  = ArgDescriptor::createRegister(AMDGPU::SGPR5);
132  }
133  }
134 
135  bool IsCOV2 = ST.isAmdCodeObjectV2(MF);
136  if (IsCOV2) {
137  if (HasStackObjects || MaySpill)
138  PrivateSegmentBuffer = true;
139 
140  if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
141  DispatchPtr = true;
142 
143  if (F.hasFnAttribute("amdgpu-queue-ptr"))
144  QueuePtr = true;
145 
146  if (F.hasFnAttribute("amdgpu-dispatch-id"))
147  DispatchID = true;
148  } else if (ST.isMesaGfxShader(MF)) {
149  if (HasStackObjects || MaySpill)
150  ImplicitBufferPtr = true;
151  }
152 
153  if (F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
154  KernargSegmentPtr = true;
155 
156  if (ST.hasFlatAddressSpace() && isEntryFunction() && IsCOV2) {
157  // TODO: This could be refined a lot. The attribute is a poor way of
158  // detecting calls that may require it before argument lowering.
159  if (HasStackObjects || F.hasFnAttribute("amdgpu-flat-scratch"))
160  FlatScratchInit = true;
161  }
162 
163  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
164  StringRef S = A.getValueAsString();
165  if (!S.empty())
166  S.consumeInteger(0, GITPtrHigh);
167 }
168 
170  const SIRegisterInfo &TRI) {
171  ArgInfo.PrivateSegmentBuffer =
172  ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
173  getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass));
174  NumUserSGPRs += 4;
175  return ArgInfo.PrivateSegmentBuffer.getRegister();
176 }
177 
179  ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
180  getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
181  NumUserSGPRs += 2;
182  return ArgInfo.DispatchPtr.getRegister();
183 }
184 
186  ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
187  getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
188  NumUserSGPRs += 2;
189  return ArgInfo.QueuePtr.getRegister();
190 }
191 
193  ArgInfo.KernargSegmentPtr
194  = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
195  getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
196  NumUserSGPRs += 2;
197  return ArgInfo.KernargSegmentPtr.getRegister();
198 }
199 
201  ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
202  getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
203  NumUserSGPRs += 2;
204  return ArgInfo.DispatchID.getRegister();
205 }
206 
208  ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
209  getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
210  NumUserSGPRs += 2;
211  return ArgInfo.FlatScratchInit.getRegister();
212 }
213 
215  ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
216  getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
217  NumUserSGPRs += 2;
218  return ArgInfo.ImplicitBufferPtr.getRegister();
219 }
220 
221 static bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) {
222  for (unsigned I = 0; CSRegs[I]; ++I) {
223  if (CSRegs[I] == Reg)
224  return true;
225  }
226 
227  return false;
228 }
229 
230 /// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
232  int FI) {
233  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];
234 
235  // This has already been allocated.
236  if (!SpillLanes.empty())
237  return true;
238 
239  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
240  const SIRegisterInfo *TRI = ST.getRegisterInfo();
241  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
243  unsigned WaveSize = ST.getWavefrontSize();
244 
245  unsigned Size = FrameInfo.getObjectSize(FI);
246  assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
247  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");
248 
249  int NumLanes = Size / 4;
250 
251  const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
252 
253  // Make sure to handle the case where a wide SGPR spill may span between two
254  // VGPRs.
255  for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
256  unsigned LaneVGPR;
257  unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);
258 
259  if (VGPRIndex == 0) {
260  LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
261  if (LaneVGPR == AMDGPU::NoRegister) {
262  // We have no VGPRs left for spilling SGPRs. Reset because we will not
263  // partially spill the SGPR to VGPRs.
264  SGPRToVGPRSpills.erase(FI);
265  NumVGPRSpillLanes -= I;
266  return false;
267  }
268 
269  Optional<int> CSRSpillFI;
270  if (FrameInfo.hasCalls() && CSRegs && isCalleeSavedReg(CSRegs, LaneVGPR)) {
271  // TODO: Should this be a CreateSpillStackObject? This is technically a
272  // weird CSR spill.
273  CSRSpillFI = FrameInfo.CreateStackObject(4, 4, false);
274  }
275 
276  SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, CSRSpillFI));
277 
278  // Add this register as live-in to all blocks to avoid machine verifier
279  // complaining about use of an undefined physical register.
280  for (MachineBasicBlock &BB : MF)
281  BB.addLiveIn(LaneVGPR);
282  } else {
283  LaneVGPR = SpillVGPRs.back().VGPR;
284  }
285 
286  SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
287  }
288 
289  return true;
290 }
291 
293  for (auto &R : SGPRToVGPRSpills)
294  MFI.RemoveStackObject(R.first);
295 }
unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI)
bool isVGPRSpillingEnabled(const Function &F) const
Interface definition for SIRegisterInfo.
Generation getGeneration() const
AMDGPU specific subclass of TargetSubtarget.
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
bool hasStackObjects() const
Return true if there are any stack objects in this function.
void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI)
unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI)
F(f)
SIMachineFunctionInfo(const MachineFunction &MF)
int CreateStackObject(uint64_t Size, unsigned Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it...
SPIR_KERNEL - Calling convention for SPIR kernel functions.
Definition: CallingConv.h:137
unsigned addDispatchID(const SIRegisterInfo &TRI)
Reg
All possible values of the reg field in the ModR/M byte.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
std::enable_if< std::numeric_limits< T >::is_signed, bool >::type consumeInteger(unsigned Radix, T &Result)
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:531
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:133
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool debuggerEmitPrologue() const
bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI)
Reserve a slice of a VGPR to support spilling for FrameIndex FI.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
std::pair< unsigned, unsigned > getWavesPerEU(const Function &F) const
unsigned const MachineRegisterInfo * MRI
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void RemoveStackObject(int ObjectIdx)
Remove or mark dead a statically sized stack object.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Calling convention used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
Definition: CallingConv.h:208
static ArgDescriptor createRegister(unsigned Reg)
unsigned addQueuePtr(const SIRegisterInfo &TRI)
unsigned addDispatchPtr(const SIRegisterInfo &TRI)
unsigned getWavefrontSize() const
Calling convention used for Mesa/AMDPAL geometry shaders.
Definition: CallingConv.h:192
Calling convention used for Mesa/AMDPAL pixel shaders.
Definition: CallingConv.h:195
const SIRegisterInfo * getRegisterInfo() const override
bool hasFlatAddressSpace() const
const Function & getFunction() const
Return the LLVM function that this machine code represents.
unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI)
unsigned addFlatScratchInit(const SIRegisterInfo &TRI)
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
unsigned getInitialPSInputAddr(const Function &F)
Calling convention for AMDGPU code object kernels.
Definition: CallingConv.h:201
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:195
#define I(x, y, z)
Definition: MD5.cpp:58
static bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg)
bool isAmdCodeObjectV2(const MachineFunction &MF) const
unsigned getRegister() const
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool isMesaGfxShader(const MachineFunction &MF) const
constexpr char Size[]
Key for Kernel::Arg::Metadata::mSize.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
std::pair< unsigned, unsigned > getFlatWorkGroupSizes(const Function &F) const
bool hasCalls() const
Return true if the current function has any function calls.