LLVM  7.0.0svn
SIMachineFunctionInfo.cpp
Go to the documentation of this file.
1 //===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "SIMachineFunctionInfo.h"
12 #include "AMDGPUSubtarget.h"
13 #include "SIRegisterInfo.h"
14 #include "Utils/AMDGPUBaseInfo.h"
15 #include "llvm/ADT/Optional.h"
20 #include "llvm/IR/CallingConv.h"
21 #include "llvm/IR/Function.h"
22 #include <cassert>
23 #include <vector>
24 
25 #define MAX_LANES 64
26 
27 using namespace llvm;
28 
// NOTE(review): This span is a Doxygen source-view extraction; the
// constructor's signature line (original lines 29-30,
// "SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)")
// and several interior lines (e.g. 67, 78, 130-131) were dropped, so the
// code below is not compilable as-is. Code left byte-identical; comments only.
//
// All special-input flags start false; the subtarget, calling convention,
// and "amdgpu-*" function attributes below turn them on selectively.
31  PrivateSegmentBuffer(false),
32  DispatchPtr(false),
33  QueuePtr(false),
34  KernargSegmentPtr(false),
35  DispatchID(false),
36  FlatScratchInit(false),
37  GridWorkgroupCountX(false),
38  GridWorkgroupCountY(false),
39  GridWorkgroupCountZ(false),
40  WorkGroupIDX(false),
41  WorkGroupIDY(false),
42  WorkGroupIDZ(false),
43  WorkGroupInfo(false),
44  PrivateSegmentWaveByteOffset(false),
45  WorkItemIDX(false),
46  WorkItemIDY(false),
47  WorkItemIDZ(false),
48  ImplicitBufferPtr(false),
49  ImplicitArgPtr(false),
// 0xffffffff serves as the "unset" sentinel for the GIT pointer high bits.
50  GITPtrHigh(0xffffffff),
51  HighBitsOf32BitAddress(0) {
52  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
53  const Function &F = MF.getFunction();
54  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
55  WavesPerEU = ST.getWavesPerEU(F);
56 
// Non-entry (callable) functions get a fixed assignment of the scratch
// setup registers rather than argument-lowered special inputs.
57  if (!isEntryFunction()) {
58  // Non-entry functions have no special inputs for now, other registers
59  // required for scratch access.
60  ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
61  ScratchWaveOffsetReg = AMDGPU::SGPR4;
62  FrameOffsetReg = AMDGPU::SGPR5;
63  StackPtrOffsetReg = AMDGPU::SGPR32;
64 
65  ArgInfo.PrivateSegmentBuffer =
66  ArgDescriptor::createRegister(ScratchRSrcReg);
// NOTE(review): original line 67 (the lvalue
// "ArgInfo.PrivateSegmentWaveByteOffset =") is missing from this rendering;
// line 68 below is the continuation of that statement.
68  ArgDescriptor::createRegister(ScratchWaveOffsetReg);
69 
70  if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
71  ImplicitArgPtr = true;
72  } else {
73  if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
74  KernargSegmentPtr = true;
75  }
76 
77  CallingConv::ID CC = F.getCallingConv();
// NOTE(review): original line 78 (the "if (CC == ...)" compute-kernel
// calling-convention check guarding the next three statements) is missing
// from this rendering.
79  if (!F.arg_empty())
80  KernargSegmentPtr = true;
81  WorkGroupIDX = true;
82  WorkItemIDX = true;
83  } else if (CC == CallingConv::AMDGPU_PS) {
84  PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
85  }
86 
// The debugger prologue needs every workgroup/workitem ID enabled;
// otherwise honor the per-input attributes set during argument lowering.
87  if (ST.debuggerEmitPrologue()) {
88  // Enable everything.
89  WorkGroupIDX = true;
90  WorkGroupIDY = true;
91  WorkGroupIDZ = true;
92  WorkItemIDX = true;
93  WorkItemIDY = true;
94  WorkItemIDZ = true;
95  } else {
96  if (F.hasFnAttribute("amdgpu-work-group-id-x"))
97  WorkGroupIDX = true;
98 
99  if (F.hasFnAttribute("amdgpu-work-group-id-y"))
100  WorkGroupIDY = true;
101 
102  if (F.hasFnAttribute("amdgpu-work-group-id-z"))
103  WorkGroupIDZ = true;
104 
105  if (F.hasFnAttribute("amdgpu-work-item-id-x"))
106  WorkItemIDX = true;
107 
108  if (F.hasFnAttribute("amdgpu-work-item-id-y"))
109  WorkItemIDY = true;
110 
111  if (F.hasFnAttribute("amdgpu-work-item-id-z"))
112  WorkItemIDZ = true;
113  }
114 
115  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
116  bool MaySpill = ST.isVGPRSpillingEnabled(F);
117  bool HasStackObjects = FrameInfo.hasStackObjects();
118 
119  if (isEntryFunction()) {
120  // X, XY, and XYZ are the only supported combinations, so make sure Y is
121  // enabled if Z is.
122  if (WorkItemIDZ)
123  WorkItemIDY = true;
124 
// Scratch use (stack objects or possible spills) requires the wave byte
// offset input.
125  if (HasStackObjects || MaySpill) {
126  PrivateSegmentWaveByteOffset = true;
127 
128  // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
129  if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
// NOTE(review): original lines 130-131 (the HS/GS calling-convention check
// and the "ArgInfo.PrivateSegmentWaveByteOffset" lvalue) are missing from
// this rendering; line 132 below is the tail of that statement.
132  = ArgDescriptor::createRegister(AMDGPU::SGPR5);
133  }
134  }
135 
// Code-object-v2 functions take their ABI inputs (private segment buffer,
// dispatch/queue pointers, dispatch id) from attributes; Mesa GFX shaders
// instead use an implicit buffer pointer when scratch is needed.
136  bool IsCOV2 = ST.isAmdCodeObjectV2(MF);
137  if (IsCOV2) {
138  if (HasStackObjects || MaySpill)
139  PrivateSegmentBuffer = true;
140 
141  if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
142  DispatchPtr = true;
143 
144  if (F.hasFnAttribute("amdgpu-queue-ptr"))
145  QueuePtr = true;
146 
147  if (F.hasFnAttribute("amdgpu-dispatch-id"))
148  DispatchID = true;
149  } else if (ST.isMesaGfxShader(MF)) {
150  if (HasStackObjects || MaySpill)
151  ImplicitBufferPtr = true;
152  }
153 
154  if (F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
155  KernargSegmentPtr = true;
156 
157  if (ST.hasFlatAddressSpace() && isEntryFunction() && IsCOV2) {
158  // TODO: This could be refined a lot. The attribute is a poor way of
159  // detecting calls that may require it before argument lowering.
160  if (HasStackObjects || F.hasFnAttribute("amdgpu-flat-scratch"))
161  FlatScratchInit = true;
162  }
163 
// Parse optional integer-valued attributes; the fields keep their
// initializer-list defaults when the attribute string is absent.
164  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
165  StringRef S = A.getValueAsString();
166  if (!S.empty())
167  S.consumeInteger(0, GITPtrHigh);
168 
169  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
170  S = A.getValueAsString();
171  if (!S.empty())
172  S.consumeInteger(0, HighBitsOf32BitAddress);
173 }
174 
// Allocate four consecutive user SGPRs (an SReg_128 super-register anchored
// at the next free user SGPR) to hold the private segment buffer resource
// descriptor; advances the user-SGPR count by 4 and returns the register.
// NOTE(review): the signature line (original 175,
// "unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(") was dropped
// by this rendering.
176  const SIRegisterInfo &TRI) {
177  ArgInfo.PrivateSegmentBuffer =
178  ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
179  getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass));
180  NumUserSGPRs += 4;
181  return ArgInfo.PrivateSegmentBuffer.getRegister();
182 }
183 
// Allocate a 64-bit user SGPR pair for the dispatch packet pointer;
// advances the user-SGPR count by 2 and returns the assigned register.
// NOTE(review): the signature line (original 184) is missing from this
// rendering.
185  ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
186  getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
187  NumUserSGPRs += 2;
188  return ArgInfo.DispatchPtr.getRegister();
189 }
190 
// Allocate a 64-bit user SGPR pair for the queue pointer; advances the
// user-SGPR count by 2 and returns the assigned register.
// NOTE(review): the signature line (original 191) is missing from this
// rendering.
192  ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
193  getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
194  NumUserSGPRs += 2;
195  return ArgInfo.QueuePtr.getRegister();
196 }
197 
// Allocate a 64-bit user SGPR pair for the kernel argument segment pointer;
// advances the user-SGPR count by 2 and returns the assigned register.
// NOTE(review): the signature line (original 198) is missing from this
// rendering.
199  ArgInfo.KernargSegmentPtr
200  = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
201  getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
202  NumUserSGPRs += 2;
203  return ArgInfo.KernargSegmentPtr.getRegister();
204 }
205 
// Allocate a 64-bit user SGPR pair for the dispatch ID; advances the
// user-SGPR count by 2 and returns the assigned register.
// NOTE(review): the signature line (original 206) is missing from this
// rendering.
207  ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
208  getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
209  NumUserSGPRs += 2;
210  return ArgInfo.DispatchID.getRegister();
211 }
212 
// Allocate a 64-bit user SGPR pair for the flat scratch init value;
// advances the user-SGPR count by 2 and returns the assigned register.
// NOTE(review): the signature line (original 213) is missing from this
// rendering.
214  ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
215  getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
216  NumUserSGPRs += 2;
217  return ArgInfo.FlatScratchInit.getRegister();
218 }
219 
// Allocate a 64-bit user SGPR pair for the implicit buffer pointer (used by
// Mesa GFX shaders per the constructor above); advances the user-SGPR count
// by 2 and returns the assigned register.
// NOTE(review): the signature line (original 220) is missing from this
// rendering.
221  ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
222  getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
223  NumUserSGPRs += 2;
224  return ArgInfo.ImplicitBufferPtr.getRegister();
225 }
226 
227 static bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) {
228  for (unsigned I = 0; CSRegs[I]; ++I) {
229  if (CSRegs[I] == Reg)
230  return true;
231  }
232 
233  return false;
234 }
235 
236 /// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
// Returns true on success; returns false (after erasing any partially
// allocated state) when no free VGPR remains for SGPR spilling.
// NOTE(review): the signature line (original 237,
// "bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,")
// and original line 248 (presumably the MachineRegisterInfo local "MRI"
// used below -- confirm against upstream) are missing from this rendering.
238  int FI) {
239  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];
240 
241  // This has already been allocated.
242  if (!SpillLanes.empty())
243  return true;
244 
245  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
246  const SIRegisterInfo *TRI = ST.getRegisterInfo();
247  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
249  unsigned WaveSize = ST.getWavefrontSize();
250 
// Each VGPR lane holds one 32-bit SGPR, so a Size-byte spill needs Size/4
// lanes (4..64 bytes, i.e. 1..16 lanes, per the assert below).
251  unsigned Size = FrameInfo.getObjectSize(FI);
252  assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
253  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");
254 
255  int NumLanes = Size / 4;
256 
257  const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
258 
259  // Make sure to handle the case where a wide SGPR spill may span between two
260  // VGPRs.
261  for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
262  unsigned LaneVGPR;
263  unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);
264 
// A fresh VGPR must be found whenever the running lane count wraps past
// the wavefront size; otherwise reuse the most recently allocated VGPR.
265  if (VGPRIndex == 0) {
266  LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
267  if (LaneVGPR == AMDGPU::NoRegister) {
268  // We have no VGPRs left for spilling SGPRs. Reset because we will not
269  // partially spill the SGPR to VGPRs.
270  SGPRToVGPRSpills.erase(FI);
271  NumVGPRSpillLanes -= I;
272  return false;
273  }
274 
// If the chosen VGPR is callee-saved (relevant when the function makes
// calls or is itself callable), reserve a stack slot so the VGPR can in
// turn be preserved across calls.
275  Optional<int> CSRSpillFI;
276  if ((FrameInfo.hasCalls() || !isEntryFunction()) && CSRegs &&
277  isCalleeSavedReg(CSRegs, LaneVGPR)) {
278  CSRSpillFI = FrameInfo.CreateSpillStackObject(4, 4);
279  }
280 
281  SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, CSRSpillFI));
282 
283  // Add this register as live-in to all blocks to avoid machine verifier
284  // complaining about use of an undefined physical register.
285  for (MachineBasicBlock &BB : MF)
286  BB.addLiveIn(LaneVGPR);
287  } else {
288  LaneVGPR = SpillVGPRs.back().VGPR;
289  }
290 
291  SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
292  }
293 
294  return true;
295 }
296 
// Drop the frame objects backing every SGPR->VGPR spill slot: the values
// live in VGPR lanes, so no stack memory is needed for them.
// NOTE(review): the signature line (original 297) is missing from this
// rendering.
298  for (auto &R : SGPRToVGPRSpills)
299  MFI.RemoveStackObject(R.first);
300 }
unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI)
bool isVGPRSpillingEnabled(const Function &F) const
Interface definition for SIRegisterInfo.
Generation getGeneration() const
AMDGPU specific subclass of TargetSubtarget.
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
Calling convention used for Mesa/AMDPAL pixel shaders.
Definition: CallingConv.h:195
bool hasStackObjects() const
Return true if there are any stack objects in this function.
void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI)
unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI)
F(f)
SIMachineFunctionInfo(const MachineFunction &MF)
Calling convention for AMDGPU code object kernels.
Definition: CallingConv.h:201
unsigned addDispatchID(const SIRegisterInfo &TRI)
Reg
All possible values of the reg field in the ModR/M byte.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
std::enable_if< std::numeric_limits< T >::is_signed, bool >::type consumeInteger(unsigned Radix, T &Result)
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:531
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:133
SPIR_KERNEL - Calling convention for SPIR kernel functions.
Definition: CallingConv.h:137
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool debuggerEmitPrologue() const
bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI)
Reserve a slice of a VGPR to support spilling for FrameIndex FI.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtual registers.
std::pair< unsigned, unsigned > getWavesPerEU(const Function &F) const
unsigned const MachineRegisterInfo * MRI
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void RemoveStackObject(int ObjectIdx)
Remove or mark dead a statically sized stack object.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
static ArgDescriptor createRegister(unsigned Reg)
Calling convention used for Mesa/AMDPAL geometry shaders.
Definition: CallingConv.h:192
unsigned addQueuePtr(const SIRegisterInfo &TRI)
int CreateSpillStackObject(uint64_t Size, unsigned Alignment)
Create a new statically sized stack object that represents a spill slot, returning a nonnegative identifier to represent it.
unsigned addDispatchPtr(const SIRegisterInfo &TRI)
unsigned getWavefrontSize() const
const SIRegisterInfo * getRegisterInfo() const override
bool hasFlatAddressSpace() const
const Function & getFunction() const
Return the LLVM function that this machine code represents.
unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI)
unsigned addFlatScratchInit(const SIRegisterInfo &TRI)
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
unsigned getInitialPSInputAddr(const Function &F)
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:194
#define I(x, y, z)
Definition: MD5.cpp:58
static bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg)
bool isAmdCodeObjectV2(const MachineFunction &MF) const
unsigned getRegister() const
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Calling convention used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
Definition: CallingConv.h:208
bool isMesaGfxShader(const MachineFunction &MF) const
constexpr char Size[]
Key for Kernel::Arg::Metadata::mSize.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
std::pair< unsigned, unsigned > getFlatWorkGroupSizes(const Function &F) const
bool hasCalls() const
Return true if the current function has any function calls.