SIMachineFunctionInfo.cpp (LLVM 6.0.0svn)

//===- SIMachineFunctionInfo.cpp - SI Machine Function Info --------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIRegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
#include <cassert>
#include <vector>

#define MAX_LANES 64

using namespace llvm;
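
// MAX_LANES above mirrors the 64-wide SI/GCN wavefront: a VGPR holds one
// 32-bit value per hardware lane, so a single VGPR offers 64 spill slots.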

SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    BufferPSV(*(MF.getSubtarget().getInstrInfo())),
    ImagePSV(*(MF.getSubtarget().getInstrInfo())),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    GridWorkgroupCountX(false),
    GridWorkgroupCountY(false),
    GridWorkgroupCountZ(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false),
    ImplicitBufferPtr(false),
    ImplicitArgPtr(false),
    GITPtrHigh(0xffffffff) {
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const Function *F = MF.getFunction();
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(*F);
  WavesPerEU = ST.getWavesPerEU(*F);

  if (!isEntryFunction()) {
    // Non-entry functions have no special inputs for now, other than the
    // registers required for scratch access.
    ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
    ScratchWaveOffsetReg = AMDGPU::SGPR4;
    FrameOffsetReg = AMDGPU::SGPR5;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    ArgInfo.PrivateSegmentBuffer =
      ArgDescriptor::createRegister(ScratchRSrcReg);
    ArgInfo.PrivateSegmentWaveByteOffset =
      ArgDescriptor::createRegister(ScratchWaveOffsetReg);

    if (F->hasFnAttribute("amdgpu-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    if (F->hasFnAttribute("amdgpu-implicitarg-ptr"))
      KernargSegmentPtr = true;
  }

  CallingConv::ID CC = F->getCallingConv();
  if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
    if (!F->arg_empty())
      KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);
  }

  if (ST.debuggerEmitPrologue()) {
    // Enable everything.
    WorkGroupIDX = true;
    WorkGroupIDY = true;
    WorkGroupIDZ = true;
    WorkItemIDX = true;
    WorkItemIDY = true;
    WorkItemIDZ = true;
  } else {
    if (F->hasFnAttribute("amdgpu-work-group-id-x"))
      WorkGroupIDX = true;

    if (F->hasFnAttribute("amdgpu-work-group-id-y"))
      WorkGroupIDY = true;

    if (F->hasFnAttribute("amdgpu-work-group-id-z"))
      WorkGroupIDZ = true;

    if (F->hasFnAttribute("amdgpu-work-item-id-x"))
      WorkItemIDX = true;

    if (F->hasFnAttribute("amdgpu-work-item-id-y"))
      WorkItemIDY = true;

    if (F->hasFnAttribute("amdgpu-work-item-id-z"))
      WorkItemIDZ = true;
  }

  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  bool MaySpill = ST.isVGPRSpillingEnabled(*F);
  bool HasStackObjects = FrameInfo.hasStackObjects();

  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    if (HasStackObjects || MaySpill) {
      PrivateSegmentWaveByteOffset = true;

      // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
      if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
          (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
        ArgInfo.PrivateSegmentWaveByteOffset
          = ArgDescriptor::createRegister(AMDGPU::SGPR5);
    }
  }

  bool IsCOV2 = ST.isAmdCodeObjectV2(MF);
  if (IsCOV2) {
    if (HasStackObjects || MaySpill)
      PrivateSegmentBuffer = true;

    if (F->hasFnAttribute("amdgpu-dispatch-ptr"))
      DispatchPtr = true;

    if (F->hasFnAttribute("amdgpu-queue-ptr"))
      QueuePtr = true;

    if (F->hasFnAttribute("amdgpu-dispatch-id"))
      DispatchID = true;
  } else if (ST.isMesaGfxShader(MF)) {
    if (HasStackObjects || MaySpill)
      ImplicitBufferPtr = true;
  }

  if (F->hasFnAttribute("amdgpu-kernarg-segment-ptr"))
    KernargSegmentPtr = true;

  if (ST.hasFlatAddressSpace() && isEntryFunction() && IsCOV2) {
    // TODO: This could be refined a lot. The attribute is a poor way of
    // detecting calls that may require it before argument lowering.
    if (HasStackObjects || F->hasFnAttribute("amdgpu-flat-scratch"))
      FlatScratchInit = true;
  }

  Attribute A = F->getFnAttribute("amdgpu-git-ptr-high");
  StringRef S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GITPtrHigh);
}
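
// Illustrative example (the attribute value is invented for exposition): a
// kernel whose IR function carries
//   attributes #0 = { "amdgpu-work-item-id-y" "amdgpu-git-ptr-high"="0x1234" }
// gets WorkItemIDY enabled by the attribute scan above, and GITPtrHigh parsed
// as 0x1234 by consumeInteger(0, ...), which auto-detects the radix from the
// string prefix.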

unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
  const SIRegisterInfo &TRI) {
  ArgInfo.PrivateSegmentBuffer =
    ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass));
  NumUserSGPRs += 4;
  return ArgInfo.PrivateSegmentBuffer.getRegister();
}
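
// This and the add* helpers that follow all use the same pattern: take the
// next free user SGPR (getNextUserSGPR()), widen it with getMatchingSuperReg()
// to the register class the ABI value needs (SReg_128, i.e. 4 SGPRs, for the
// private segment buffer resource; SReg_64, i.e. 2 SGPRs, for the 64-bit
// pointers and flat scratch init), record the result in ArgInfo, and advance
// NumUserSGPRs by the number of SGPRs consumed.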

unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchPtr.getRegister();
}

unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.QueuePtr.getRegister();
}

unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  ArgInfo.KernargSegmentPtr
    = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.KernargSegmentPtr.getRegister();
}

unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchID.getRegister();
}

unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.FlatScratchInit.getRegister();
}

unsigned SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
  ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.ImplicitBufferPtr.getRegister();
}

static bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) {
  for (unsigned I = 0; CSRegs[I]; ++I) {
    if (CSRegs[I] == Reg)
      return true;
  }

  return false;
}
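
// isCalleeSavedReg relies on getCalleeSavedRegs() returning a null-terminated
// array, so the loop stops at the 0 sentinel instead of taking an explicit
// length.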

/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();

  unsigned Size = FrameInfo.getObjectSize(FI);
  assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

  int NumLanes = Size / 4;

  const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);

  // Make sure to handle the case where a wide SGPR spill may span between two
  // VGPRs.
  for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    unsigned LaneVGPR;
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

    if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // We have no VGPRs left for spilling SGPRs. Reset because we will not
        // partially spill the SGPR to VGPRs.
        SGPRToVGPRSpills.erase(FI);
        NumVGPRSpillLanes -= I;
        return false;
      }

      Optional<int> CSRSpillFI;
      if (FrameInfo.hasCalls() && CSRegs && isCalleeSavedReg(CSRegs, LaneVGPR)) {
        // TODO: Should this be a CreateSpillStackObject? This is technically a
        // weird CSR spill.
        CSRSpillFI = FrameInfo.CreateStackObject(4, 4, false);
      }

      SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, CSRSpillFI));

      // Add this register as live-in to all blocks to avoid machine verifier
      // complaining about use of an undefined physical register.
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      LaneVGPR = SpillVGPRs.back().VGPR;
    }

    SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}
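
// Worked example of the lane math above (numbers invented for illustration):
// with a 64-lane wave, spilling a 16-byte SGPR tuple (NumLanes = 4) when
// NumVGPRSpillLanes is already 62 puts the first two dwords in lanes 62 and 63
// of the current spill VGPR; on the third iteration VGPRIndex wraps to 0, a
// fresh VGPR is taken from findUnusedRegister(), and the last two dwords land
// in its lanes 0 and 1. This is the "wide SGPR spill spanning two VGPRs" case
// the per-iteration modulo handles.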

void SIMachineFunctionInfo::removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI) {
  for (auto &R : SGPRToVGPRSpills)
    MFI.RemoveStackObject(R.first);
}
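
// Once an SGPR spill has been rewritten to live in VGPR lanes, the stack slot
// originally created for it is no longer needed; dropping those frame indices
// here keeps frame lowering from allocating scratch memory for them.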