LLVM 4.0.0
SIMachineFunctionInfo.cpp
//===-- SIMachineFunctionInfo.cpp -------- SI Machine Function Info -------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"

#define MAX_LANES 64

using namespace llvm;

static cl::opt<bool> EnableSpillSGPRToVGPR(
  "amdgpu-spill-sgpr-to-vgpr",
  cl::desc("Enable spilling SGPRs to VGPRs"),
  cl::ReallyHidden,
  cl::init(true));

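// Note: as a standard llvm::cl option, the default above can be overridden
// on the llc command line, e.g. -amdgpu-spill-sgpr-to-vgpr=0.
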
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    TIDReg(AMDGPU::NoRegister),
    ScratchRSrcReg(AMDGPU::NoRegister),
    ScratchWaveOffsetReg(AMDGPU::NoRegister),
    PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister),
    DispatchPtrUserSGPR(AMDGPU::NoRegister),
    QueuePtrUserSGPR(AMDGPU::NoRegister),
    KernargSegmentPtrUserSGPR(AMDGPU::NoRegister),
    DispatchIDUserSGPR(AMDGPU::NoRegister),
    FlatScratchInitUserSGPR(AMDGPU::NoRegister),
    PrivateSegmentSizeUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountXUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountYUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountZUserSGPR(AMDGPU::NoRegister),
    WorkGroupIDXSystemSGPR(AMDGPU::NoRegister),
    WorkGroupIDYSystemSGPR(AMDGPU::NoRegister),
    WorkGroupIDZSystemSGPR(AMDGPU::NoRegister),
    WorkGroupInfoSystemSGPR(AMDGPU::NoRegister),
    PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
    PSInputAddr(0),
    ReturnsVoid(true),
    FlatWorkGroupSizes(0, 0),
    WavesPerEU(0, 0),
    DebuggerWorkGroupIDStackObjectIndices({{0, 0, 0}}),
    DebuggerWorkItemIDStackObjectIndices({{0, 0, 0}}),
    LDSWaveSpillSize(0),
    PSInputEna(0),
    NumUserSGPRs(0),
    NumSystemSGPRs(0),
    HasSpilledSGPRs(false),
    HasSpilledVGPRs(false),
    HasNonSpillStackObjects(false),
    NumSpilledSGPRs(0),
    NumSpilledVGPRs(0),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    GridWorkgroupCountX(false),
    GridWorkgroupCountY(false),
    GridWorkgroupCountZ(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false),
    PrivateMemoryInputPtr(false) {
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const Function *F = MF.getFunction();

  PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);

  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();

  if (!AMDGPU::isShader(F->getCallingConv())) {
    KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  }

  if (F->hasFnAttribute("amdgpu-work-group-id-y") || ST.debuggerEmitPrologue())
    WorkGroupIDY = true;

  if (F->hasFnAttribute("amdgpu-work-group-id-z") || ST.debuggerEmitPrologue())
    WorkGroupIDZ = true;

  if (F->hasFnAttribute("amdgpu-work-item-id-y") || ST.debuggerEmitPrologue())
    WorkItemIDY = true;

  if (F->hasFnAttribute("amdgpu-work-item-id-z") || ST.debuggerEmitPrologue())
    WorkItemIDZ = true;

  // X, XY, and XYZ are the only supported combinations, so make sure Y is
  // enabled if Z is.
  if (WorkItemIDZ)
    WorkItemIDY = true;

  bool MaySpill = ST.isVGPRSpillingEnabled(*F);
  bool HasStackObjects = FrameInfo.hasStackObjects();

  if (HasStackObjects || MaySpill)
    PrivateSegmentWaveByteOffset = true;

  if (ST.isAmdCodeObjectV2(MF)) {
    if (HasStackObjects || MaySpill)
      PrivateSegmentBuffer = true;

    if (F->hasFnAttribute("amdgpu-dispatch-ptr"))
      DispatchPtr = true;

    if (F->hasFnAttribute("amdgpu-queue-ptr"))
      QueuePtr = true;

    if (F->hasFnAttribute("amdgpu-dispatch-id"))
      DispatchID = true;
  } else if (ST.isMesaGfxShader(MF)) {
    if (HasStackObjects || MaySpill)
      PrivateMemoryInputPtr = true;
  }

  // We don't need to worry about accessing spills with flat instructions.
  // TODO: On VI where we must use flat for global, we should be able to omit
  // this if it is never used for generic access.
  if (HasStackObjects && ST.getGeneration() >= SISubtarget::SEA_ISLANDS &&
      ST.isAmdHsaOS())
    FlatScratchInit = true;

  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(*F);
  WavesPerEU = ST.getWavesPerEU(*F);
}

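// A minimal sketch (not part of the original file) of how the string
// attributes tested in the constructor above reach it: a front end tags the
// kernel's llvm::Function, and hasFnAttribute() then reports the tag here.
// `markNeedsWorkItemIDY` is a hypothetical helper for illustration only.
static void markNeedsWorkItemIDY(llvm::Function &F) {
  F.addFnAttr("amdgpu-work-item-id-y"); // presence-only string attribute
  assert(F.hasFnAttribute("amdgpu-work-item-id-y"));
}
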
unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
  const SIRegisterInfo &TRI) {
  PrivateSegmentBufferUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
  NumUserSGPRs += 4;
  return PrivateSegmentBufferUserSGPR;
}

unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  DispatchPtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return DispatchPtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  QueuePtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return QueuePtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  KernargSegmentPtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return KernargSegmentPtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  DispatchIDUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return DispatchIDUserSGPR;
}

unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  FlatScratchInitUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return FlatScratchInitUserSGPR;
}

unsigned SIMachineFunctionInfo::addPrivateMemoryPtr(const SIRegisterInfo &TRI) {
  PrivateMemoryPtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return PrivateMemoryPtrUserSGPR;
}

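// Editor's sketch (not in the original file): the seven helpers above share
// one pattern, shown generically below. `NextSGPR` stands in for
// getNextUserSGPR(), and `NumRegs` is 2 for 64-bit pointers or 4 for the
// 128-bit private segment buffer resource descriptor.
static unsigned allocUserSGPRTuple(const SIRegisterInfo &TRI,
                                   unsigned NextSGPR, unsigned NumRegs,
                                   const TargetRegisterClass *RC,
                                   unsigned &NumUserSGPRs) {
  // Find the wide super-register whose first 32-bit subreg is NextSGPR.
  unsigned SuperReg = TRI.getMatchingSuperReg(NextSGPR, AMDGPU::sub0, RC);
  NumUserSGPRs += NumRegs; // reserve NumRegs consecutive 32-bit user SGPRs
  return SuperReg;
}
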
SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
                                                        MachineFunction *MF,
                                                        unsigned FrameIndex,
                                                        unsigned SubIdx) {
  if (!EnableSpillSGPRToVGPR)
    return SpilledReg();

  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  int64_t Offset = FrameInfo.getObjectOffset(FrameIndex);
  Offset += SubIdx * 4;

  // Each spill VGPR provides MAX_LANES (64) lanes of 4 bytes each.
  unsigned LaneVGPRIdx = Offset / (64 * 4);
  unsigned Lane = (Offset / 4) % 64;

  struct SpilledReg Spill;
  Spill.Lane = Lane;

  if (!LaneVGPRs.count(LaneVGPRIdx)) {
    unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass,
                                                *MF);

    if (LaneVGPR == AMDGPU::NoRegister)
      // We have no VGPRs left for spilling SGPRs.
      return Spill;

    LaneVGPRs[LaneVGPRIdx] = LaneVGPR;

    // Add this register as live-in to all blocks to avoid machine verifier
    // complaining about use of an undefined physical register.
    for (MachineFunction::iterator BI = MF->begin(), BE = MF->end();
         BI != BE; ++BI) {
      BI->addLiveIn(LaneVGPR);
    }
  }

  Spill.VGPR = LaneVGPRs[LaneVGPRIdx];
  return Spill;
}
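
// Editor's sketch (standalone, not part of this file) of the offset-to-lane
// arithmetic in getSpilledReg above: SGPR spill slots are 4 bytes wide, and
// each spill VGPR holds 64 of them (MAX_LANES), so a frame byte offset
// selects both a VGPR and a lane within it.
#include <cassert>
#include <cstdint>

int main() {
  int64_t Offset = 260;                     // 65th 4-byte slot
  unsigned LaneVGPRIdx = Offset / (64 * 4); // -> second spill VGPR (index 1)
  unsigned Lane = (Offset / 4) % 64;        // -> lane 1 within that VGPR
  assert(LaneVGPRIdx == 1 && Lane == 1);
  return 0;
}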