//===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
#include <cassert>
#include <vector>

#define MAX_LANES 64

using namespace llvm;
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    Mode(MF.getFunction()),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false),
    ImplicitBufferPtr(false),
    ImplicitArgPtr(false),
    GITPtrHigh(0xffffffff),
    HighBitsOf32BitAddress(0),
    GDSSize(0) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const Function &F = MF.getFunction();
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  WavesPerEU = ST.getWavesPerEU(F);

  Occupancy = ST.computeOccupancy(MF, getLDSSize());
  CallingConv::ID CC = F.getCallingConv();

  if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
    if (!F.arg_empty())
      KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  }

  if (!isEntryFunction()) {
    // Non-entry functions have no special inputs for now, other than registers
    // required for scratch access.
    ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
    ScratchWaveOffsetReg = AMDGPU::SGPR33;

    // TODO: Pick a high register, and shift down, similar to a kernel.
    FrameOffsetReg = AMDGPU::SGPR34;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    ArgInfo.PrivateSegmentBuffer =
      ArgDescriptor::createRegister(ScratchRSrcReg);
    ArgInfo.PrivateSegmentWaveByteOffset =
      ArgDescriptor::createRegister(ScratchWaveOffsetReg);

    if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) {
      KernargSegmentPtr = true;
      MaxKernArgAlign = max(ST.getAlignmentForImplicitArgPtr(),
                            MaxKernArgAlign);
    }
  }
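
  // The "amdgpu-work-group-id-*" and "amdgpu-work-item-id-*" attributes below
  // are typically attached by the AMDGPUAnnotateKernelFeatures pass to record
  // which implicit inputs a callable function actually needs.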
  if (F.hasFnAttribute("amdgpu-work-group-id-x"))
    WorkGroupIDX = true;

  if (F.hasFnAttribute("amdgpu-work-group-id-y"))
    WorkGroupIDY = true;

  if (F.hasFnAttribute("amdgpu-work-group-id-z"))
    WorkGroupIDZ = true;

  if (F.hasFnAttribute("amdgpu-work-item-id-x"))
    WorkItemIDX = true;

  if (F.hasFnAttribute("amdgpu-work-item-id-y"))
    WorkItemIDY = true;

  if (F.hasFnAttribute("amdgpu-work-item-id-z"))
    WorkItemIDZ = true;

  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  bool HasStackObjects = FrameInfo.hasStackObjects();

  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    PrivateSegmentWaveByteOffset = true;

    // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
    if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
        (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
      ArgInfo.PrivateSegmentWaveByteOffset =
          ArgDescriptor::createRegister(AMDGPU::SGPR5);
  }

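  // Under the HSA and Mesa3D ABIs, the runtime preloads user SGPRs such as the
  // private segment buffer and the dispatch/queue pointers. Other Mesa
  // graphics shaders instead receive a single implicit buffer pointer.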
  bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
  if (isAmdHsaOrMesa) {
    PrivateSegmentBuffer = true;

    if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
      DispatchPtr = true;

    if (F.hasFnAttribute("amdgpu-queue-ptr"))
      QueuePtr = true;

    if (F.hasFnAttribute("amdgpu-dispatch-id"))
      DispatchID = true;
  } else if (ST.isMesaGfxShader(F)) {
    ImplicitBufferPtr = true;
  }

  if (F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
    KernargSegmentPtr = true;

  if (ST.hasFlatAddressSpace() && isEntryFunction() && isAmdHsaOrMesa) {
    auto hasNonSpillStackObjects = [&]() {
      // Avoid expensive checking if there are no stack objects.
      if (!HasStackObjects)
        return false;
      for (auto OI = FrameInfo.getObjectIndexBegin(),
                OE = FrameInfo.getObjectIndexEnd(); OI != OE; ++OI)
        if (!FrameInfo.isSpillSlotObjectIndex(OI))
          return true;
      // All stack objects are spill slots.
      return false;
    };
    // TODO: This could be refined a lot. The attribute is a poor way of
    // detecting calls that may require it before argument lowering.
    if (hasNonSpillStackObjects() || F.hasFnAttribute("amdgpu-flat-scratch"))
      FlatScratchInit = true;
  }

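  // The remaining inputs are string-valued integer attributes; radix 0 lets
  // consumeInteger accept decimal or "0x"-prefixed hex values. For example, a
  // function carrying "amdgpu-gds-size"="4096" gets GDSSize == 4096.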
  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
  StringRef S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GITPtrHigh);

  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
  S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, HighBitsOf32BitAddress);

  S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GDSSize);
}

void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
  limitOccupancy(getMaxWavesPerEU());
  const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();
  limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
                 MF.getFunction()));
}
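
// Each add* helper below allocates the next free user SGPR(s) for one
// preloaded ABI input, records the chosen register in ArgInfo, and bumps
// NumUserSGPRs. All user SGPRs must be added before any system SGPRs; see the
// assertion in getNextUserSGPR().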

unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
  const SIRegisterInfo &TRI) {
  ArgInfo.PrivateSegmentBuffer =
    ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass));
  NumUserSGPRs += 4;
  return ArgInfo.PrivateSegmentBuffer.getRegister();
}

unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchPtr.getRegister();
}

unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.QueuePtr.getRegister();
}

unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  ArgInfo.KernargSegmentPtr
    = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.KernargSegmentPtr.getRegister();
}

unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchID.getRegister();
}

unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.FlatScratchInit.getRegister();
}

unsigned SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
  ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.ImplicitBufferPtr.getRegister();
}

static bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) {
  for (unsigned I = 0; CSRegs[I]; ++I) {
    if (CSRegs[I] == Reg)
      return true;
  }

  return false;
}

/// \returns true if \p NumNeed slots are available in VGPRs already used for
/// SGPR spilling.
//
// FIXME: This only works after processFunctionBeforeFrameFinalized
bool SIMachineFunctionInfo::haveFreeLanesForSGPRSpill(const MachineFunction &MF,
                                                      unsigned NumNeed) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  unsigned WaveSize = ST.getWavefrontSize();
  return NumVGPRSpillLanes + NumNeed <= WaveSize * SpillVGPRs.size();
}

/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();

  unsigned Size = FrameInfo.getObjectSize(FI);
  assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

  int NumLanes = Size / 4;

  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();

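  // Each spilled 32-bit SGPR occupies one lane of a spill VGPR, so a single
  // VGPR provides WaveSize (at most MAX_LANES == 64) spill slots.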
  // Make sure to handle the case where a wide SGPR spill may span between two
  // VGPRs.
  for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    unsigned LaneVGPR;
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

    if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // We have no VGPRs left for spilling SGPRs. Reset because we will not
        // partially spill the SGPR to VGPRs.
        SGPRToVGPRSpills.erase(FI);
        NumVGPRSpillLanes -= I;
        return false;
      }

      Optional<int> CSRSpillFI;
      if ((FrameInfo.hasCalls() || !isEntryFunction()) && CSRegs &&
          isCalleeSavedReg(CSRegs, LaneVGPR)) {
        CSRSpillFI = FrameInfo.CreateSpillStackObject(4, 4);
      }

      SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, CSRSpillFI));

      // Add this register as live-in to all blocks to avoid machine verifier
      // complaining about use of an undefined physical register.
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      LaneVGPR = SpillVGPRs.back().VGPR;
    }

    SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}
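
// On subtargets with MAI instructions (e.g. gfx908), VGPRs can also be spilled
// to AGPRs, and vice versa, using v_accvgpr_write_b32/v_accvgpr_read_b32
// rather than scratch memory.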

/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
/// Either AGPR is spilled to VGPR or vice versa.
/// Returns true if \p FI can be eliminated completely.
bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
                                                    int FI,
                                                    bool isAGPRtoVGPR) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));

  auto &Spill = VGPRToAGPRSpills[FI];

  // This has already been allocated.
  if (!Spill.Lanes.empty())
    return Spill.FullyAllocated;

  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;
  Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);

  const TargetRegisterClass &RC =
      isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
  auto Regs = RC.getRegisters();

  auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  Spill.FullyAllocated = true;

  // FIXME: Move allocation logic out of MachineFunctionInfo and initialize
  // once.
  BitVector OtherUsedRegs;
  OtherUsedRegs.resize(TRI->getNumRegs());

  const uint32_t *CSRMask =
      TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());
  if (CSRMask)
    OtherUsedRegs.setBitsInMask(CSRMask);

  // TODO: Should include register tuples, but doesn't matter with current
  // usage.
  for (MCPhysReg Reg : SpillAGPR)
    OtherUsedRegs.set(Reg);
  for (MCPhysReg Reg : SpillVGPR)
    OtherUsedRegs.set(Reg);

  auto NextSpillReg = Regs.begin();
  for (unsigned I = 0; I < NumLanes; ++I) {
    NextSpillReg = std::find_if(
        NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
          return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
                 !OtherUsedRegs[Reg];
        });

    if (NextSpillReg == Regs.end()) { // Registers exhausted
      Spill.FullyAllocated = false;
      break;
    }

    OtherUsedRegs.set(*NextSpillReg);
    SpillRegs.push_back(*NextSpillReg);
    Spill.Lanes[I] = *NextSpillReg++;
  }

  return Spill.FullyAllocated;
}

void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) {
  // The FP spill hasn't been inserted yet, so keep it around.
  for (auto &R : SGPRToVGPRSpills) {
    if (R.first != FramePointerSaveIndex)
      MFI.RemoveStackObject(R.first);
  }

  // All other SGPRs must be allocated on the default stack, so reset the stack
  // ID.
  for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e;
       ++i)
    if (i != FramePointerSaveIndex)
      MFI.setStackID(i, TargetStackID::Default);

  for (auto &R : VGPRToAGPRSpills) {
    if (R.second.FullyAllocated)
      MFI.RemoveStackObject(R.first);
  }
}

MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
  assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
  return AMDGPU::SGPR0 + NumUserSGPRs;
}

MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
  return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
}

static yaml::StringValue regToString(unsigned Reg,
                                     const TargetRegisterInfo &TRI) {
  yaml::StringValue Dest;
  {
    raw_string_ostream OS(Dest.Value);
    OS << printReg(Reg, &TRI);
  }
  return Dest;
}

static Optional<yaml::SIArgumentInfo>
convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
                    const TargetRegisterInfo &TRI) {
  yaml::SIArgumentInfo AI;

  auto convertArg = [&](Optional<yaml::SIArgument> &A,
                        const ArgDescriptor &Arg) {
    if (!Arg)
      return false;

    // Create a register or stack argument.
    yaml::SIArgument SA = yaml::SIArgument::createArgument(Arg.isRegister());
    if (Arg.isRegister()) {
      raw_string_ostream OS(SA.RegisterName.Value);
      OS << printReg(Arg.getRegister(), &TRI);
    } else
      SA.StackOffset = Arg.getStackOffset();
    // Check and update the optional mask.
    if (Arg.isMasked())
      SA.Mask = Arg.getMask();

    A = SA;
    return true;
  };

  bool Any = false;
  Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer);
  Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr);
  Any |= convertArg(AI.QueuePtr, ArgInfo.QueuePtr);
  Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr);
  Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID);
  Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit);
  Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize);
  Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX);
  Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY);
  Any |= convertArg(AI.WorkGroupIDZ, ArgInfo.WorkGroupIDZ);
  Any |= convertArg(AI.WorkGroupInfo, ArgInfo.WorkGroupInfo);
  Any |= convertArg(AI.PrivateSegmentWaveByteOffset,
                    ArgInfo.PrivateSegmentWaveByteOffset);
  Any |= convertArg(AI.ImplicitArgPtr, ArgInfo.ImplicitArgPtr);
  Any |= convertArg(AI.ImplicitBufferPtr, ArgInfo.ImplicitBufferPtr);
  Any |= convertArg(AI.WorkItemIDX, ArgInfo.WorkItemIDX);
  Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY);
  Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ);

  if (Any)
    return AI;

  return None;
}

yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
  const llvm::SIMachineFunctionInfo& MFI,
  const TargetRegisterInfo &TRI)
  : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
    MaxKernArgAlign(MFI.getMaxKernArgAlign()),
    LDSSize(MFI.getLDSSize()),
    IsEntryFunction(MFI.isEntryFunction()),
    NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
    MemoryBound(MFI.isMemoryBound()),
    WaveLimiter(MFI.needsWaveLimiter()),
    HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
    ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
    ScratchWaveOffsetReg(regToString(MFI.getScratchWaveOffsetReg(), TRI)),
    FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
    StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
    ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)),
    Mode(MFI.getMode()) {}
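
// When a function is round-tripped through MIR, the fields above are
// serialized as a machineFunctionInfo block. An illustrative sketch (the
// authoritative key names live in the MappingTraits specialization in
// SIMachineFunctionInfo.h):
//
//   machineFunctionInfo:
//     explicitKernArgSize: 128
//     maxKernArgAlign: 64
//     ldsSize:         0
//     isEntryFunction: true
//     scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
//     stackPtrOffsetReg: '$sgpr32'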

void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
  MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);
}

bool SIMachineFunctionInfo::initializeBaseYamlFields(
  const yaml::SIMachineFunctionInfo &YamlMFI) {
  ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
  MaxKernArgAlign = assumeAligned(YamlMFI.MaxKernArgAlign);
  LDSSize = YamlMFI.LDSSize;
  HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
  IsEntryFunction = YamlMFI.IsEntryFunction;
  NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
  MemoryBound = YamlMFI.MemoryBound;
  WaveLimiter = YamlMFI.WaveLimiter;
  return false;
}