Go to the documentation of this file.
37 PrivateSegmentBuffer(
false),
40 KernargSegmentPtr(
false),
42 FlatScratchInit(
false),
47 PrivateSegmentWaveByteOffset(
false),
51 ImplicitBufferPtr(
false),
52 ImplicitArgPtr(
false),
53 GITPtrHigh(0xffffffff),
54 HighBitsOf32BitAddress(0) {
57 FlatWorkGroupSizes =
ST.getFlatWorkGroupSizes(
F);
58 WavesPerEU =
ST.getWavesPerEU(
F);
65 const bool HasCalls =
F.hasFnAttribute(
"amdgpu-calls");
71 if (!
F.arg_empty() ||
ST.getImplicitArgNumBytes(
F) != 0)
72 KernargSegmentPtr =
true;
79 MayNeedAGPRs =
ST.hasMAIInsts();
86 FrameOffsetReg = AMDGPU::SGPR33;
87 StackPtrOffsetReg = AMDGPU::SGPR32;
89 if (!
ST.enableFlatScratch()) {
92 ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
98 if (!
F.hasFnAttribute(
"amdgpu-no-implicitarg-ptr"))
99 ImplicitArgPtr =
true;
101 ImplicitArgPtr =
false;
105 if (
ST.hasGFX90AInsts() &&
106 ST.getMaxNumVGPRs(
F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
108 MayNeedAGPRs =
false;
111 bool isAmdHsaOrMesa =
ST.isAmdHsaOrMesa(
F);
112 if (isAmdHsaOrMesa && !
ST.enableFlatScratch())
113 PrivateSegmentBuffer =
true;
114 else if (
ST.isMesaGfxShader(
F))
115 ImplicitBufferPtr =
true;
118 if (IsKernel || !
F.hasFnAttribute(
"amdgpu-no-workgroup-id-x"))
121 if (!
F.hasFnAttribute(
"amdgpu-no-workgroup-id-y"))
124 if (!
F.hasFnAttribute(
"amdgpu-no-workgroup-id-z"))
127 if (IsKernel || !
F.hasFnAttribute(
"amdgpu-no-workitem-id-x"))
130 if (!
F.hasFnAttribute(
"amdgpu-no-workitem-id-y") &&
131 ST.getMaxWorkitemID(
F, 1) != 0)
134 if (!
F.hasFnAttribute(
"amdgpu-no-workitem-id-z") &&
135 ST.getMaxWorkitemID(
F, 2) != 0)
138 if (!
F.hasFnAttribute(
"amdgpu-no-dispatch-ptr"))
141 if (!
F.hasFnAttribute(
"amdgpu-no-queue-ptr"))
144 if (!
F.hasFnAttribute(
"amdgpu-no-dispatch-id"))
150 bool HasStackObjects =
F.hasFnAttribute(
"amdgpu-stack-objects");
156 (isAmdHsaOrMesa ||
ST.enableFlatScratch()) &&
157 (
HasCalls || HasStackObjects ||
ST.enableFlatScratch()) &&
158 !
ST.flatScratchIsArchitected()) {
159 FlatScratchInit =
true;
168 if (!
ST.flatScratchIsArchitected()) {
169 PrivateSegmentWaveByteOffset =
true;
174 ArgInfo.PrivateSegmentWaveByteOffset =
179 Attribute A =
F.getFnAttribute(
"amdgpu-git-ptr-high");
182 S.consumeInteger(0, GITPtrHigh);
184 A =
F.getFnAttribute(
"amdgpu-32bit-address-high-bits");
185 S = A.getValueAsString();
187 S.consumeInteger(0, HighBitsOf32BitAddress);
192 if (
ST.hasMAIInsts() && !
ST.hasGFX90AInsts()) {
194 AMDGPU::VGPR_32RegClass.getRegister(
ST.getMaxNumVGPRs(
F) - 1);
216 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass));
218 return ArgInfo.PrivateSegmentBuffer.getRegister();
223 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
225 return ArgInfo.DispatchPtr.getRegister();
230 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
232 return ArgInfo.QueuePtr.getRegister();
238 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
240 return ArgInfo.KernargSegmentPtr.getRegister();
245 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
247 return ArgInfo.DispatchID.getRegister();
252 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
254 return ArgInfo.FlatScratchInit.getRegister();
259 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
261 return ArgInfo.ImplicitBufferPtr.getRegister();
266 for (
unsigned I = 0; CSRegs[
I]; ++
I) {
267 if (CSRegs[
I] ==
Reg)
279 unsigned NumNeed)
const {
281 unsigned WaveSize =
ST.getWavefrontSize();
282 return NumVGPRSpillLanes + NumNeed <= WaveSize * SpillVGPRs.size();
288 std::vector<SIRegisterInfo::SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];
291 if (!SpillLanes.empty())
298 unsigned WaveSize =
ST.getWavefrontSize();
301 unsigned NumLanes = Size / 4;
303 if (NumLanes > WaveSize)
306 assert(Size >= 4 &&
"invalid sgpr spill size");
307 assert(
TRI->spillSGPRToVGPR() &&
"not spilling SGPRs to VGPRs");
311 for (
unsigned I = 0;
I < NumLanes; ++
I, ++NumVGPRSpillLanes) {
313 unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);
315 if (VGPRIndex == 0) {
316 LaneVGPR =
TRI->findUnusedRegister(
MRI, &AMDGPU::VGPR_32RegClass, MF);
317 if (LaneVGPR == AMDGPU::NoRegister) {
320 SGPRToVGPRSpills.erase(FI);
321 NumVGPRSpillLanes -=
I;
327 "VGPRs for SGPR spilling",
346 BB.addLiveIn(LaneVGPR);
348 LaneVGPR = SpillVGPRs.back().VGPR;
369 auto &Spill = VGPRToAGPRSpills[FI];
372 if (!Spill.Lanes.empty())
373 return Spill.FullyAllocated;
376 unsigned NumLanes = Size / 4;
377 Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);
380 isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
383 auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
385 Spill.FullyAllocated =
true;
405 for (
int I = NumLanes - 1;
I >= 0; --
I) {
408 return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
412 if (NextSpillReg == Regs.end()) {
413 Spill.FullyAllocated =
false;
417 OtherUsedRegs.
set(*NextSpillReg);
419 Spill.Lanes[
I] = *NextSpillReg++;
422 return Spill.FullyAllocated;
435 SGPRToVGPRSpills.erase(R.first);
439 bool HaveSGPRToMemory =
false;
441 if (ResetSGPRSpillStackIDs) {
449 HaveSGPRToMemory =
true;
455 for (
auto &R : VGPRToAGPRSpills) {
460 return HaveSGPRToMemory;
492 MCPhysReg SIMachineFunctionInfo::getNextUserSGPR()
const {
493 assert(NumSystemSGPRs == 0 &&
"System SGPRs must be added after user SGPRs");
494 return AMDGPU::SGPR0 + NumUserSGPRs;
497 MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR()
const {
498 return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
504 if (!
ST.isAmdPalOS())
507 if (
ST.hasMergedShaders()) {
513 GitPtrLo = AMDGPU::SGPR8;
544 if (
Arg.isRegister()) {
570 ArgInfo.PrivateSegmentWaveByteOffset);
586 : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
587 MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()),
588 GDSSize(MFI.getGDSSize()),
589 DynLDSAlign(MFI.getDynLDSAlign()), IsEntryFunction(MFI.isEntryFunction()),
590 NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
591 MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()),
592 HasSpilledSGPRs(MFI.hasSpilledSGPRs()),
593 HasSpilledVGPRs(MFI.hasSpilledVGPRs()),
594 HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
595 Occupancy(MFI.getOccupancy()),
599 BytesInStackArgArea(MFI.getBytesInStackArgArea()),
600 ReturnsVoid(MFI.returnsVoid()),
645 SourceRange = YamlMFI.
ScavengeFI->SourceRange;
658 const auto *CB = dyn_cast<CallBase>(&
I);
662 if (CB->isInlineAsm()) {
663 const InlineAsm *IA = dyn_cast<InlineAsm>(CB->getCalledOperand());
666 Code.consume_front(
"{");
667 if (Code.startswith(
"a"))
675 dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
691 if (!mayNeedAGPRs()) {
static yaml::StringValue regToString(Register Reg, const TargetRegisterInfo &TRI)
uint32_t HighBitsOf32BitAddress
static bool isAGPRClass(const TargetRegisterClass *RC)
SmallVector< int, 8 > WWMReservedFrameIndexes
Track stack slots used for save/restore of reserved WWM VGPRs in the prolog/epilog.
This is an optimization pass for GlobalISel generic memory operations.
bool isPhysRegUsed(MCRegister PhysReg, bool SkipRegMaskTest=false) const
Return true if the specified register is modified or read in this function.
@ AMDGPU_HS
Calling convention used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
static Optional< yaml::SIArgumentInfo > convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo, const TargetRegisterInfo &TRI)
Register getVGPRForAGPRCopy() const
bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Optional< SIArgument > WorkGroupIDX
Optional< SIArgument > DispatchPtr
A raw_ostream that writes to an std::string.
Register addDispatchPtr(const SIRegisterInfo &TRI)
Optional< SIArgument > PrivateSegmentWaveByteOffset
StringValue VGPRForAGPRCopy
unsigned getNumRegs() const
Return the number of registers this target has (useful for sizing arrays holding per register informa...
Reg
All possible values of the reg field in the ModR/M byte.
static const AMDGPUFunctionArgInfo FixedABIFunctionInfo
static ConstraintInfoVector ParseConstraints(StringRef ConstraintString)
ParseConstraints - Split up the constraint string into the specific constraints and their prefixes.
bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI)
Reserve a slice of a VGPR to support spilling for FrameIndex FI.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
uint32_t getLDSSize() const
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Register addFlatScratchInit(const SIRegisterInfo &TRI)
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
Optional< SIArgument > WorkItemIDY
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
void RemoveStackObject(int ObjectIdx)
Remove or mark dead a statically sized stack object.
int getObjectIndexEnd() const
Return one past the maximum frame object index.
@ AMDGPU_Gfx
Calling convention used for AMD graphics targets.
unsigned const TargetRegisterInfo * TRI
This interface provides simple read-only access to a block of memory, and provides simple methods for...
LLVM Basic Block Representation.
Optional< SIArgument > WorkItemIDZ
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
void mappingImpl(yaml::IO &YamlIO) override
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Represents a location in source code.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Instances of this class encapsulate one diagnostic report, allowing printing to a raw_ostream as a ca...
Diagnostic information for stack size etc.
@ AMDGPU_GS
Calling convention used for Mesa/AMDPAL geometry shaders.
Optional< SIArgument > DispatchID
Optional< SIArgument > WorkItemIDX
uint64_t ExplicitKernArgSize
unsigned getInitialPSInputAddr(const Function &F)
bool usesAGPRs(const MachineFunction &MF) const
uint8_t getStackID(int ObjectIdx) const
unsigned getMainFileID() const
bool mayUseAGPRs(const MachineFunction &MF) const
int getObjectIndexBegin() const
Return the minimum frame object index.
bool use_empty(Register RegNo) const
use_empty - Return true if there are no instructions using the specified register.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
bool isEntryFunctionCC(CallingConv::ID CC)
const MemoryBuffer * getMemoryBuffer(unsigned i) const
int CreateSpillStackObject(uint64_t Size, Align Alignment)
Create a new statically sized stack object that represents a spill slot, returning a nonnegative iden...
virtual StringRef getBufferIdentifier() const
Return an identifier for this buffer, typically the filename it was read from.
Register addDispatchID(const SIRegisterInfo &TRI)
bool haveFreeLanesForSGPRSpill(const MachineFunction &MF, unsigned NumLane) const
returns true if NumLanes slots are available in VGPRs already used for SGPR spilling.
Align getSpillAlign(const TargetRegisterClass &RC) const
Return the minimum required alignment in bytes for a spill slot for a register of this class.
static SIArgument createArgument(bool IsReg)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Register addKernargSegmentPtr(const SIRegisterInfo &TRI)
Optional< SIArgument > WorkGroupIDZ
unsigned getSpillSize(const TargetRegisterClass &RC) const
Return the size in bytes of the stack slot allocated to hold a spilled copy of a register from class ...
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Register addPrivateSegmentBuffer(const SIRegisterInfo &TRI)
unsigned getMaxWavesPerEU() const
Optional< int > getOptionalScavengeFI() const
bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF, PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange)
Allocate memory in an ever growing pool, as if by bump-pointer.
SIMachineFunctionInfo()=default
MCRegister getMatchingSuperReg(MCRegister Reg, unsigned SubIdx, const TargetRegisterClass *RC) const
Return a super-register of the specified register Reg so its sub-register of index SubIdx is Reg.
Optional< int > FramePointerSaveIndex
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
unsigned BytesInStackArgArea
MachineFunctionInfo * clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, const DenseMap< MachineBasicBlock *, MachineBasicBlock * > &Src2DstMBB) const override
Make a functionally equivalent copy of this MachineFunctionInfo in MF.
bool isEntryFunction() const
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
SmallSetVector< Register, 8 > WWMReservedRegs
Optional< FrameIndex > ScavengeFI
SmallVector< StringValue > WWMReservedRegs
const TargetRegisterClass * getRegClassOrNull(Register Reg) const
Return the register class of Reg, or null if Reg has not been assigned a register class yet.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI)
bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR)
Reserve AGPRs or VGPRs to support spilling for FrameIndex FI.
Register getGITPtrLoReg(const MachineFunction &MF) const
A wrapper around std::string which contains a source range that's being set during parsing.
SIMachineFunctionInfo(const MachineFunction &MF)
void setStackID(int ObjectIdx, uint8_t ID)
@ AMDGPU_PS
Calling convention used for Mesa/AMDPAL pixel shaders.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
StringRef - Represent a constant reference to a string, i.e.
void limitOccupancy(const MachineFunction &MF)
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Optional< SIArgument > ImplicitBufferPtr
bool isGraphics(CallingConv::ID cc)
Register addImplicitBufferPtr(const SIRegisterInfo &TRI)
Helper struct shared between Function Specialization and SCCP Solver.
unsigned const MachineRegisterInfo * MRI
Wrapper class representing virtual and physical registers.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool hasCalls() const
Return true if the current function has any function calls.
Ty * cloneInfo(const Ty &Old)
amdgpu Simplify well known AMD library false FunctionCallee Callee
@ AMDGPU_KERNEL
Calling convention for AMDGPU code object kernels.
bool removeDeadFrameIndices(MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs)
If ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill to the default stack.
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Function & getFunction()
Return the LLVM function that this machine code represents.
Optional< SIArgument > WorkGroupInfo
Register addQueuePtr(const SIRegisterInfo &TRI)
Optional< SIArgument > ImplicitArgPtr
Optional< SIArgument > KernargSegmentPtr
const char * toString(DWARFSectionKind Kind)
Optional< SIArgument > WorkGroupIDY
Optional< int > BasePointerSaveIndex
Optional< SIArgument > PrivateSegmentSize
Lightweight error class with error context and mandatory checking.
void setBitsInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
setBitsInMask - Add '1' bits from Mask to this vector.
static constexpr ArgDescriptor createRegister(Register Reg, unsigned Mask=~0u)
bool isSpillSlotObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a spill slot.
Optional< SIArgument > PrivateSegmentBuffer
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
Represents a range in source code.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
@ SPIR_KERNEL
SPIR_KERNEL - Calling convention for SPIR kernel functions.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
MachineFunctionInfo - This class can be derived from and used by targets to hold private target-speci...
Optional< unsigned > Mask
Optional< SIArgument > FlatScratchInit
virtual const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const
Return a mask of call-preserved registers for the given calling convention on the current function.
iterator_range< SmallVectorImpl< MCPhysReg >::const_iterator > getRegisters() const
void allocateWWMReservedSpillSlots(MachineFrameInfo &MFI, const SIRegisterInfo &TRI)
Wrapper class representing physical registers. Should be passed by value.
Optional< SIArgument > QueuePtr