41 GWSResourcePSV(
getTM(STI)),
42 PrivateSegmentBuffer(
false),
45 KernargSegmentPtr(
false),
47 FlatScratchInit(
false),
53 PrivateSegmentWaveByteOffset(
false),
57 ImplicitBufferPtr(
false),
58 ImplicitArgPtr(
false),
59 GITPtrHigh(0xffffffff),
60 HighBitsOf32BitAddress(0) {
62 FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(
F);
63 WavesPerEU = ST.getWavesPerEU(
F);
70 const bool HasCalls =
F.hasFnAttribute(
"amdgpu-calls");
76 if (!
F.arg_empty() || ST.getImplicitArgNumBytes(
F) != 0)
77 KernargSegmentPtr =
true;
84 MayNeedAGPRs = ST.hasMAIInsts();
91 FrameOffsetReg = AMDGPU::SGPR33;
92 StackPtrOffsetReg = AMDGPU::SGPR32;
94 if (!ST.enableFlatScratch()) {
97 ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
103 if (!
F.hasFnAttribute(
"amdgpu-no-implicitarg-ptr"))
104 ImplicitArgPtr =
true;
106 ImplicitArgPtr =
false;
110 if (ST.hasGFX90AInsts() &&
111 ST.getMaxNumVGPRs(
F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
113 MayNeedAGPRs =
false;
116 bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(
F);
117 if (isAmdHsaOrMesa && !ST.enableFlatScratch())
118 PrivateSegmentBuffer =
true;
119 else if (ST.isMesaGfxShader(
F))
120 ImplicitBufferPtr =
true;
124 if (IsKernel || !
F.hasFnAttribute(
"amdgpu-no-workgroup-id-x"))
127 if (!
F.hasFnAttribute(
"amdgpu-no-workgroup-id-y"))
130 if (!
F.hasFnAttribute(
"amdgpu-no-workgroup-id-z"))
135 if (IsKernel || !
F.hasFnAttribute(
"amdgpu-no-workitem-id-x"))
138 if (!
F.hasFnAttribute(
"amdgpu-no-workitem-id-y") &&
139 ST.getMaxWorkitemID(
F, 1) != 0)
142 if (!
F.hasFnAttribute(
"amdgpu-no-workitem-id-z") &&
143 ST.getMaxWorkitemID(
F, 2) != 0)
146 if (!
F.hasFnAttribute(
"amdgpu-no-dispatch-ptr"))
149 if (!
F.hasFnAttribute(
"amdgpu-no-queue-ptr"))
152 if (!
F.hasFnAttribute(
"amdgpu-no-dispatch-id"))
155 if (!IsKernel && !
F.hasFnAttribute(
"amdgpu-no-lds-kernel-id"))
161 bool HasStackObjects =
F.hasFnAttribute(
"amdgpu-stack-objects");
167 (isAmdHsaOrMesa || ST.enableFlatScratch()) &&
168 (
HasCalls || HasStackObjects || ST.enableFlatScratch()) &&
169 !ST.flatScratchIsArchitected()) {
170 FlatScratchInit =
true;
179 if (!ST.flatScratchIsArchitected()) {
180 PrivateSegmentWaveByteOffset =
true;
185 ArgInfo.PrivateSegmentWaveByteOffset =
190 Attribute A =
F.getFnAttribute(
"amdgpu-git-ptr-high");
195 A =
F.getFnAttribute(
"amdgpu-32bit-address-high-bits");
196 S =
A.getValueAsString();
203 if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
205 AMDGPU::VGPR_32RegClass.getRegister(ST.getMaxNumVGPRs(
F) - 1);
227 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass));
229 return ArgInfo.PrivateSegmentBuffer.getRegister();
234 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
236 return ArgInfo.DispatchPtr.getRegister();
241 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
243 return ArgInfo.QueuePtr.getRegister();
249 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
251 return ArgInfo.KernargSegmentPtr.getRegister();
256 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
258 return ArgInfo.DispatchID.getRegister();
263 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
265 return ArgInfo.FlatScratchInit.getRegister();
270 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
272 return ArgInfo.ImplicitBufferPtr.getRegister();
278 return ArgInfo.LDSKernelId.getRegister();
287 WWMSpills.
insert(std::make_pair(
297 for (
auto &Reg : WWMSpills) {
299 CalleeSavedRegs.push_back(Reg);
301 ScratchRegs.push_back(Reg);
307 for (
unsigned I = 0; CSRegs[
I]; ++
I) {
308 if (CSRegs[
I] == Reg)
315bool SIMachineFunctionInfo::allocateVGPRForSGPRSpills(
MachineFunction &MF,
317 unsigned LaneIndex) {
323 LaneVGPR =
TRI->findUnusedRegister(
MRI, &AMDGPU::VGPR_32RegClass, MF);
324 if (LaneVGPR == AMDGPU::NoRegister) {
327 SGPRSpillToVGPRLanes.erase(FI);
331 SpillVGPRs.push_back(LaneVGPR);
335 BB.addLiveIn(LaneVGPR);
337 LaneVGPR = SpillVGPRs.back();
340 SGPRSpillToVGPRLanes[FI].push_back(
345bool SIMachineFunctionInfo::allocateVGPRForPrologEpilogSGPRSpills(
352 LaneVGPR =
TRI->findUnusedRegister(
MRI, &AMDGPU::VGPR_32RegClass, MF);
353 if (LaneVGPR == AMDGPU::NoRegister) {
356 PrologEpilogSGPRSpillToVGPRLanes.erase(FI);
362 LaneVGPR = WWMSpills.
back().first;
365 PrologEpilogSGPRSpillToVGPRLanes[FI].push_back(
372 bool IsPrologEpilog) {
373 std::vector<SIRegisterInfo::SpilledReg> &SpillLanes =
374 IsPrologEpilog ? PrologEpilogSGPRSpillToVGPRLanes[FI]
375 : SGPRSpillToVGPRLanes[FI];
378 if (!SpillLanes.empty())
383 unsigned WaveSize = ST.getWavefrontSize();
385 unsigned Size = FrameInfo.getObjectSize(FI);
386 unsigned NumLanes =
Size / 4;
388 if (NumLanes > WaveSize)
391 assert(
Size >= 4 &&
"invalid sgpr spill size");
392 assert(ST.getRegisterInfo()->spillSGPRToVGPR() &&
393 "not spilling SGPRs to VGPRs");
395 unsigned &NumSpillLanes =
396 IsPrologEpilog ? NumVGPRPrologEpilogSpillLanes : NumVGPRSpillLanes;
398 for (
unsigned I = 0;
I < NumLanes; ++
I, ++NumSpillLanes) {
399 unsigned LaneIndex = (NumSpillLanes % WaveSize);
403 ? allocateVGPRForPrologEpilogSGPRSpills(MF, FI, LaneIndex)
404 : allocateVGPRForSGPRSpills(MF, FI, LaneIndex);
426 auto &Spill = VGPRToAGPRSpills[FI];
429 if (!Spill.Lanes.empty())
430 return Spill.FullyAllocated;
433 unsigned NumLanes =
Size / 4;
434 Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);
437 isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
440 auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
442 Spill.FullyAllocated =
true;
457 OtherUsedRegs.
set(Reg);
459 OtherUsedRegs.
set(Reg);
462 for (
int I = NumLanes - 1;
I >= 0; --
I) {
463 NextSpillReg = std::find_if(
465 return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
469 if (NextSpillReg == Regs.
end()) {
470 Spill.FullyAllocated =
false;
474 OtherUsedRegs.
set(*NextSpillReg);
476 MRI.reserveReg(*NextSpillReg,
TRI);
477 Spill.Lanes[
I] = *NextSpillReg++;
480 return Spill.FullyAllocated;
492 SGPRSpillToVGPRLanes.erase(R.first);
495 bool HaveSGPRToMemory =
false;
497 if (ResetSGPRSpillStackIDs) {
505 HaveSGPRToMemory =
true;
511 for (
auto &R : VGPRToAGPRSpills) {
516 return HaveSGPRToMemory;
525 TRI.getSpillSize(AMDGPU::SGPR_32RegClass), 0,
false);
528 TRI.getSpillSize(AMDGPU::SGPR_32RegClass),
529 TRI.getSpillAlign(AMDGPU::SGPR_32RegClass),
false);
534MCPhysReg SIMachineFunctionInfo::getNextUserSGPR()
const {
535 assert(NumSystemSGPRs == 0 &&
"System SGPRs must be added after user SGPRs");
536 return AMDGPU::SGPR0 + NumUserSGPRs;
539MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR()
const {
540 return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
546 if (!ST.isAmdPalOS())
549 if (ST.hasMergedShaders()) {
555 GitPtrLo = AMDGPU::SGPR8;
574static std::optional<yaml::SIArgumentInfo>
579 auto convertArg = [&](std::optional<yaml::SIArgument> &
A,
586 if (
Arg.isRegister()) {
613 ArgInfo.PrivateSegmentWaveByteOffset);
629 : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
630 MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()),
631 GDSSize(MFI.getGDSSize()),
632 DynLDSAlign(MFI.getDynLDSAlign()), IsEntryFunction(MFI.isEntryFunction()),
633 NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
634 MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()),
635 HasSpilledSGPRs(MFI.hasSpilledSGPRs()),
636 HasSpilledVGPRs(MFI.hasSpilledVGPRs()),
637 HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
638 Occupancy(MFI.getOccupancy()),
642 BytesInStackArgArea(MFI.getBytesInStackArgArea()),
643 ReturnsVoid(MFI.returnsVoid()),
687 "", std::nullopt, std::nullopt);
688 SourceRange = YamlMFI.
ScavengeFI->SourceRange;
701 const auto *CB = dyn_cast<CallBase>(&
I);
705 if (CB->isInlineAsm()) {
706 const InlineAsm *IA = dyn_cast<InlineAsm>(CB->getCalledOperand());
707 for (
const auto &CI : IA->ParseConstraints()) {
709 Code.consume_front(
"{");
710 if (Code.startswith(
"a"))
718 dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
734 if (!mayNeedAGPRs()) {
747 for (
unsigned I = 0,
E =
MRI.getNumVirtRegs();
I !=
E; ++
I) {
753 }
else if (!RC && !
MRI.use_empty(Reg) &&
MRI.getType(Reg).isValid()) {
759 for (
MCRegister Reg : AMDGPU::AGPR_32RegClass) {
760 if (
MRI.isPhysRegUsed(Reg)) {
unsigned const MachineRegisterInfo * MRI
amdgpu Simplify well known AMD library false FunctionCallee Callee
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Provides AMDGPU specific target descriptions.
Base class for AMDGPU specific classes of TargetSubtarget.
The AMDGPU TargetMachine interface definition for hw codegen targets.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
unsigned const TargetRegisterInfo * TRI
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
const GCNTargetMachine & getTM(const GCNSubtarget *STI)
static std::optional< yaml::SIArgumentInfo > convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo, const TargetRegisterInfo &TRI)
static yaml::StringValue regToString(Register Reg, const TargetRegisterInfo &TRI)
Interface definition for SIRegisterInfo.
static const AMDGPUFunctionArgInfo FixedABIFunctionInfo
uint32_t getLDSSize() const
bool isEntryFunction() const
LLVM Basic Block Representation.
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
void setBitsInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
setBitsInMask - Add '1' bits from Mask to this vector.
Allocate memory in an ever growing pool, as if by bump-pointer.
Lightweight error class with error context and mandatory checking.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
const SITargetLowering * getTargetLowering() const override
Wrapper class representing physical registers. Should be passed by value.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
bool hasCalls() const
Return true if the current function has any function calls.
int CreateSpillStackObject(uint64_t Size, Align Alignment)
Create a new statically sized stack object that represents a spill slot, returning a nonnegative iden...
void setStackID(int ObjectIdx, uint8_t ID)
bool isSpillSlotObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a spill slot.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
void RemoveStackObject(int ObjectIdx)
Remove or mark dead a statically sized stack object.
int getObjectIndexEnd() const
Return one past the maximum frame object index.
uint8_t getStackID(int ObjectIdx) const
int getObjectIndexBegin() const
Return the minimum frame object index.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * cloneInfo(const Ty &Old)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const MCPhysReg * getCalleeSavedRegs() const
Returns list of callee saved registers.
size_type count(const KeyT &Key) const
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
std::pair< KeyT, ValueT > & back()
This interface provides simple read-only access to a block of memory, and provides simple methods for...
virtual StringRef getBufferIdentifier() const
Return an identifier for this buffer, typically the filename it was read from.
Wrapper class representing virtual and physical registers.
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
bool usesAGPRs(const MachineFunction &MF) const
bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF, PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange)
void allocateWWMSpill(MachineFunction &MF, Register VGPR, uint64_t Size=4, Align Alignment=Align(4))
Register addDispatchPtr(const SIRegisterInfo &TRI)
Register addFlatScratchInit(const SIRegisterInfo &TRI)
unsigned getMaxWavesPerEU() const
int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI)
Register addQueuePtr(const SIRegisterInfo &TRI)
bool allocateSGPRSpillToVGPRLane(MachineFunction &MF, int FI, bool IsPrologEpilog=false)
SIMachineFunctionInfo(const SIMachineFunctionInfo &MFI)=default
Register getGITPtrLoReg(const MachineFunction &MF) const
bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR)
Reserve AGPRs or VGPRs to support spilling for FrameIndex FI.
void splitWWMSpillRegisters(MachineFunction &MF, SmallVectorImpl< std::pair< Register, int > > &CalleeSavedRegs, SmallVectorImpl< std::pair< Register, int > > &ScratchRegs) const
bool mayUseAGPRs(const Function &F) const
bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) const
Register addLDSKernelId()
Register getVGPRForAGPRCopy() const
Register addKernargSegmentPtr(const SIRegisterInfo &TRI)
Register addDispatchID(const SIRegisterInfo &TRI)
bool removeDeadFrameIndices(MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs)
If ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill to the default stack.
MachineFunctionInfo * clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, const DenseMap< MachineBasicBlock *, MachineBasicBlock * > &Src2DstMBB) const override
Make a functionally equivalent copy of this MachineFunctionInfo in MF.
bool checkIndexInPrologEpilogSGPRSpills(int FI) const
Register addPrivateSegmentBuffer(const SIRegisterInfo &TRI)
const ReservedRegSet & getWWMReservedRegs() const
std::optional< int > getOptionalScavengeFI() const
Register addImplicitBufferPtr(const SIRegisterInfo &TRI)
void limitOccupancy(const MachineFunction &MF)
static bool isAGPRClass(const TargetRegisterClass *RC)
Instances of this class encapsulate one diagnostic report, allowing printing to a raw_ostream as a ca...
Represents a location in source code.
Represents a range in source code.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
typename SuperClass::const_iterator const_iterator
unsigned getMainFileID() const
const MemoryBuffer * getMemoryBuffer(unsigned i) const
StringRef - Represent a constant reference to a string, i.e.
bool consumeInteger(unsigned Radix, T &Result)
Parse the current string as an integer of the specified radix.
constexpr bool empty() const
empty - Check if the string is empty.
const TargetMachine & getTargetMachine() const
iterator_range< SmallVectorImpl< MCPhysReg >::const_iterator > getRegisters() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
A raw_ostream that writes to an std::string.
bool isEntryFunctionCC(CallingConv::ID CC)
unsigned getInitialPSInputAddr(const Function &F)
bool isGraphics(CallingConv::ID cc)
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_Gfx
Used for AMD graphics targets.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ SPIR_KERNEL
Used for SPIR kernel functions.
std::optional< const char * > toString(const std::optional< DWARFFormValue > &V)
Take an optional DWARFFormValue and try to extract a string value from it.
This is an optimization pass for GlobalISel generic memory operations.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
This struct is a compact representation of a valid (non-zero power of two) alignment.
static constexpr ArgDescriptor createRegister(Register Reg, unsigned Mask=~0u)
Helper struct shared between Function Specialization and SCCP Solver.
MachineFunctionInfo - This class can be derived from and used by targets to hold private target-speci...
A serializable representation of a reference to a stack object or fixed stack object.
std::optional< SIArgument > PrivateSegmentWaveByteOffset
std::optional< SIArgument > WorkGroupIDY
std::optional< SIArgument > FlatScratchInit
std::optional< SIArgument > DispatchPtr
std::optional< SIArgument > DispatchID
std::optional< SIArgument > WorkItemIDY
std::optional< SIArgument > WorkGroupIDX
std::optional< SIArgument > ImplicitArgPtr
std::optional< SIArgument > QueuePtr
std::optional< SIArgument > WorkGroupInfo
std::optional< SIArgument > LDSKernelId
std::optional< SIArgument > ImplicitBufferPtr
std::optional< SIArgument > WorkItemIDX
std::optional< SIArgument > KernargSegmentPtr
std::optional< SIArgument > WorkItemIDZ
std::optional< SIArgument > PrivateSegmentSize
std::optional< SIArgument > PrivateSegmentBuffer
std::optional< SIArgument > WorkGroupIDZ
std::optional< unsigned > Mask
static SIArgument createArgument(bool IsReg)
SmallVector< StringValue > WWMReservedRegs
uint32_t HighBitsOf32BitAddress
SIMachineFunctionInfo()=default
uint64_t ExplicitKernArgSize
void mappingImpl(yaml::IO &YamlIO) override
StringValue VGPRForAGPRCopy
std::optional< FrameIndex > ScavengeFI
unsigned BytesInStackArgArea
A wrapper around std::string which contains a source range that's being set during parsing.