40 UserSGPRInfo(
F, *STI), WorkGroupIDX(
false), WorkGroupIDY(
false),
42 PrivateSegmentWaveByteOffset(
false), WorkItemIDX(
false),
44 GITPtrHigh(0xffffffff), HighBitsOf32BitAddress(0) {
46 FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(
F);
47 WavesPerEU = ST.getWavesPerEU(
F);
48 MaxNumWorkGroups = ST.getMaxNumWorkGroups(
F);
54 VRegFlags.reserve(1024);
66 MayNeedAGPRs = ST.hasMAIInsts();
73 StackPtrOffsetReg = AMDGPU::SGPR32;
75 ScratchRSrcReg = AMDGPU::SGPR48_SGPR49_SGPR50_SGPR51;
80 ImplicitArgPtr =
false;
85 FrameOffsetReg = AMDGPU::SGPR33;
86 StackPtrOffsetReg = AMDGPU::SGPR32;
88 if (!ST.enableFlatScratch()) {
91 ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
97 if (!
F.hasFnAttribute(
"amdgpu-no-implicitarg-ptr"))
98 ImplicitArgPtr =
true;
100 ImplicitArgPtr =
false;
104 if (ST.hasGFX90AInsts() &&
105 ST.getMaxNumVGPRs(
F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
107 MayNeedAGPRs =
false;
112 ST.hasArchitectedSGPRs())) {
113 if (IsKernel || !
F.hasFnAttribute(
"amdgpu-no-workgroup-id-x"))
116 if (!
F.hasFnAttribute(
"amdgpu-no-workgroup-id-y"))
119 if (!
F.hasFnAttribute(
"amdgpu-no-workgroup-id-z"))
124 if (IsKernel || !
F.hasFnAttribute(
"amdgpu-no-workitem-id-x"))
127 if (!
F.hasFnAttribute(
"amdgpu-no-workitem-id-y") &&
128 ST.getMaxWorkitemID(
F, 1) != 0)
131 if (!
F.hasFnAttribute(
"amdgpu-no-workitem-id-z") &&
132 ST.getMaxWorkitemID(
F, 2) != 0)
135 if (!IsKernel && !
F.hasFnAttribute(
"amdgpu-no-lds-kernel-id"))
145 if (!ST.flatScratchIsArchitected()) {
146 PrivateSegmentWaveByteOffset =
true;
151 ArgInfo.PrivateSegmentWaveByteOffset =
156 Attribute A =
F.getFnAttribute(
"amdgpu-git-ptr-high");
161 A =
F.getFnAttribute(
"amdgpu-32bit-address-high-bits");
162 S =
A.getValueAsString();
169 if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
171 AMDGPU::VGPR_32RegClass.getRegister(ST.getMaxNumVGPRs(
F) - 1);
193 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass));
195 return ArgInfo.PrivateSegmentBuffer.getRegister();
200 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
202 return ArgInfo.DispatchPtr.getRegister();
207 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
209 return ArgInfo.QueuePtr.getRegister();
215 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
217 return ArgInfo.KernargSegmentPtr.getRegister();
222 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
224 return ArgInfo.DispatchID.getRegister();
229 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
231 return ArgInfo.FlatScratchInit.getRegister();
237 return ArgInfo.PrivateSegmentSize.getRegister();
242 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
244 return ArgInfo.ImplicitBufferPtr.getRegister();
250 return ArgInfo.LDSKernelId.getRegister();
255 unsigned AllocSizeDWord,
int KernArgIdx,
int PaddingSGPRs) {
257 "Preload kernel argument allocated twice.");
258 NumUserSGPRs += PaddingSGPRs;
263 TRI.getMatchingSuperReg(getNextUserSGPR(), AMDGPU::sub0, RC);
265 (RC == &AMDGPU::SReg_32RegClass || RC == &AMDGPU::SReg_64RegClass)) {
266 ArgInfo.PreloadKernArgs[KernArgIdx].Regs.push_back(PreloadReg);
267 NumUserSGPRs += AllocSizeDWord;
269 for (
unsigned I = 0;
I < AllocSizeDWord; ++
I) {
270 ArgInfo.PreloadKernArgs[KernArgIdx].Regs.push_back(getNextUserSGPR());
277 return &
ArgInfo.PreloadKernArgs[KernArgIdx].Regs;
294 WWMSpills.
insert(std::make_pair(
304 for (
auto &Reg : WWMSpills) {
306 CalleeSavedRegs.push_back(Reg);
308 ScratchRegs.push_back(Reg);
314 for (
unsigned I = 0; CSRegs[
I]; ++
I) {
315 if (CSRegs[
I] == Reg)
326 for (
Register &Reg : SpillPhysVGPRs) {
328 TRI->findUnusedRegister(
MRI, &AMDGPU::VGPR_32RegClass, MF);
329 if (!NewReg || NewReg >= Reg)
332 MRI.replaceRegWith(Reg, NewReg);
335 WWMReservedRegs.
remove(Reg);
336 WWMReservedRegs.
insert(NewReg);
337 WWMSpills.
insert(std::make_pair(NewReg, WWMSpills[Reg]));
338 WWMSpills.
erase(Reg);
349bool SIMachineFunctionInfo::allocateVirtualVGPRForSGPRSpills(
354 LaneVGPR =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
355 SpillVGPRs.push_back(LaneVGPR);
357 LaneVGPR = SpillVGPRs.back();
360 SGPRSpillsToVirtualVGPRLanes[FI].emplace_back(LaneVGPR, LaneIndex);
364bool SIMachineFunctionInfo::allocatePhysicalVGPRForSGPRSpills(
365 MachineFunction &MF,
int FI,
unsigned LaneIndex,
bool IsPrologEpilog) {
374 LaneVGPR =
TRI->findUnusedRegister(
MRI, &AMDGPU::VGPR_32RegClass, MF,
376 if (LaneVGPR == AMDGPU::NoRegister) {
379 SGPRSpillsToPhysicalVGPRLanes.erase(FI);
389 SpillPhysVGPRs.push_back(LaneVGPR);
391 LaneVGPR = SpillPhysVGPRs.back();
394 SGPRSpillsToPhysicalVGPRLanes[FI].emplace_back(LaneVGPR, LaneIndex);
400 bool IsPrologEpilog) {
401 std::vector<SIRegisterInfo::SpilledReg> &SpillLanes =
402 SpillToPhysVGPRLane ? SGPRSpillsToPhysicalVGPRLanes[FI]
403 : SGPRSpillsToVirtualVGPRLanes[FI];
406 if (!SpillLanes.empty())
411 unsigned WaveSize = ST.getWavefrontSize();
413 unsigned Size = FrameInfo.getObjectSize(FI);
414 unsigned NumLanes =
Size / 4;
416 if (NumLanes > WaveSize)
419 assert(
Size >= 4 &&
"invalid sgpr spill size");
420 assert(ST.getRegisterInfo()->spillSGPRToVGPR() &&
421 "not spilling SGPRs to VGPRs");
423 unsigned &NumSpillLanes = SpillToPhysVGPRLane ? NumPhysicalVGPRSpillLanes
424 : NumVirtualVGPRSpillLanes;
426 for (
unsigned I = 0;
I < NumLanes; ++
I, ++NumSpillLanes) {
427 unsigned LaneIndex = (NumSpillLanes % WaveSize);
429 bool Allocated = SpillToPhysVGPRLane
430 ? allocatePhysicalVGPRForSGPRSpills(MF, FI, LaneIndex,
432 : allocateVirtualVGPRForSGPRSpills(MF, FI, LaneIndex);
454 auto &Spill = VGPRToAGPRSpills[FI];
457 if (!Spill.Lanes.empty())
458 return Spill.FullyAllocated;
461 unsigned NumLanes =
Size / 4;
462 Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);
465 isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
468 auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
470 Spill.FullyAllocated =
true;
485 OtherUsedRegs.
set(Reg);
487 OtherUsedRegs.
set(Reg);
490 for (
int I = NumLanes - 1;
I >= 0; --
I) {
491 NextSpillReg = std::find_if(
493 return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
497 if (NextSpillReg == Regs.
end()) {
498 Spill.FullyAllocated =
false;
502 OtherUsedRegs.
set(*NextSpillReg);
504 MRI.reserveReg(*NextSpillReg,
TRI);
505 Spill.Lanes[
I] = *NextSpillReg++;
508 return Spill.FullyAllocated;
521 SGPRSpillsToVirtualVGPRLanes.erase(R.first);
526 if (!ResetSGPRSpillStackIDs) {
529 SGPRSpillsToPhysicalVGPRLanes.erase(R.first);
532 bool HaveSGPRToMemory =
false;
534 if (ResetSGPRSpillStackIDs) {
542 HaveSGPRToMemory =
true;
548 for (
auto &R : VGPRToAGPRSpills) {
553 return HaveSGPRToMemory;
563 TRI.getSpillAlign(AMDGPU::SGPR_32RegClass),
false);
567MCPhysReg SIMachineFunctionInfo::getNextUserSGPR()
const {
568 assert(NumSystemSGPRs == 0 &&
"System SGPRs must be added after user SGPRs");
569 return AMDGPU::SGPR0 + NumUserSGPRs;
572MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR()
const {
573 return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
576void SIMachineFunctionInfo::MRI_NoteNewVirtualRegister(
Register Reg) {
580void SIMachineFunctionInfo::MRI_NoteCloneVirtualRegister(
Register NewReg,
582 VRegFlags.grow(NewReg);
583 VRegFlags[NewReg] = VRegFlags[SrcReg];
589 if (!ST.isAmdPalOS())
592 if (ST.hasMergedShaders()) {
598 GitPtrLo = AMDGPU::SGPR8;
617static std::optional<yaml::SIArgumentInfo>
622 auto convertArg = [&](std::optional<yaml::SIArgument> &
A,
629 if (Arg.isRegister()) {
636 SA.
Mask = Arg.getMask();
657 ArgInfo.PrivateSegmentWaveByteOffset);
673 : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
674 MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()),
675 GDSSize(MFI.getGDSSize()),
676 DynLDSAlign(MFI.getDynLDSAlign()), IsEntryFunction(MFI.isEntryFunction()),
677 NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
678 MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()),
679 HasSpilledSGPRs(MFI.hasSpilledSGPRs()),
680 HasSpilledVGPRs(MFI.hasSpilledVGPRs()),
681 HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
682 Occupancy(MFI.getOccupancy()),
686 BytesInStackArgArea(MFI.getBytesInStackArgArea()),
687 ReturnsVoid(MFI.returnsVoid()),
689 PSInputAddr(MFI.getPSInputAddr()),
690 PSInputEnable(MFI.getPSInputEnable()),
691 Mode(MFI.getMode()) {
742 "", std::nullopt, std::nullopt);
743 SourceRange = YamlMFI.
ScavengeFI->SourceRange;
754 return !
F.hasFnAttribute(
"amdgpu-no-agpr");
761 if (!mayNeedAGPRs()) {
774 for (
unsigned I = 0, E =
MRI.getNumVirtRegs();
I != E; ++
I) {
781 if (!RC && !
MRI.use_empty(Reg) &&
MRI.getType(Reg).isValid()) {
787 for (
MCRegister Reg : AMDGPU::AGPR_32RegClass) {
788 if (
MRI.isPhysRegUsed(Reg)) {
unsigned const MachineRegisterInfo * MRI
Provides AMDGPU specific target descriptions.
Base class for AMDGPU specific classes of TargetSubtarget.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
AMD GCN specific subclass of TargetSubtarget.
unsigned const TargetRegisterInfo * TRI
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
const GCNTargetMachine & getTM(const GCNSubtarget *STI)
static std::optional< yaml::SIArgumentInfo > convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo, const TargetRegisterInfo &TRI)
static yaml::StringValue regToString(Register Reg, const TargetRegisterInfo &TRI)
Interface definition for SIRegisterInfo.
static const AMDGPUFunctionArgInfo FixedABIFunctionInfo
uint32_t getLDSSize() const
bool isChainFunction() const
bool isEntryFunction() const
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
void setBitsInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
setBitsInMask - Add '1' bits from Mask to this vector.
Allocate memory in an ever growing pool, as if by bump-pointer.
Lightweight error class with error context and mandatory checking.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this function...
const SITargetLowering * getTargetLowering() const override
void allocKernargPreloadSGPRs(unsigned NumSGPRs)
Wrapper class representing physical registers. Should be passed by value.
void sortUniqueLiveIns()
Sorts and uniques the LiveIns vector.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
void removeLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Remove the specified register from the live in set.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
bool hasCalls() const
Return true if the current function has any function calls.
int CreateSpillStackObject(uint64_t Size, Align Alignment)
Create a new statically sized stack object that represents a spill slot, returning a nonnegative iden...
void setStackID(int ObjectIdx, uint8_t ID)
bool isSpillSlotObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a spill slot.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
void RemoveStackObject(int ObjectIdx)
Remove or mark dead a statically sized stack object.
int getObjectIndexEnd() const
Return one past the maximum frame object index.
uint8_t getStackID(int ObjectIdx) const
int getObjectIndexBegin() const
Return the minimum frame object index.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * cloneInfo(const Ty &Old)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const MCPhysReg * getCalleeSavedRegs() const
Returns list of callee saved registers.
size_type count(const KeyT &Key) const
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
This interface provides simple read-only access to a block of memory, and provides simple methods for...
virtual StringRef getBufferIdentifier() const
Return an identifier for this buffer, typically the filename it was read from.
Wrapper class representing virtual and physical registers.
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
bool usesAGPRs(const MachineFunction &MF) const
bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF, PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange)
Register addPrivateSegmentSize(const SIRegisterInfo &TRI)
void allocateWWMSpill(MachineFunction &MF, Register VGPR, uint64_t Size=4, Align Alignment=Align(4))
Register addDispatchPtr(const SIRegisterInfo &TRI)
Register getLongBranchReservedReg() const
Register addFlatScratchInit(const SIRegisterInfo &TRI)
unsigned getMaxWavesPerEU() const
int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI)
Register addQueuePtr(const SIRegisterInfo &TRI)
SIMachineFunctionInfo(const SIMachineFunctionInfo &MFI)=default
Register getGITPtrLoReg(const MachineFunction &MF) const
bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR)
Reserve AGPRs or VGPRs to support spilling for FrameIndex FI.
void splitWWMSpillRegisters(MachineFunction &MF, SmallVectorImpl< std::pair< Register, int > > &CalleeSavedRegs, SmallVectorImpl< std::pair< Register, int > > &ScratchRegs) const
Register getSGPRForEXECCopy() const
bool mayUseAGPRs(const Function &F) const
bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) const
void shiftSpillPhysVGPRsToLowestRange(MachineFunction &MF)
Register addLDSKernelId()
Register getVGPRForAGPRCopy() const
bool allocateSGPRSpillToVGPRLane(MachineFunction &MF, int FI, bool SpillToPhysVGPRLane=false, bool IsPrologEpilog=false)
Register addKernargSegmentPtr(const SIRegisterInfo &TRI)
Register addDispatchID(const SIRegisterInfo &TRI)
bool removeDeadFrameIndices(MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs)
If ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill to the default stack.
MachineFunctionInfo * clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, const DenseMap< MachineBasicBlock *, MachineBasicBlock * > &Src2DstMBB) const override
Make a functionally equivalent copy of this MachineFunctionInfo in MF.
bool checkIndexInPrologEpilogSGPRSpills(int FI) const
Register addPrivateSegmentBuffer(const SIRegisterInfo &TRI)
const ReservedRegSet & getWWMReservedRegs() const
std::optional< int > getOptionalScavengeFI() const
Register addImplicitBufferPtr(const SIRegisterInfo &TRI)
void limitOccupancy(const MachineFunction &MF)
SmallVectorImpl< MCRegister > * addPreloadedKernArg(const SIRegisterInfo &TRI, const TargetRegisterClass *RC, unsigned AllocSizeDWord, int KernArgIdx, int PaddingSGPRs)
void reserveWWMRegister(Register Reg)
static bool isChainScratchRegister(Register VGPR)
static bool isAGPRClass(const TargetRegisterClass *RC)
Instances of this class encapsulate one diagnostic report, allowing printing to a raw_ostream as a ca...
Represents a location in source code.
Represents a range in source code.
bool remove(const value_type &X)
Remove an item from the set vector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
typename SuperClass::const_iterator const_iterator
unsigned getMainFileID() const
const MemoryBuffer * getMemoryBuffer(unsigned i) const
StringRef - Represent a constant reference to a string, i.e.
bool consumeInteger(unsigned Radix, T &Result)
Parse the current string as an integer of the specified radix.
constexpr bool empty() const
empty - Check if the string is empty.
const TargetMachine & getTargetMachine() const
ArrayRef< MCPhysReg > getRegisters() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
A raw_ostream that writes to an std::string.
bool isEntryFunctionCC(CallingConv::ID CC)
bool isChainCC(CallingConv::ID CC)
unsigned getInitialPSInputAddr(const Function &F)
bool isGraphics(CallingConv::ID cc)
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_Gfx
Used for AMD graphics targets.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ SPIR_KERNEL
Used for SPIR kernel functions.
std::optional< const char * > toString(const std::optional< DWARFFormValue > &V)
Take an optional DWARFFormValue and try to extract a string value from it.
This is an optimization pass for GlobalISel generic memory operations.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
This struct is a compact representation of a valid (non-zero power of two) alignment.
static ArgDescriptor createRegister(Register Reg, unsigned Mask=~0u)
Helper struct shared between Function Specialization and SCCP Solver.
MachineFunctionInfo - This class can be derived from and used by targets to hold private target-speci...
A serializable representation of a reference to a stack object or fixed stack object.
std::optional< SIArgument > PrivateSegmentWaveByteOffset
std::optional< SIArgument > WorkGroupIDY
std::optional< SIArgument > FlatScratchInit
std::optional< SIArgument > DispatchPtr
std::optional< SIArgument > DispatchID
std::optional< SIArgument > WorkItemIDY
std::optional< SIArgument > WorkGroupIDX
std::optional< SIArgument > ImplicitArgPtr
std::optional< SIArgument > QueuePtr
std::optional< SIArgument > WorkGroupInfo
std::optional< SIArgument > LDSKernelId
std::optional< SIArgument > ImplicitBufferPtr
std::optional< SIArgument > WorkItemIDX
std::optional< SIArgument > KernargSegmentPtr
std::optional< SIArgument > WorkItemIDZ
std::optional< SIArgument > PrivateSegmentSize
std::optional< SIArgument > PrivateSegmentBuffer
std::optional< SIArgument > WorkGroupIDZ
std::optional< unsigned > Mask
static SIArgument createArgument(bool IsReg)
StringValue SGPRForEXECCopy
SmallVector< StringValue > WWMReservedRegs
uint32_t HighBitsOf32BitAddress
SIMachineFunctionInfo()=default
StringValue LongBranchReservedReg
uint64_t ExplicitKernArgSize
void mappingImpl(yaml::IO &YamlIO) override
StringValue VGPRForAGPRCopy
std::optional< FrameIndex > ScavengeFI
unsigned BytesInStackArgArea
A wrapper around std::string which contains a source range that's being set during parsing.