Go to the documentation of this file.
13 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
14 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
27 class MachineFrameInfo;
28 class MachineFunction;
29 class SIMachineFunctionInfo;
31 class TargetRegisterClass;
160 if (
YamlIO.outputting()) {
162 YamlIO.mapRequired(
"reg", A.RegisterName);
164 YamlIO.mapRequired(
"offset", A.StackOffset);
166 auto Keys =
YamlIO.keys();
169 YamlIO.mapRequired(
"reg", A.RegisterName);
171 YamlIO.mapRequired(
"offset", A.StackOffset);
173 YamlIO.setError(
"missing required key 'reg' or 'offset'");
175 YamlIO.mapOptional(
"mask", A.Mask);
177 static const bool flow =
true;
217 YamlIO.mapOptional(
"privateSegmentWaveByteOffset",
262 YamlIO.mapOptional(
"dx10-clamp",
Mode.DX10Clamp,
true);
263 YamlIO.mapOptional(
"fp32-input-denormals",
Mode.FP32InputDenormals,
true);
264 YamlIO.mapOptional(
"fp32-output-denormals",
Mode.FP32OutputDenormals,
true);
265 YamlIO.mapOptional(
"fp64-fp16-input-denormals",
Mode.FP64FP16InputDenormals,
true);
266 YamlIO.mapOptional(
"fp64-fp16-output-denormals",
Mode.FP64FP16OutputDenormals,
true);
334 YamlIO.mapOptional(
"highBitsOf32BitAddress",
353 Register ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;
357 Register FrameOffsetReg = AMDGPU::FP_REG;
362 Register StackPtrOffsetReg = AMDGPU::SP_REG;
367 unsigned PSInputAddr = 0;
368 unsigned PSInputEnable = 0;
379 unsigned BytesInStackArgArea = 0;
381 bool ReturnsVoid =
true;
385 std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0};
389 std::pair<unsigned, unsigned> WavesPerEU = {0, 0};
391 std::unique_ptr<const AMDGPUBufferPseudoSourceValue> BufferPSV;
392 std::unique_ptr<const AMDGPUImagePseudoSourceValue> ImagePSV;
393 std::unique_ptr<const AMDGPUGWSResourcePseudoSourceValue> GWSResourcePSV;
396 unsigned NumUserSGPRs = 0;
397 unsigned NumSystemSGPRs = 0;
399 bool HasSpilledSGPRs =
false;
400 bool HasSpilledVGPRs =
false;
401 bool HasNonSpillStackObjects =
false;
402 bool IsStackRealigned =
false;
408 bool PrivateSegmentBuffer : 1;
409 bool DispatchPtr : 1;
411 bool KernargSegmentPtr : 1;
413 bool FlatScratchInit : 1;
416 bool WorkGroupIDX : 1;
417 bool WorkGroupIDY : 1;
418 bool WorkGroupIDZ : 1;
419 bool WorkGroupInfo : 1;
420 bool PrivateSegmentWaveByteOffset : 1;
422 bool WorkItemIDX : 1;
423 bool WorkItemIDY : 1;
424 bool WorkItemIDZ : 1;
429 bool ImplicitBufferPtr : 1;
433 bool ImplicitArgPtr : 1;
435 bool MayNeedAGPRs : 1;
442 unsigned HighBitsOf32BitAddress;
447 mutable Optional<bool> UsesAGPRs;
504 unsigned NumVGPRSpillLanes = 0;
524 return VGPRForAGPRCopy;
528 VGPRForAGPRCopy = NewVGPRForAGPRCopy;
558 return (
I == SGPRToVGPRSpills.
end()) ?
574 return (
I == VGPRToAGPRSpills.
end()) ? (
MCPhysReg)AMDGPU::NoRegister
575 :
I->second.Lanes[Lane];
580 if (
I != VGPRToAGPRSpills.
end())
581 I->second.IsDead =
true;
585 unsigned NumLane)
const;
592 bool ResetSGPRSpillStackIDs);
598 return BytesInStackArgArea;
602 BytesInStackArgArea = Bytes;
618 return ArgInfo.WorkGroupIDX.getRegister();
624 return ArgInfo.WorkGroupIDY.getRegister();
630 return ArgInfo.WorkGroupIDZ.getRegister();
636 return ArgInfo.WorkGroupInfo.getRegister();
653 ArgInfo.PrivateSegmentWaveByteOffset
656 return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
664 return PrivateSegmentBuffer;
676 return KernargSegmentPtr;
684 return FlatScratchInit;
700 return WorkGroupInfo;
704 return PrivateSegmentWaveByteOffset;
720 return ImplicitArgPtr;
724 return ImplicitBufferPtr;
735 std::tuple<const ArgDescriptor *, const TargetRegisterClass *, LLT>
752 return HighBitsOf32BitAddress;
760 return NumUserSGPRs + NumSystemSGPRs;
764 return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
770 return ScratchRSrcReg;
774 assert(
Reg != 0 &&
"Should never be unset");
775 ScratchRSrcReg =
Reg;
779 return FrameOffsetReg;
783 assert(
Reg != 0 &&
"Should never be unset");
784 FrameOffsetReg =
Reg;
788 assert(
Reg != 0 &&
"Should never be unset");
789 StackPtrOffsetReg =
Reg;
797 return StackPtrOffsetReg;
801 return ArgInfo.QueuePtr.getRegister();
805 return ArgInfo.ImplicitBufferPtr.getRegister();
809 return HasSpilledSGPRs;
813 HasSpilledSGPRs = Spill;
817 return HasSpilledVGPRs;
821 HasSpilledVGPRs = Spill;
825 return HasNonSpillStackObjects;
829 HasNonSpillStackObjects = StackObject;
833 return IsStackRealigned;
837 IsStackRealigned = Realigned;
841 return NumSpilledSGPRs;
845 return NumSpilledVGPRs;
849 NumSpilledSGPRs += num;
853 NumSpilledVGPRs += num;
861 return PSInputEnable;
865 return PSInputAddr & (1 << Index);
869 PSInputAddr |= 1 << Index;
873 PSInputEnable |= 1 << Index;
887 return FlatWorkGroupSizes;
892 return FlatWorkGroupSizes.first;
897 return FlatWorkGroupSizes.second;
908 return WavesPerEU.first;
913 return WavesPerEU.second;
921 return ArgInfo.WorkGroupIDX.getRegister();
924 return ArgInfo.WorkGroupIDY.getRegister();
927 return ArgInfo.WorkGroupIDZ.getRegister();
934 BufferPSV = std::make_unique<AMDGPUBufferPseudoSourceValue>(
TII);
936 return BufferPSV.get();
941 ImagePSV = std::make_unique<AMDGPUImagePseudoSourceValue>(
TII);
943 return ImagePSV.get();
947 if (!GWSResourcePSV) {
949 std::make_unique<AMDGPUGWSResourcePseudoSourceValue>(
TII);
952 return GWSResourcePSV.get();
962 return (Occupancy < 4) ? Occupancy : 4;
968 if (Occupancy > Limit)
973 if (Occupancy < Limit)
992 #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
unsigned getMinFlatWorkGroupSize() const
void limitOccupancy(unsigned Limit)
static bool classof(const PseudoSourceValue *V)
void setIfReturnsVoid(bool Value)
void setIsStackRealigned(bool Realigned=true)
uint32_t HighBitsOf32BitAddress
ArrayRef< SGPRSpillVGPR > getSGPRSpillVGPRs() const
auto wwmAllocation() const
bool hasSpilledVGPRs() const
Register addPrivateSegmentWaveByteOffset()
SmallVector< int, 8 > WWMReservedFrameIndexes
Track stack slots used for save/restore of reserved WWM VGPRs in the prolog/epilog.
This is an optimization pass for GlobalISel generic memory operations.
Register getVGPRForAGPRCopy() const
MCRegister getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const
bool mayNeedAGPRs() const
void markPSInputAllocated(unsigned Index)
bool operator==(const SIMode Other) const
AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII)
unsigned getMinAllowedOccupancy() const
bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg)
Optional< SIArgumentInfo > ArgInfo
static bool classof(const PseudoSourceValue *V)
void setVGPRForAGPRCopy(Register NewVGPRForAGPRCopy)
Register addWorkGroupIDY()
Optional< SIArgument > WorkGroupIDX
unsigned getNumSpilledSGPRs() const
Targets should override this in a way that mirrors the implementation of llvm::MachineFunctionInfo.
Optional< SIArgument > DispatchPtr
Register addDispatchPtr(const SIRegisterInfo &TRI)
Optional< SIArgument > PrivateSegmentWaveByteOffset
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
AMDGPUFunctionArgInfo & getArgInfo()
StringValue VGPRForAGPRCopy
Reg
All possible values of the reg field in the ModR/M byte.
const AMDGPUBufferPseudoSourceValue * getBufferPSV(const SIInstrInfo &TII)
bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI)
Reserve a slice of a VGPR to support spilling for FrameIndex FI.
Register addWorkGroupIDZ()
bool hasImplicitArgPtr() const
std::pair< unsigned, unsigned > getFlatWorkGroupSizes() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
unsigned getGITPtrHigh() const
Register addFlatScratchInit(const SIRegisterInfo &TRI)
Optional< SIArgument > WorkItemIDY
SIArgument(const SIArgument &Other)
unsigned getPSInputEnable() const
void increaseOccupancy(const MachineFunction &MF, unsigned Limit)
Register SGPRForFPSaveRestoreCopy
If this is set, an SGPR used for save/restore of the register used for the frame pointer.
unsigned const TargetRegisterInfo * TRI
unsigned getNumPreloadedSGPRs() const
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM ID Predecessors according to mbb< bb27, 0x8b0a7c0 > Note ADDri is not a two address instruction its result reg1037 is an operand of the PHI node in bb76 and its operand reg1039 is the result of the PHI node We should treat it as a two address code and make sure the ADDri is scheduled after any node that reads reg1039 Use info(i.e. register scavenger) to assign it a free register to allow reuse the collector could move the objects and invalidate the derived pointer This is bad enough in the first but safe points can crop up unpredictably **array_addr i32 n y store obj * new
void setWorkItemIDZ(ArgDescriptor Arg)
SIMode(const AMDGPU::SIModeRegisterDefaults &Mode)
Optional< SIArgument > WorkItemIDZ
static bool classof(const PseudoSourceValue *V)
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
void mappingImpl(yaml::IO &YamlIO) override
StringValue FrameOffsetReg
void printCustom(raw_ostream &OS) const override
Implement printing for PseudoSourceValue.
bool needsWaveLimiter() const
TargetInstrInfo - Interface to description of machine instruction set.
Register getPrivateSegmentWaveByteOffsetSystemSGPR() const
constexpr char NumSpilledVGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSpilledVGPRs.
std::tuple< const ArgDescriptor *, const TargetRegisterClass *, LLT > getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const
bool hasWorkGroupIDZ() const
Instances of this class encapsulate one diagnostic report, allowing printing to a raw_ostream as a ca...
bool isAliased(const MachineFrameInfo *) const override
Test whether the memory pointed to by this PseudoSourceValue may also be pointed to by an LLVM IR Val...
bool FP64FP16InputDenormals
const HexagonInstrInfo * TII
Optional< SIArgument > DispatchID
Optional< SIArgument > WorkItemIDX
void setBytesInStackArgArea(unsigned Bytes)
uint64_t ExplicitKernArgSize
Register getStackPtrOffsetReg() const
bool usesAGPRs(const MachineFunction &MF) const
This class implements an extremely fast bulk output stream that can only output to a stream.
static void mapping(IO &YamlIO, SIMode &Mode)
Register SGPRForBPSaveRestoreCopy
If this is set, an SGPR used for save/restore of the register used for the base pointer.
Special value supplied for machine level alias analysis.
bool mayUseAGPRs(const MachineFunction &MF) const
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool hasNonSpillStackObjects() const
Register getWorkGroupIDSGPR(unsigned Dim) const
void setWorkItemIDX(ArgDescriptor Arg)
bool mayAlias(const MachineFrameInfo *) const override
Return true if the memory pointed to by this PseudoSourceValue can ever alias an LLVM IR Value.
Register addDispatchID(const SIRegisterInfo &TRI)
bool mayAlias(const MachineFrameInfo *) const override
Return true if the memory pointed to by this PseudoSourceValue can ever alias an LLVM IR Value.
bool hasSpilledSGPRs() const
const AMDGPUImagePseudoSourceValue * getImagePSV(const SIInstrInfo &TII)
bool haveFreeLanesForSGPRSpill(const MachineFunction &MF, unsigned NumLane) const
Returns true if NumLane slots are available in VGPRs already used for SGPR spilling.
static SIArgument createArgument(bool IsReg)
Register addKernargSegmentPtr(const SIRegisterInfo &TRI)
Optional< SIArgument > WorkGroupIDZ
void setWorkItemIDY(ArgDescriptor Arg)
StringValue ScratchRSrcReg
SIArgument & operator=(const SIArgument &Other)
ArrayRef< MCPhysReg > getVGPRSpillAGPRs() const
static void mapping(IO &YamlIO, SIMachineFunctionInfo &MFI)
AMDGPUPseudoSourceValue(unsigned Kind, const TargetInstrInfo &TII)
bool isStackRealigned() const
Register addPrivateSegmentBuffer(const SIRegisterInfo &TRI)
bool hasWorkGroupInfo() const
SGPRSpillVGPR(Register V, Optional< int > F)
unsigned getMaxWavesPerEU() const
void reserveWWMRegister(Register Reg)
Optional< int > getOptionalScavengeFI() const
void printCustom(raw_ostream &OS) const override
Implement printing for PseudoSourceValue.
bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF, PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange)
SIMachineFunctionInfo()=default
bool hasDispatchID() const
Optional< int > FramePointerSaveIndex
AMDGPUGWSResourcePseudoSourceValue(const TargetInstrInfo &TII)
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
unsigned getNumSpilledVGPRs() const
unsigned BytesInStackArgArea
SmallSetVector< Register, 8 > WWMReservedRegs
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Optional< FrameIndex > ScavengeFI
SmallVector< StringValue > WWMReservedRegs
unsigned getMinWavesPerEU() const
iterator find(const_arg_type_t< KeyT > Val)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI)
uint32_t get32BitAddressHighBits() const
bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR)
Reserve AGPRs or VGPRs to support spilling for FrameIndex FI.
const AMDGPUFunctionArgInfo & getArgInfo() const
void addToSpilledSGPRs(unsigned num)
bool isMemoryBound() const
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&... args)
Zip iterator for two or more iterable types.
void setHasSpilledVGPRs(bool Spill=true)
Register getGITPtrLoReg(const MachineFunction &MF) const
A wrapper around std::string which contains a source range that's being set during parsing.
unsigned getOccupancy() const
SIMachineFunctionInfo(const MachineFunction &MF)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
bool hasWorkGroupIDX() const
StringValue StackPtrOffsetReg
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void limitOccupancy(const MachineFunction &MF)
StackOffset is a class to represent an offset with 2 dimensions, named fixed and scalable,...
MCPhysReg getVGPRToAGPRSpill(int FrameIndex, unsigned Lane) const
bool isAliased(const MachineFrameInfo *) const override
Test whether the memory pointed to by this PseudoSourceValue may also be pointed to by an LLVM IR Val...
Optional< SIArgument > ImplicitBufferPtr
Register getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses.
Register getImplicitBufferPtrUserSGPR() const
void setStackPtrOffsetReg(Register Reg)
bool hasPrivateSegmentBuffer() const
Register addImplicitBufferPtr(const SIRegisterInfo &TRI)
Helper struct shared between Function Specialization and SCCP Solver.
static void mapping(IO &YamlIO, SIArgument &A)
Wrapper class representing virtual and physical registers.
unsigned getNumUserSGPRs() const
bool hasWorkItemIDZ() const
void setVGPRToAGPRSpillDead(int FrameIndex)
bool hasPrivateSegmentWaveByteOffset() const
bool removeDeadFrameIndices(MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs)
If ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill to the default stack.
SmallVector< MCPhysReg, 32 > Lanes
unsigned getMaxFlatWorkGroupSize() const
AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII)
bool hasDispatchPtr() const
Optional< SIArgument > WorkGroupInfo
Register addQueuePtr(const SIRegisterInfo &TRI)
Optional< SIArgument > ImplicitArgPtr
Optional< SIArgument > KernargSegmentPtr
Optional< SIArgument > WorkGroupIDY
Optional< int > BasePointerSaveIndex
Optional< SIArgument > PrivateSegmentSize
Lightweight error class with error context and mandatory checking.
const AMDGPUGWSResourcePseudoSourceValue * getGWSPSV(const SIInstrInfo &TII)
bool hasWorkItemIDY() const
void setPrivateSegmentWaveByteOffset(Register Reg)
static constexpr ArgDescriptor createRegister(Register Reg, unsigned Mask=~0u)
std::pair< unsigned, unsigned > getWavesPerEU() const
ArrayRef< MCPhysReg > getAGPRSpillVGPRs() const
void setScratchRSrcReg(Register Reg)
void setFrameOffsetReg(Register Reg)
Optional< SIArgument > PrivateSegmentBuffer
unsigned getBytesInStackArgArea() const
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
void setHasNonSpillStackObjects(bool StackObject=true)
void markPSInputEnabled(unsigned Index)
static void mapping(IO &YamlIO, SIArgumentInfo &AI)
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Represents a range in source code.
This class keeps track of the SPI_PS_INPUT_ADDR config register, which tells the hardware which inter...
bool hasWorkGroupIDY() const
Register addWorkGroupIDX()
bool hasKernargSegmentPtr() const
SpilledReg(Register R, int L)
unsigned getPSInputAddr() const
bool isPSInputAllocated(unsigned Index) const
Register addWorkGroupInfo()
void printCustom(raw_ostream &OS) const override
Implement printing for PseudoSourceValue.
A SetVector that performs no allocations if smaller than a certain size.
void setHasSpilledSGPRs(bool Spill=true)
void addToSpilledVGPRs(unsigned num)
constexpr char NumSpilledSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSpilledSGPRs.
~SIMachineFunctionInfo()=default
bool isConstant(const MachineFrameInfo *) const override
Test whether the memory pointed to by this PseudoSourceValue has a constant value.
bool hasWorkItemIDX() const
ArrayRef< SpilledReg > getSGPRToVGPRSpills(int FrameIndex) const
bool FP64FP16OutputDenormals
Optional< unsigned > Mask
LLVM Value Representation.
bool hasImplicitBufferPtr() const
bool hasFlatScratchInit() const
Optional< SIArgument > FlatScratchInit
Register getQueuePtrUserSGPR() const
Register getFrameOffsetReg() const
void allocateWWMReservedSpillSlots(MachineFrameInfo &MFI, const SIRegisterInfo &TRI)
Optional< std::vector< StOtherPiece > > Other
Wrapper class representing physical registers. Should be passed by value.
Optional< SIArgument > QueuePtr