14#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
15#define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
27#define GET_SUBTARGETINFO_HEADER
28#include "AMDGPUGenSubtargetInfo.inc"
32class GCNTargetMachine;
53 std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
54 std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
55 std::unique_ptr<InstructionSelector> InstSelector;
57 std::unique_ptr<AMDGPURegisterBankInfo> RegBankInfo;
271 return &FrameLowering;
283 return CallLoweringInfo.get();
287 return InlineAsmLoweringInfo.get();
291 return InstSelector.get();
299 return RegBankInfo.get();
325 return (64 * 4) * ((1 << 18) - 1);
329 return (64 * 4) * ((1 << 15) - 1);
332 return (256 * 4) * ((1 << 13) - 1);
959 bool useAA()
const override;
977 unsigned NumRegionInstrs)
const override;
1310 unsigned NumSGPRs = 0,
unsigned NumVGPRs = 0)
const;
1422 std::pair<unsigned, unsigned> WavesPerEU,
1423 unsigned PreloadedSGPRs,
1424 unsigned ReservedNumSGPRs)
const;
1487 std::pair<unsigned, unsigned> WavesPerEU)
const;
1513 std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
1516 std::unique_ptr<ScheduleDAGMutation>
1649 bool ImplicitBufferPtr =
false;
1651 bool PrivateSegmentBuffer =
false;
1653 bool DispatchPtr =
false;
1655 bool QueuePtr =
false;
1657 bool KernargSegmentPtr =
false;
1659 bool DispatchID =
false;
1661 bool FlatScratchInit =
false;
1663 bool PrivateSegmentSize =
false;
1665 unsigned NumKernargPreloadSGPRs = 0;
1667 unsigned NumUsedUserSGPRs = 0;
This file describes how to lower LLVM calls to machine code calls.
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
Base class for AMDGPU specific classes of TargetSubtarget.
const HexagonInstrInfo * TII
const char LLVMTargetMachineRef TM
SI DAG Lowering interface definition.
Interface definition for SIInstrInfo.
unsigned getWavefrontSizeLog2() const
unsigned getMaxWavesPerEU() const
unsigned getWavefrontSize() const
bool isSramEccOnOrAny() const
bool isXnackOnOrAny() const
bool hasMemoryAtomicFaddF32DenormalSupport() const
bool hasD16Images() const
InstrItineraryData InstrItins
bool useVGPRIndexMode() const
bool hasAtomicDsPkAdd16Insts() const
bool HasLdsBranchVmemWARHazard
bool partialVCCWritesUpdateVCCZ() const
Writes to VCC_LO/VCC_HI update the VCCZ flag.
bool hasPkFmacF16Inst() const
bool HasAtomicFMinFMaxF64FlatInsts
bool hasDot2Insts() const
bool hasD16LoadStore() const
bool HasExtendedImageInsts
bool hasMergedShaders() const
bool hasSDWAScalar() const
bool supportsBackOffBarrier() const
bool hasScalarCompareEq64() const
bool has1_5xVGPRs() const
int getLDSBankCount() const
bool hasOnlyRevVALUShifts() const
bool hasImageStoreD16Bug() const
bool hasNonNSAEncoding() const
bool hasUsableDivScaleConditionOutput() const
Condition output from div_scale is usable.
void mirFileLoaded(MachineFunction &MF) const override
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
bool loadStoreOptEnabled() const
bool enableSubRegLiveness() const override
bool hasDPPWavefrontShifts() const
unsigned getSGPRAllocGranule() const
bool hasAtomicFMinFMaxF64FlatInsts() const
bool hasLdsAtomicAddF64() const
bool hasFlatLgkmVMemCountInOrder() const
bool flatScratchIsPointer() const
bool hasFP8ConversionInsts() const
bool hasShift64HighRegBug() const
bool hasDot7Insts() const
bool HasScalarDwordx3Loads
bool hasApertureRegs() const
unsigned MaxPrivateElementSize
bool unsafeDSOffsetFoldingEnabled() const
bool hasFPAtomicToDenormModeHazard() const
unsigned getAddressableNumArchVGPRs() const
bool hasFlatInstOffsets() const
bool vmemWriteNeedsExpWaitcnt() const
bool hasAtomicFMinFMaxF32FlatInsts() const
bool shouldClusterStores() const
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
unsigned getSGPREncodingGranule() const
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS)
bool hasLdsBranchVmemWARHazard() const
bool hasDefaultComponentZero() const
bool hasGetWaveIdInst() const
bool hasCompressedExport() const
Return true if the target's EXP instruction has the COMPR flag, which affects the meaning of the EN (...
bool hasGFX90AInsts() const
bool hasDstSelForwardingHazard() const
void setScalarizeGlobalBehavior(bool b)
unsigned computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Return occupancy for the given function.
bool hasExtendedImageInsts() const
bool hasBCNT(unsigned Size) const
bool HasFlatAtomicFaddF32Inst
bool supportsAgentScopeFineGrainedRemoteMemoryAtomics() const
bool hasFlatScratchInsts() const
unsigned getBaseMaxNumVGPRs(const Function &F, std::pair< unsigned, unsigned > WavesPerEU) const
bool HasFlatSegmentOffsetBug
bool hasMultiDwordFlatScratchAddressing() const
bool hasArchitectedSGPRs() const
bool hasDenormModeInst() const
bool hasPrivEnabledTrap2NopBug() const
bool hasMFMAInlineLiteralBug() const
bool NegativeScratchOffsetBug
unsigned getTotalNumVGPRs() const
unsigned getMinWavesPerEU() const override
bool hasSMemTimeInst() const
bool hasUnalignedDSAccessEnabled() const
bool hasNegativeScratchOffsetBug() const
const SIInstrInfo * getInstrInfo() const override
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
bool AutoWaitcntBeforeBarrier
bool hasDot1Insts() const
bool hasDot3Insts() const
unsigned getConstantBusLimit(unsigned Opcode) const
bool hasMADIntraFwdBug() const
bool hasVALUMaskWriteHazard() const
const InlineAsmLowering * getInlineAsmLowering() const override
bool HasVGPRSingleUseHintInsts
bool hasAutoWaitcntBeforeBarrier() const
bool hasNSAClauseBug() const
bool hasAtomicFaddRtnInsts() const
unsigned getTotalNumSGPRs() const
const InstrItineraryData * getInstrItineraryData() const override
void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep, const TargetSchedModel *SchedModel) const override
bool HasShaderCyclesHiLoRegisters
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.
bool needsAlignedVGPRs() const
Return true if operations acting on VGPR tuples require even alignment.
bool hasGFX10_3Insts() const
Align getStackAlignment() const
bool privateMemoryResourceIsRangeChecked() const
bool hasScalarSubwordLoads() const
bool hasDot11Insts() const
bool enableFlatScratch() const
bool hasUnalignedBufferAccess() const
bool hasOffset3fBug() const
bool hasDwordx3LoadStores() const
bool hasSignedScratchOffsets() const
bool HasPrivEnabledTrap2NopBug
bool hasGlobalAddTidInsts() const
bool hasSGPRInitBug() const
bool hasFlatScrRegister() const
bool hasGetPCZeroExtension() const
bool hasPermLane64() const
bool requiresNopBeforeDeallocVGPRs() const
bool supportsGetDoorbellID() const
bool hasVcmpxExecWARHazard() const
bool isTgSplitEnabled() const
bool hasFlatAtomicFaddF32Inst() const
bool hasKernargPreload() const
unsigned getMaxNumAGPRs(const Function &F) const
unsigned getVGPRAllocGranule() const
bool hasReadM0MovRelInterpHazard() const
const SIRegisterInfo * getRegisterInfo() const override
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
bool hasRequiredExportPriority() const
bool hasDOTOpSelHazard() const
bool hasLdsWaitVMSRC() const
bool hasMSAALoadDstSelBug() const
const TargetRegisterClass * getBoolRC() const
bool hasFmaakFmamkF32Insts() const
InstructionSelector * getInstructionSelector() const override
unsigned getVGPREncodingGranule() const
bool NegativeUnalignedScratchOffsetBug
bool hasHardClauses() const
bool hasExtendedWaitCounts() const
bool hasLDSMisalignedBug() const
bool HasPartialNSAEncoding
bool d16PreservesUnusedBits() const
bool hasFmacF64Inst() const
bool hasInstPrefetch() const
unsigned maxHardClauseLength() const
bool isMesaGfxShader(const Function &F) const
bool hasVcmpxPermlaneHazard() const
bool hasUserSGPRInit16Bug() const
bool hasExportInsts() const
bool hasVINTERPEncoding() const
const AMDGPURegisterBankInfo * getRegBankInfo() const override
bool hasLegacyGeometry() const
unsigned getMinNumVGPRs(unsigned WavesPerEU) const
TrapHandlerAbi getTrapHandlerAbi() const
bool isCuModeEnabled() const
bool hasScalarAtomics() const
const SIFrameLowering * getFrameLowering() const override
bool hasUnalignedScratchAccess() const
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
bool zeroesHigh16BitsOfDest(unsigned Opcode) const
Returns if the result of this instruction with a 16-bit result returned in a 32-bit register implicit...
bool hasSDWAOutModsVOPC() const
bool hasAtomicFMinFMaxF32GlobalInsts() const
unsigned getBaseMaxNumSGPRs(const Function &F, std::pair< unsigned, unsigned > WavesPerEU, unsigned PreloadedSGPRs, unsigned ReservedNumSGPRs) const
const AMDGPU::IsaInfo::AMDGPUTargetID & getTargetID() const
bool hasAtomicCSubNoRtnInsts() const
bool hasScalarFlatScratchInsts() const
GCNSubtarget & initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS)
bool has12DWordStoreHazard() const
bool hasVALUPartialForwardingHazard() const
bool hasNoDataDepHazard() const
bool hasUnalignedDSAccess() const
bool hasRestrictedSOffset() const
bool hasMin3Max3_16() const
bool hasGFX10_AEncoding() const
bool hasFlatSegmentOffsetBug() const
unsigned getSetRegWaitStates() const
Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
const SITargetLowering * getTargetLowering() const override
bool HasVcmpxPermlaneHazard
bool hasPackedFP32Ops() const
bool hasTransForwardingHazard() const
bool hasDot6Insts() const
bool hasGFX940Insts() const
bool hasLshlAddB64() const
bool hasFullRate64Ops() const
bool hasScalarStores() const
bool isTrapHandlerEnabled() const
bool enableMachineScheduler() const override
bool hasLDSFPAtomicAddF64() const
bool HasAtomicFlatPkAdd16Insts
bool hasFlatGlobalInsts() const
bool hasDX10ClampMode() const
unsigned getNSAThreshold(const MachineFunction &MF) const
bool HasAtomicFMinFMaxF32GlobalInsts
bool getScalarizeGlobalBehavior() const
bool HasAtomicFMinFMaxF32FlatInsts
bool HasPseudoScalarTrans
bool hasReadM0LdsDmaHazard() const
bool hasScalarSMulU64() const
unsigned getKnownHighZeroBitsForFrameIndex() const
Return the number of high bits known to be zero for a frame index.
bool HasShaderCyclesRegister
bool hasShaderCyclesHiLoRegisters() const
bool HasDefaultComponentBroadcast
bool hasScalarPackInsts() const
bool hasNSAEncoding() const
bool hasSMemRealTime() const
bool hasFlatAddressSpace() const
bool hasDPPBroadcasts() const
bool usePRTStrictNull() const
bool HasFP8ConversionInsts
bool hasInstFwdPrefetchBug() const
bool hasAtomicFMinFMaxF64GlobalInsts() const
unsigned getReservedNumSGPRs(const MachineFunction &MF) const
bool hasNullExportTarget() const
Return true if the target's EXP instruction supports the NULL export target.
bool UnalignedScratchAccess
bool hasAtomicFlatPkAdd16Insts() const
bool HasImageGather4D16Bug
bool ldsRequiresM0Init() const
Return true if most LDS instructions have an m0 use that requires m0 to be initialized.
bool HasSMEMtoVectorWriteHazard
bool HasAtomicFaddNoRtnInsts
bool hasSMEMtoVectorWriteHazard() const
bool useAA() const override
bool hasVGPRIndexMode() const
bool HasAtomicBufferGlobalPkAddF16Insts
bool hasUnalignedBufferAccessEnabled() const
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const
Return the maximum number of waves per SIMD for kernels using VGPRs VGPRs.
unsigned getMaxPrivateElementSize(bool ForBufferRSrc=false) const
unsigned getMinFlatWorkGroupSize() const override
bool HasAtomicCSubNoRtnInsts
bool hasImageInsts() const
bool HasAtomicDsPkAdd16Insts
bool hasImageGather4D16Bug() const
bool HasRequiredExportPriority
bool hasDot10Insts() const
bool hasSPackHL() const
Return true if the target has the S_PACK_HL_B32_B16 instruction.
bool hasVMEMtoScalarWriteHazard() const
bool hasCvtFP8VOP1Bug() const
bool supportsMinMaxDenormModes() const
bool HasAtomicBufferPkAddBF16Inst
bool hasNegativeUnalignedScratchOffsetBug() const
bool hasFlatScratchSVSMode() const
bool HasMSAALoadDstSelBug
bool hasHalfRate64Ops() const
bool hasAtomicFaddInsts() const
bool HasAtomicBufferGlobalPkAddF16NoRtnInsts
bool hasNSAtoVMEMBug() const
bool HasArchitectedFlatScratch
unsigned getNSAMaxSize(bool HasSampler=false) const
bool hasAtomicBufferGlobalPkAddF16NoRtnInsts() const
bool hasMIMG_R128() const
std::unique_ptr< ScheduleDAGMutation > createFillMFMAShadowMutation(const TargetInstrInfo *TII) const
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const
Return the maximum number of waves per SIMD for kernels using SGPRs SGPRs.
bool hasAtomicBufferPkAddBF16Inst() const
bool HasAgentScopeFineGrainedRemoteMemoryAtomics
unsigned getMaxFlatWorkGroupSize() const override
bool hasDot5Insts() const
unsigned getMaxNumUserSGPRs() const
bool hasAtomicFaddNoRtnInsts() const
unsigned MaxHardClauseLength
The maximum number of instructions that may be placed within an S_CLAUSE, which is one greater than t...
bool hasPermLaneX16() const
bool hasFlatScratchSVSSwizzleBug() const
bool hasFlatBufferGlobalAtomicFaddF64Inst() const
bool hasScalarDwordx3Loads() const
bool hasVDecCoExecHazard() const
bool hasLDSFPAtomicAddF32() const
unsigned getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override
bool haveRoundOpsF64() const
Have v_trunc_f64, v_ceil_f64, v_rndne_f64.
bool hasDelayAlu() const
Return true if the target has the S_DELAY_ALU instruction.
bool hasReadM0SendMsgHazard() const
bool hasDot8Insts() const
bool hasScalarMulHiInsts() const
bool hasPseudoScalarTrans() const
const LegalizerInfo * getLegalizerInfo() const override
bool hasDS96AndDS128() const
bool HasAtomicFMinFMaxF64GlobalInsts
bool hasReadM0LdsDirectHazard() const
bool useFlatForGlobal() const
static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI)
bool hasVOPDInsts() const
bool hasGFX10_BEncoding() const
SelectionDAGTargetInfo TSInfo
Generation getGeneration() const
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
bool hasForceStoreSC0SC1() const
bool hasVOP3Literal() const
bool hasAtomicBufferGlobalPkAddF16Insts() const
bool hasNoSdstCMPX() const
unsigned getAddressableNumVGPRs() const
bool isXNACKEnabled() const
bool hasScalarAddSub64() const
bool hasIEEEMinMax3() const
bool hasSplitBarriers() const
bool hasUnpackedD16VMem() const
bool enableEarlyIfConversion() const override
bool hasSMRDReadVALUDefHazard() const
A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR was written by a VALU inst...
bool hasRFEHazards() const
bool hasVMEMReadSGPRVALUDefHazard() const
A read of an SGPR by a VMEM instruction requires 5 wait states when the SGPR was written by a VALU In...
bool hasFlatScratchSTMode() const
unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const
bool hasGWSSemaReleaseAll() const
bool hasDPALU_DPP() const
bool enableSIScheduler() const
bool hasAtomicGlobalPkAddBF16Inst() const
bool HasVMEMtoScalarWriteHazard
bool HasAtomicGlobalPkAddBF16Inst
bool hasUnalignedAccessMode() const
unsigned getAddressableNumSGPRs() const
bool hasReadVCCZBug() const
Extra wait hazard is needed in some cases before s_cbranch_vccnz/s_cbranch_vccz.
bool HasAtomicFaddRtnInsts
bool HasRestrictedSOffset
bool hasIEEEMinMax() const
bool hasFmaMixInsts() const
bool HasVALUTransUseHazard
bool hasPackedTID() const
bool HasVcmpxExecWARHazard
bool hasAddNoCarry() const
bool ScalarFlatScratchInsts
bool hasVALUTransUseHazard() const
bool hasShaderCyclesRegister() const
bool HasMFMAInlineLiteralBug
bool UnalignedBufferAccess
bool hasSALUFloatInsts() const
bool hasVGPRSingleUseHintInsts() const
bool EnableUnsafeDSOffsetFolding
bool isPreciseMemoryEnabled() const
void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const override
bool hasDPPSrc1SGPR() const
unsigned getMaxWaveScratchSize() const
bool HasDefaultComponentZero
bool HasMemoryAtomicFaddF32DenormalSupport
bool hasDot4Insts() const
void getPostRAMutations(std::vector< std::unique_ptr< ScheduleDAGMutation > > &Mutations) const override
bool flatScratchIsArchitected() const
bool hasPartialNSAEncoding() const
bool HasInstFwdPrefetchBug
void checkSubtargetFeatures(const Function &F) const
Diagnose inconsistent subtarget features before attempting to codegen function F.
bool hasDot9Insts() const
bool hasAtomicCSub() const
AMDGPU::IsaInfo::AMDGPUTargetID TargetID
bool hasDefaultComponentBroadcast() const
bool requiresCodeObjectV6() const
const CallLowering * getCallLowering() const override
bool hasLdsDirect() const
bool hasGWSAutoReplay() const
bool HasFlatBufferGlobalAtomicFaddF64Inst
static unsigned getNumUserSGPRForField(UserSGPRID ID)
bool hasKernargSegmentPtr() const
void allocKernargPreloadSGPRs(unsigned NumSGPRs)
bool hasDispatchID() const
bool hasPrivateSegmentBuffer() const
unsigned getNumFreeUserSGPRs()
bool hasImplicitBufferPtr() const
unsigned getNumKernargPreloadSGPRs() const
bool hasPrivateSegmentSize() const
unsigned getNumUsedUserSGPRs() const
bool hasDispatchPtr() const
bool hasFlatScratchInit() const
Itinerary data supplied by a subtarget to be used by a target.
const SIRegisterInfo & getRegisterInfo() const
const TargetRegisterClass * getBoolRC() const
Scheduling unit. This is a node in the scheduling DAG.
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
StringRef - Represent a constant reference to a string, i.e.
TargetInstrInfo - Interface to description of machine instruction set.
Provide an instruction scheduling machine model to CodeGen passes.
TargetSubtargetInfo - Generic base class for all target subtargets.
Triple - Helper class for working with autoconf configuration names.
A Use represents the edge between a Value definition and its users.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI)
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI)
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
bool isShader(CallingConv::ID cc)
This is an optimization pass for GlobalISel generic memory operations.
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least, stopping at the first 1.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.