// Extracted from: doxygen/html/GCNSubtarget%5F8h%5Fsource.html

//=====-- GCNSubtarget.h - Define GCN Subtarget for AMDGPU ------*- C++ -*-===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//==-----------------------------------------------------------------------===//

//

/// \file

/// AMD GCN specific subclass of TargetSubtarget.

//

//===----------------------------------------------------------------------===//


#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H

#define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H


#include "AMDGPUCallLowering.h"

#include "AMDGPURegisterBankInfo.h"

#include "AMDGPUSubtarget.h"

#include "SIFrameLowering.h"

#include "SIISelLowering.h"

#include "SIInstrInfo.h"

#include "Utils/AMDGPUBaseInfo.h"

#include "llvm/Support/ErrorHandling.h"


#define GET_SUBTARGETINFO_HEADER

#include "AMDGPUGenSubtargetInfo.inc"


namespace llvm {


class GCNTargetMachine;


class GCNSubtarget final : public AMDGPUGenSubtargetInfo,

                           public AMDGPUSubtarget {

public:

  using AMDGPUSubtarget::getMaxWavesPerEU;


  // The following two enums are documented at:

  //   - https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi


  /// Trap handler ABI selector. getTrapHandlerAbi() yields AMDHSA when
  /// isAmdHsaOS() is true and NONE otherwise.
  enum class TrapHandlerAbi {

    NONE   = 0x00,

    AMDHSA = 0x01,

  };


  /// Trap identifiers; see the trap-handler ABI documentation linked above for
  /// what each value means.
  enum class TrapID {

    LLVMAMDHSATrap      = 0x02,

    LLVMAMDHSADebugTrap = 0x03,

  };


private:

  /// SelectionDAGISel related APIs.

  // NOTE(review): presumably what getSelectionDAGInfo() hands out; that
  // accessor is only declared in this header, so confirm in the .cpp.
  std::unique_ptr<const SelectionDAGTargetInfo> TSInfo;


  /// GlobalISel related APIs.

  // Each object below is owned through a unique_ptr; the matching get*()
  // accessor in this class returns the raw pointer via .get().
  std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;

  std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;

  std::unique_ptr<InstructionSelector> InstSelector;

  std::unique_ptr<LegalizerInfo> Legalizer;

  std::unique_ptr<AMDGPURegisterBankInfo> RegBankInfo;


protected:

  // Basic subtarget description.

  AMDGPU::IsaInfo::AMDGPUTargetID TargetID;

  // Stored as a plain unsigned; getGeneration() casts it to Generation.
  unsigned Gen = INVALID;

  InstrItineraryData InstrItins;

  int LDSBankCount = 0;

  // Returned by getMaxPrivateElementSize() unless flat scratch is enabled and
  // the query is not for a buffer resource.
  unsigned MaxPrivateElementSize = 0;


  // Possibly statically set by tablegen, but may want to be overridden.

  bool FastDenormalF32 = false;

  bool HalfRate64Ops = false;

  bool FullRate64Ops = false;


  // Dynamically set bits that enable features.

  bool FlatForGlobal = false;

  bool AutoWaitcntBeforeBarrier = false;

  bool BackOffBarrier = false;

  bool UnalignedScratchAccess = false;

  // Combined with the per-memory-kind Unaligned*Access flags by the
  // hasUnaligned*AccessEnabled() queries.
  bool UnalignedAccessMode = false;

  bool RelaxedBufferOOBMode = false;

  bool HasApertureRegs = false;

  bool SupportsXNACK = false;

  bool KernargPreload = false;


  // This should not be used directly. 'TargetID' tracks the dynamic settings

  // for XNACK.

  bool EnableXNACK = false;


  bool EnableTgSplit = false;

  bool EnableCuMode = false;

  bool TrapHandler = false;

  bool EnablePreciseMemory = false;


  // Used as options.

  bool EnableLoadStoreOpt = false;

  bool EnableUnsafeDSOffsetFolding = false;

  bool EnableSIScheduler = false;

  // Only honoured together with CIInsts; see useDS128().
  bool EnableDS128 = false;

  bool EnablePRTStrictNull = false;

  bool DumpCode = false;

  bool AssemblerPermissiveWavesize = false;


  // Subtarget properties statically set by tablegen.
  // Every flag below defaults to false (MaxHardClauseLength defaults to 0).
  // NOTE(review): presumably filled in by ParseSubtargetFeatures() -- confirm.

  bool FP64 = false;

  bool FMA = false;

  bool MIMG_R128 = false;

  bool CIInsts = false;

  bool GFX8Insts = false;

  bool GFX9Insts = false;

  bool GFX90AInsts = false;

  bool GFX940Insts = false;

  bool GFX950Insts = false;

  bool GFX10Insts = false;

  bool GFX11Insts = false;

  bool GFX12Insts = false;

  bool GFX1250Insts = false;

  bool GFX10_3Insts = false;

  bool GFX7GFX8GFX9Insts = false;

  bool SGPRInitBug = false;

  bool UserSGPRInit16Bug = false;

  bool NegativeScratchOffsetBug = false;

  bool NegativeUnalignedScratchOffsetBug = false;

  bool HasSMemRealTime = false;

  bool HasIntClamp = false;

  bool HasFmaMixInsts = false;

  bool HasFmaMixBF16Insts = false;

  bool HasMovrel = false;

  bool HasVGPRIndexMode = false;

  bool HasScalarDwordx3Loads = false;

  bool HasScalarStores = false;

  bool HasScalarAtomics = false;

  bool HasSDWAOmod = false;

  bool HasSDWAScalar = false;

  bool HasSDWASdst = false;

  bool HasSDWAMac = false;

  bool HasSDWAOutModsVOPC = false;

  bool HasDPP = false;

  bool HasDPP8 = false;

  bool HasDPALU_DPP = false;

  bool HasDPPSrc1SGPR = false;

  bool HasPackedFP32Ops = false;

  bool HasImageInsts = false;

  bool HasExtendedImageInsts = false;

  bool HasR128A16 = false;

  bool HasA16 = false;

  bool HasG16 = false;

  bool HasNSAEncoding = false;

  bool HasPartialNSAEncoding = false;

  bool GFX10_AEncoding = false;

  bool GFX10_BEncoding = false;

  bool HasDLInsts = false;

  bool HasFmacF64Inst = false;

  bool HasDot1Insts = false;

  bool HasDot2Insts = false;

  bool HasDot3Insts = false;

  bool HasDot4Insts = false;

  bool HasDot5Insts = false;

  bool HasDot6Insts = false;

  bool HasDot7Insts = false;

  bool HasDot8Insts = false;

  bool HasDot9Insts = false;

  bool HasDot10Insts = false;

  bool HasDot11Insts = false;

  bool HasDot12Insts = false;

  bool HasDot13Insts = false;

  bool HasMAIInsts = false;

  bool HasFP8Insts = false;

  bool HasFP8ConversionInsts = false;

  bool HasCubeInsts = false;

  bool HasLerpInst = false;

  bool HasSadInsts = false;

  bool HasQsadInsts = false;

  bool HasCvtNormInsts = false;

  bool HasCvtPkNormVOP2Insts = false;

  bool HasCvtPkNormVOP3Insts = false;

  bool HasFP8E5M3Insts = false;

  bool HasCvtFP8Vop1Bug = false;

  bool HasPkFmacF16Inst = false;

  bool HasAtomicFMinFMaxF32GlobalInsts = false;

  bool HasAtomicFMinFMaxF64GlobalInsts = false;

  bool HasAtomicFMinFMaxF32FlatInsts = false;

  bool HasAtomicFMinFMaxF64FlatInsts = false;

  bool HasAtomicDsPkAdd16Insts = false;

  bool HasAtomicFlatPkAdd16Insts = false;

  bool HasAtomicFaddRtnInsts = false;

  bool HasAtomicFaddNoRtnInsts = false;

  bool HasMemoryAtomicFaddF32DenormalSupport = false;

  bool HasAtomicBufferGlobalPkAddF16NoRtnInsts = false;

  bool HasAtomicBufferGlobalPkAddF16Insts = false;

  bool HasAtomicCSubNoRtnInsts = false;

  bool HasAtomicGlobalPkAddBF16Inst = false;

  bool HasAtomicBufferPkAddBF16Inst = false;

  bool HasFlatAtomicFaddF32Inst = false;

  bool HasFlatBufferGlobalAtomicFaddF64Inst = false;

  bool HasDefaultComponentZero = false;

  bool HasAgentScopeFineGrainedRemoteMemoryAtomics = false;

  bool HasEmulatedSystemScopeAtomics = false;

  bool HasDefaultComponentBroadcast = false;

  bool HasXF32Insts = false;

  /// The maximum number of instructions that may be placed within an S_CLAUSE,

  /// which is one greater than the maximum argument to S_CLAUSE. A value of 0

  /// indicates a lack of S_CLAUSE support.

  unsigned MaxHardClauseLength = 0;

  bool SupportsSRAMECC = false;

  bool DynamicVGPR = false;

  bool DynamicVGPRBlockSize32 = false;

  bool HasVMemToLDSLoad = false;

  bool RequiresAlignVGPR = false;


  // This should not be used directly. 'TargetID' tracks the dynamic settings

  // for SRAMECC.

  bool EnableSRAMECC = false;


  // Further feature, hazard and hardware-bug flags, all defaulting to false.
  // Many are exposed unchanged through a same-named has*() accessor in this
  // class (e.g. HasVscnt via hasVscnt()).
  bool HasNoSdstCMPX = false;

  bool HasVscnt = false;

  bool HasWaitXcnt = false;

  bool HasGetWaveIdInst = false;

  bool HasSMemTimeInst = false;

  bool HasShaderCyclesRegister = false;

  bool HasShaderCyclesHiLoRegisters = false;

  bool HasVOP3Literal = false;

  bool HasNoDataDepHazard = false;

  bool FlatAddressSpace = false;

  bool FlatInstOffsets = false;

  bool FlatGlobalInsts = false;

  bool FlatScratchInsts = false;

  bool FlatGVSMode = false;

  bool ScalarFlatScratchInsts = false;

  bool HasArchitectedFlatScratch = false;

  // Only takes effect when FlatScratchInsts is also set; see
  // enableFlatScratch().
  bool EnableFlatScratch = false;

  bool HasArchitectedSGPRs = false;

  bool HasGDS = false;

  bool HasGWS = false;

  bool AddNoCarryInsts = false;

  bool HasUnpackedD16VMem = false;

  bool LDSMisalignedBug = false;

  bool HasMFMAInlineLiteralBug = false;

  bool UnalignedBufferAccess = false;

  bool UnalignedDSAccess = false;

  bool HasPackedTID = false;

  // Mutable after construction through setScalarizeGlobalBehavior().
  bool ScalarizeGlobal = false;

  bool HasSALUFloatInsts = false;

  bool HasPseudoScalarTrans = false;

  bool HasRestrictedSOffset = false;

  bool Has64BitLiterals = false;

  bool Has1024AddressableVGPRs = false;

  bool HasBitOp3Insts = false;

  bool HasTanhInsts = false;

  bool HasTensorCvtLutInsts = false;

  bool HasTransposeLoadF4F6Insts = false;

  bool HasPrngInst = false;

  bool HasBVHDualAndBVH8Insts = false;

  bool HasPermlane16Swap = false;

  bool HasPermlane32Swap = false;

  bool HasVcmpxPermlaneHazard = false;

  bool HasVMEMtoScalarWriteHazard = false;

  bool HasSMEMtoVectorWriteHazard = false;

  bool HasInstFwdPrefetchBug = false;

  bool HasVmemPrefInsts = false;

  bool HasSafeSmemPrefetch = false;

  bool HasSafeCUPrefetch = false;

  bool HasVcmpxExecWARHazard = false;

  bool HasLdsBranchVmemWARHazard = false;

  bool HasNSAtoVMEMBug = false;

  bool HasNSAClauseBug = false;

  bool HasOffset3fBug = false;

  bool HasFlatSegmentOffsetBug = false;

  bool HasImageStoreD16Bug = false;

  bool HasImageGather4D16Bug = false;

  bool HasMSAALoadDstSelBug = false;

  bool HasPrivEnabledTrap2NopBug = false;

  bool Has1_5xVGPRs = false;

  bool HasMADIntraFwdBug = false;

  bool HasVOPDInsts = false;

  bool HasVALUTransUseHazard = false;

  bool HasRequiredExportPriority = false;

  bool HasVmemWriteVgprInOrder = false;

  bool HasAshrPkInsts = false;

  bool HasIEEEMinimumMaximumInsts = false;

  bool HasMinimum3Maximum3F32 = false;

  bool HasMinimum3Maximum3F16 = false;

  bool HasMin3Max3PKF16 = false;

  bool HasMinimum3Maximum3PKF16 = false;

  bool HasLshlAddU64Inst = false;

  bool HasAddSubU64Insts = false;

  bool HasMadU32Inst = false;

  bool HasAddMinMaxInsts = false;

  bool HasPkAddMinMaxInsts = false;

  bool HasPointSampleAccel = false;

  bool HasLdsBarrierArriveAtomic = false;

  bool HasSetPrioIncWgInst = false;

  bool HasSWakeupBarrier = false;


  bool RequiresCOV6 = false;

  bool UseBlockVGPROpsForCSR = false;

  bool HasGloballyAddressableScratch = false;


  bool Has45BitNumRecordsBufferResource = false;


  bool HasClusters = false;

  bool RequiresWaitsBeforeSystemScopeStores = false;


  // Dummy feature to use for assembler in tablegen.

  bool FeatureDisable = false;


private:

  // Embedded by value; getInstrInfo() returns its address and
  // getRegisterInfo() forwards to InstrInfo.getRegisterInfo().
  SIInstrInfo InstrInfo;

  // Embedded by value; address returned by getTargetLowering().
  SITargetLowering TLInfo;

  // Embedded by value; address returned by getFrameLowering().
  SIFrameLowering FrameLowering;


public:

  /// Construct a subtarget from triple \p TT, the strings \p GPU and \p FS,
  /// and the owning target machine \p TM. Defined out of line.
  // NOTE(review): \p GPU / \p FS are presumably the processor name and the
  // feature string -- confirm against the definition.
  GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,

               const GCNTargetMachine &TM);

  ~GCNSubtarget() override;


  /// Defined out of line; returns a reference to a GCNSubtarget.
  // NOTE(review): presumably returns *this after applying \p GPU / \p FS --
  // confirm in the .cpp.
  GCNSubtarget &initializeSubtargetDependencies(const Triple &TT,

                                                   StringRef GPU, StringRef FS);


  /// Diagnose inconsistent subtarget features before attempting to codegen

  /// function \p F.

  void checkSubtargetFeatures(const Function &F) const;


  /// \returns the address of the SIInstrInfo embedded in this subtarget.
  const SIInstrInfo *getInstrInfo() const override { return &InstrInfo; }

  /// \returns the address of the SIFrameLowering embedded in this subtarget.
  const SIFrameLowering *getFrameLowering() const override {
    const SIFrameLowering &Lowering = FrameLowering;
    return &Lowering;
  }

  /// \returns the address of the SITargetLowering embedded in this subtarget.
  const SITargetLowering *getTargetLowering() const override {
    const SITargetLowering &Lowering = TLInfo;
    return &Lowering;
  }

  /// \returns the address of the register info object exposed by InstrInfo.
  const SIRegisterInfo *getRegisterInfo() const override {
    const auto &RegInfo = InstrInfo.getRegisterInfo();
    return &RegInfo;
  }

  /// Defined out of line.
  const SelectionDAGTargetInfo *getSelectionDAGInfo() const override;


  // GlobalISel accessors: each one exposes the raw, non-owning pointer held by
  // the corresponding unique_ptr member.

  /// \returns the pointer held by CallLoweringInfo.
  const CallLowering *getCallLowering() const override {
    const CallLowering *CL = CallLoweringInfo.get();
    return CL;
  }

  /// \returns the pointer held by InlineAsmLoweringInfo.
  const InlineAsmLowering *getInlineAsmLowering() const override {
    const InlineAsmLowering *IAL = InlineAsmLoweringInfo.get();
    return IAL;
  }

  /// \returns the pointer held by InstSelector.
  InstructionSelector *getInstructionSelector() const override {
    InstructionSelector *IS = InstSelector.get();
    return IS;
  }

  /// \returns the pointer held by Legalizer.
  const LegalizerInfo *getLegalizerInfo() const override {
    const LegalizerInfo *LI = Legalizer.get();
    return LI;
  }

  /// \returns the pointer held by RegBankInfo.
  const AMDGPURegisterBankInfo *getRegBankInfo() const override {
    const AMDGPURegisterBankInfo *RBI = RegBankInfo.get();
    return RBI;
  }


  /// \returns a const reference to the TargetID member.
  const AMDGPU::IsaInfo::AMDGPUTargetID &getTargetID() const { return TargetID; }

  /// \returns the address of the InstrItins member.
  const InstrItineraryData *getInstrItineraryData() const override {
    const InstrItineraryData &Itins = InstrItins;
    return &Itins;
  }

  /// Defined out of line.
  void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);


  /// \returns the stored \c Gen value converted to the \c Generation
  /// enumeration.
  Generation getGeneration() const { return static_cast<Generation>(Gen); }


  unsigned getMaxWaveScratchSize() const {

    // See COMPUTE_TMPRING_SIZE.WAVESIZE.

    if (getGeneration() >= GFX12) {

      // 18-bit field in units of 64-dword.

      return (64 * 4) * ((1 << 18) - 1);

    }

    if (getGeneration() == GFX11) {

      // 15-bit field in units of 64-dword.

      return (64 * 4) * ((1 << 15) - 1);

    }

    // 13-bit field in units of 256-dword.

    return (256 * 4) * ((1 << 13) - 1);

  }


  /// Return the number of high bits known to be zero for a frame index:
  /// the leading-zero count of the maximum wave scratch size plus the log2 of
  /// the wavefront size.
  unsigned getKnownHighZeroBitsForFrameIndex() const {
    const unsigned MaxScratch = getMaxWaveScratchSize();
    return llvm::countl_zero(MaxScratch) + getWavefrontSizeLog2();
  }


  /// \returns the LDSBankCount member.
  int getLDSBankCount() const { return LDSBankCount; }

  /// \returns MaxPrivateElementSize when \p ForBufferRSrc is set or flat
  /// scratch is not enabled; otherwise 16.
  unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const {
    if (ForBufferRSrc || !enableFlatScratch())
      return MaxPrivateElementSize;
    return 16;
  }


  /// Defined out of line.
  unsigned getConstantBusLimit(unsigned Opcode) const;

  /// Returns whether an instruction that produces a 16-bit result in a 32-bit
  /// register implicitly zeroes the high 16 bits instead of preserving the
  /// original value.
  bool zeroesHigh16BitsOfDest(unsigned Opcode) const;

  /// \returns true for GFX10 and later generations, except when GFX1250Insts
  /// is set.
  bool supportsWGP() const {
    return !GFX1250Insts && getGeneration() >= GFX10;
  }


  // Plain flag queries: each returns the named member unchanged.

  bool hasIntClamp() const { return HasIntClamp; }

  bool hasFP64() const { return FP64; }

  bool hasMIMG_R128() const { return MIMG_R128; }

  /// Reads the same FP64 flag as hasFP64().
  bool hasHWFP64() const { return FP64; }

  bool hasHalfRate64Ops() const { return HalfRate64Ops; }

  bool hasFullRate64Ops() const { return FullRate64Ops; }


  bool hasAddr64() const {

    return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS);

  }


  bool hasFlat() const {

    return (getGeneration() > AMDGPUSubtarget::SOUTHERN_ISLANDS);

  }


  // Return true if the target only has the reverse operand versions of VALU

  // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32).


  bool hasOnlyRevVALUShifts() const {

    return getGeneration() >= VOLCANIC_ISLANDS;

  }


  bool hasFractBug() const {

    return getGeneration() == SOUTHERN_ISLANDS;

  }


  // Capability queries that are unconditionally true for this subtarget
  // class.

  bool hasBFE() const { return true; }

  bool hasBFI() const { return true; }

  /// Defers to hasBFE().
  bool hasBFM() const { return hasBFE(); }

  /// \p Size is not consulted; the answer is always true.
  bool hasBCNT(unsigned Size) const { return true; }

  bool hasFFBL() const { return true; }

  bool hasFFBH() const { return true; }


  bool hasMed3_16() const {

    return getGeneration() >= AMDGPUSubtarget::GFX9;

  }


  bool hasMin3Max3_16() const {

    return getGeneration() >= AMDGPUSubtarget::GFX9;

  }


  bool hasFmaMixInsts() const {

    return HasFmaMixInsts;

  }


  bool hasFmaMixBF16Insts() const { return HasFmaMixBF16Insts; }


  bool hasCARRY() const {

    return true;

  }


  bool hasFMA() const {

    return FMA;

  }


  bool hasSwap() const {

    return GFX9Insts;

  }


  bool hasScalarPackInsts() const {

    return GFX9Insts;

  }


  bool hasScalarMulHiInsts() const {

    return GFX9Insts;

  }


  bool hasScalarSubwordLoads() const { return getGeneration() >= GFX12; }


  /// \returns TrapHandlerAbi::AMDHSA when isAmdHsaOS() holds, otherwise
  /// TrapHandlerAbi::NONE.
  TrapHandlerAbi getTrapHandlerAbi() const {
    if (isAmdHsaOS())
      return TrapHandlerAbi::AMDHSA;
    return TrapHandlerAbi::NONE;
  }

  /// The S_GETREG DOORBELL_ID is supported by all GFX9 onward targets.
  bool supportsGetDoorbellID() const { return getGeneration() >= GFX9; }


  /// True if the offset field of DS instructions works as expected. On SI, the
  /// offset uses a 16-bit adder and does not always wrap properly.
  bool hasUsableDSOffset() const { return getGeneration() >= SEA_ISLANDS; }

  /// \returns the EnableUnsafeDSOffsetFolding option.
  bool unsafeDSOffsetFoldingEnabled() const { return EnableUnsafeDSOffsetFolding; }

  /// Condition output from div_scale is usable (every generation except
  /// SOUTHERN_ISLANDS).
  bool hasUsableDivScaleConditionOutput() const {
    const bool IsSI = getGeneration() == SOUTHERN_ISLANDS;
    return !IsSI;
  }


  /// Extra wait hazard is needed in some cases before

  /// s_cbranch_vccnz/s_cbranch_vccz.


  bool hasReadVCCZBug() const {

    return getGeneration() <= SEA_ISLANDS;

  }


  /// Writes to VCC_LO/VCC_HI update the VCCZ flag.


  bool partialVCCWritesUpdateVCCZ() const {

    return getGeneration() >= GFX10;

  }


  /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR

  /// was written by a VALU instruction.


  bool hasSMRDReadVALUDefHazard() const {

    return getGeneration() == SOUTHERN_ISLANDS;

  }


  /// A read of an SGPR by a VMEM instruction requires 5 wait states when the

  /// SGPR was written by a VALU Instruction.


  bool hasVMEMReadSGPRVALUDefHazard() const {

    return getGeneration() >= VOLCANIC_ISLANDS;

  }


  bool hasRFEHazards() const {

    return getGeneration() >= VOLCANIC_ISLANDS;

  }


  /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.


  unsigned getSetRegWaitStates() const {

    return getGeneration() <= SEA_ISLANDS ? 1 : 2;

  }


  bool dumpCode() const {

    return DumpCode;

  }


  /// Return the amount of LDS that can be used that will not restrict the

  /// occupancy lower than WaveCount.

  unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,

                                           const Function &) const;


  bool supportsMinMaxDenormModes() const {

    return getGeneration() >= AMDGPUSubtarget::GFX9;

  }


  /// \returns If target supports S_DENORM_MODE.


  bool hasDenormModeInst() const {

    return getGeneration() >= AMDGPUSubtarget::GFX10;

  }


  bool useFlatForGlobal() const {

    return FlatForGlobal;

  }


  /// \returns If target supports ds_read/write_b128 and user enables generation

  /// of ds_read/write_b128.


  bool useDS128() const {

    return CIInsts && EnableDS128;

  }


  /// \return If target supports ds_read/write_b96/128.


  bool hasDS96AndDS128() const {

    return CIInsts;

  }


  /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64


  bool haveRoundOpsF64() const {

    return CIInsts;

  }


  /// \returns If MUBUF instructions always perform range checking, even for

  /// buffer resources used for private memory access.


  bool privateMemoryResourceIsRangeChecked() const {

    return getGeneration() < AMDGPUSubtarget::GFX9;

  }


  /// \returns If target requires PRT Struct NULL support (zero result registers

  /// for sparse texture support).


  bool usePRTStrictNull() const {

    return EnablePRTStrictNull;

  }


  /// \returns the AutoWaitcntBeforeBarrier flag.
  bool hasAutoWaitcntBeforeBarrier() const { return AutoWaitcntBeforeBarrier; }

  /// \returns true if the target supports backing off of s_barrier instructions
  /// when an exception is raised.
  bool supportsBackOffBarrier() const { return BackOffBarrier; }

  // Unaligned access queries. The plain has*Access() forms report the
  // capability flag alone; the *Enabled() forms additionally require
  // UnalignedAccessMode.

  bool hasUnalignedBufferAccess() const { return UnalignedBufferAccess; }

  bool hasUnalignedBufferAccessEnabled() const {
    if (!UnalignedBufferAccess)
      return false;
    return UnalignedAccessMode;
  }

  bool hasUnalignedDSAccess() const { return UnalignedDSAccess; }

  bool hasUnalignedDSAccessEnabled() const {
    if (!UnalignedDSAccess)
      return false;
    return UnalignedAccessMode;
  }

  bool hasUnalignedScratchAccess() const { return UnalignedScratchAccess; }

  bool hasUnalignedScratchAccessEnabled() const {
    if (!UnalignedScratchAccess)
      return false;
    return UnalignedAccessMode;
  }

  bool hasUnalignedAccessMode() const { return UnalignedAccessMode; }

  bool hasRelaxedBufferOOBMode() const {
    return RelaxedBufferOOBMode;
  }

  bool hasApertureRegs() const { return HasApertureRegs; }

  /// \returns the TrapHandler flag.
  bool isTrapHandlerEnabled() const { return TrapHandler; }


  /// Answered from TargetID (isXnackOnOrAny()) rather than from the
  /// EnableXNACK member.
  bool isXNACKEnabled() const { return TargetID.isXnackOnOrAny(); }

  /// \returns the EnableTgSplit flag.
  bool isTgSplitEnabled() const { return EnableTgSplit; }

  /// \returns the EnableCuMode flag.
  bool isCuModeEnabled() const { return EnableCuMode; }

  /// \returns the EnablePreciseMemory flag.
  bool isPreciseMemoryEnabled() const {
    return EnablePreciseMemory;
  }


  bool hasFlatAddressSpace() const { return FlatAddressSpace; }

  /// Defers to hasFlatAddressSpace().
  bool hasFlatScrRegister() const { return hasFlatAddressSpace(); }

  bool hasFlatInstOffsets() const { return FlatInstOffsets; }

  bool hasFlatGlobalInsts() const { return FlatGlobalInsts; }

  bool hasFlatScratchInsts() const { return FlatScratchInsts; }

  /// Check if target supports ST addressing mode with FLAT scratch
  /// instructions. The ST addressing mode means no registers are used, either
  /// VGPR or SGPR, but only immediate offset is swizzled and added to the FLAT
  /// scratch base.
  bool hasFlatScratchSTMode() const {
    if (!hasFlatScratchInsts())
      return false;
    return hasGFX10_3Insts() || hasGFX940Insts();
  }

  /// \returns true when either GFX940Insts or GFX11Insts is set.
  bool hasFlatScratchSVSMode() const {
    return GFX940Insts || GFX11Insts;
  }

  bool hasScalarFlatScratchInsts() const { return ScalarFlatScratchInsts; }

  /// \returns true when flat scratch is architected, or when the
  /// EnableFlatScratch option is set and FLAT scratch instructions exist.
  bool enableFlatScratch() const {
    if (flatScratchIsArchitected())
      return true;
    return EnableFlatScratch && hasFlatScratchInsts();
  }


  /// Reports the GFX10_BEncoding flag.
  bool hasGlobalAddTidInsts() const { return GFX10_BEncoding; }

  /// Reports the GFX10_BEncoding flag.
  bool hasAtomicCSub() const { return GFX10_BEncoding; }

  /// \returns true unless the GFX1250 instruction set is present.
  bool hasMTBUFInsts() const {
    return !hasGFX1250Insts();
  }

  /// \returns true unless the GFX1250 instruction set is present.
  bool hasFormattedMUBUFInsts() const {
    return !hasGFX1250Insts();
  }

  /// \returns true when neither the GFX940 nor the GFX1250 instruction set is
  /// present.
  bool hasExportInsts() const {
    return !(hasGFX940Insts() || hasGFX1250Insts());
  }

  /// \returns true when GFX11Insts is set and the GFX1250 instruction set is
  /// absent.
  bool hasVINTERPEncoding() const {
    if (!GFX11Insts)
      return false;
    return !hasGFX1250Insts();
  }

  /// DS_ADD_F64/DS_ADD_RTN_F64
  bool hasLdsAtomicAddF64() const { return hasGFX90AInsts() || hasGFX1250Insts(); }


  /// \returns true for GFX9 and later generations.
  bool hasMultiDwordFlatScratchAddressing() const { return getGeneration() >= GFX9; }

  bool hasFlatSegmentOffsetBug() const { return HasFlatSegmentOffsetBug; }

  /// \returns true for generations strictly after GFX9.
  bool hasFlatLgkmVMemCountInOrder() const { return getGeneration() > GFX9; }

  /// \returns true for GFX9 and later generations.
  bool hasD16LoadStore() const { return getGeneration() >= GFX9; }

  /// \returns true when D16 load/store exists and TargetID does not report
  /// SRAMECC as on-or-any.
  bool d16PreservesUnusedBits() const {
    if (!hasD16LoadStore())
      return false;
    return !TargetID.isSramEccOnOrAny();
  }

  /// \returns true for VOLCANIC_ISLANDS and later generations.
  bool hasD16Images() const { return getGeneration() >= VOLCANIC_ISLANDS; }

  /// Return if most LDS instructions have an m0 use that requires m0 to be
  /// initialized.
  bool ldsRequiresM0Init() const { return getGeneration() < GFX9; }

  /// True if the hardware rewinds and replays GWS operations if a wave is
  /// preempted.
  ///
  /// If this is false, a GWS operation requires testing if a nack set the
  /// MEM_VIOL bit, and repeating if so.
  bool hasGWSAutoReplay() const { return getGeneration() >= GFX9; }


  /// \returns if target has ds_gws_sema_release_all instruction.
  bool hasGWSSemaReleaseAll() const { return CIInsts; }

  /// \returns true if the target has integer add/sub instructions that do not
  /// produce a carry-out. This includes v_add_[iu]32, v_sub_[iu]32,
  /// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier
  /// for saturation.
  bool hasAddNoCarry() const { return AddNoCarryInsts; }

  /// \returns true for GFX12 and later generations.
  bool hasScalarAddSub64() const {
    return getGeneration() >= GFX12;
  }

  /// \returns true for GFX12 and later generations.
  bool hasScalarSMulU64() const {
    return getGeneration() >= GFX12;
  }

  bool hasUnpackedD16VMem() const { return HasUnpackedD16VMem; }

  /// Covers VS/PS/CS graphics shaders: true when the OS is Mesa3D and the
  /// calling convention of \p F is a shader calling convention.
  bool isMesaGfxShader(const Function &F) const {
    if (!isMesa3DOS())
      return false;
    return AMDGPU::isShader(F.getCallingConv());
  }

  /// \returns true for SEA_ISLANDS and later generations.
  bool hasMad64_32() const { return getGeneration() >= SEA_ISLANDS; }


  // SDWA capability queries; each returns the matching HasSDWA* flag.

  bool hasSDWAOmod() const { return HasSDWAOmod; }

  bool hasSDWAScalar() const { return HasSDWAScalar; }

  bool hasSDWASdst() const { return HasSDWASdst; }

  bool hasSDWAMac() const { return HasSDWAMac; }

  bool hasSDWAOutModsVOPC() const { return HasSDWAOutModsVOPC; }

  /// \returns the HasDLInsts flag.
  bool hasDLInsts() const { return HasDLInsts; }

  /// \returns the HasFmacF64Inst flag.
  bool hasFmacF64Inst() const {
    return HasFmacF64Inst;
  }


  // Dot-instruction feature queries; hasDotNInsts() returns the HasDotNInsts
  // flag unchanged.

  bool hasDot1Insts() const { return HasDot1Insts; }

  bool hasDot2Insts() const { return HasDot2Insts; }

  bool hasDot3Insts() const { return HasDot3Insts; }

  bool hasDot4Insts() const { return HasDot4Insts; }

  bool hasDot5Insts() const { return HasDot5Insts; }

  bool hasDot6Insts() const { return HasDot6Insts; }

  bool hasDot7Insts() const { return HasDot7Insts; }

  bool hasDot8Insts() const { return HasDot8Insts; }

  bool hasDot9Insts() const { return HasDot9Insts; }

  bool hasDot10Insts() const { return HasDot10Insts; }

  bool hasDot11Insts() const { return HasDot11Insts; }

  bool hasDot12Insts() const { return HasDot12Insts; }

  bool hasDot13Insts() const { return HasDot13Insts; }


  // Instruction-availability queries; each returns the same-named Has* flag.

  bool hasMAIInsts() const { return HasMAIInsts; }

  bool hasFP8Insts() const { return HasFP8Insts; }

  bool hasFP8ConversionInsts() const {
    return HasFP8ConversionInsts;
  }

  bool hasCubeInsts() const {
    return HasCubeInsts;
  }

  bool hasLerpInst() const {
    return HasLerpInst;
  }

  bool hasSadInsts() const {
    return HasSadInsts;
  }

  bool hasQsadInsts() const {
    return HasQsadInsts;
  }

  bool hasCvtNormInsts() const {
    return HasCvtNormInsts;
  }

  bool hasCvtPkNormVOP2Insts() const {
    return HasCvtPkNormVOP2Insts;
  }

  bool hasCvtPkNormVOP3Insts() const {
    return HasCvtPkNormVOP3Insts;
  }

  bool hasFP8E5M3Insts() const {
    return HasFP8E5M3Insts;
  }

  bool hasPkFmacF16Inst() const { return HasPkFmacF16Inst; }


  bool hasAtomicFMinFMaxF32GlobalInsts() const {

    return HasAtomicFMinFMaxF32GlobalInsts;

  }


  bool hasAtomicFMinFMaxF64GlobalInsts() const {

    return HasAtomicFMinFMaxF64GlobalInsts;

  }


  bool hasAtomicFMinFMaxF32FlatInsts() const {

    return HasAtomicFMinFMaxF32FlatInsts;

  }


  bool hasAtomicFMinFMaxF64FlatInsts() const {

    return HasAtomicFMinFMaxF64FlatInsts;

  }


  bool hasAtomicDsPkAdd16Insts() const { return HasAtomicDsPkAdd16Insts; }


  bool hasAtomicFlatPkAdd16Insts() const { return HasAtomicFlatPkAdd16Insts; }


  bool hasAtomicFaddInsts() const {

    return HasAtomicFaddRtnInsts || HasAtomicFaddNoRtnInsts;

  }


  bool hasAtomicFaddRtnInsts() const { return HasAtomicFaddRtnInsts; }


  bool hasAtomicFaddNoRtnInsts() const { return HasAtomicFaddNoRtnInsts; }


  bool hasAtomicBufferGlobalPkAddF16NoRtnInsts() const {

    return HasAtomicBufferGlobalPkAddF16NoRtnInsts;

  }


  bool hasAtomicBufferGlobalPkAddF16Insts() const {

    return HasAtomicBufferGlobalPkAddF16Insts;

  }


  bool hasAtomicGlobalPkAddBF16Inst() const {

    return HasAtomicGlobalPkAddBF16Inst;

  }


  bool hasAtomicBufferPkAddBF16Inst() const {

    return HasAtomicBufferPkAddBF16Inst;

  }


  bool hasFlatAtomicFaddF32Inst() const { return HasFlatAtomicFaddF32Inst; }


  /// \return true if the target has flat, global, and buffer atomic fadd for
  /// double.
  bool hasFlatBufferGlobalAtomicFaddF64Inst() const { return HasFlatBufferGlobalAtomicFaddF64Inst; }

  /// \return true if the target's flat, global, and buffer atomic fadd for
  /// float supports denormal handling.
  bool hasMemoryAtomicFaddF32DenormalSupport() const { return HasMemoryAtomicFaddF32DenormalSupport; }

  /// \return true if atomic operations targeting fine-grained memory work
  /// correctly at device scope, in allocations in host or peer PCIe device
  /// memory.
  bool supportsAgentScopeFineGrainedRemoteMemoryAtomics() const { return HasAgentScopeFineGrainedRemoteMemoryAtomics; }

  /// \return true if HW emulates system scope atomics unsupported by the PCI-e
  /// via CAS loop.
  bool hasEmulatedSystemScopeAtomics() const { return HasEmulatedSystemScopeAtomics; }

  /// \returns the HasDefaultComponentZero flag.
  bool hasDefaultComponentZero() const {
    return HasDefaultComponentZero;
  }

  /// \returns the HasDefaultComponentBroadcast flag.
  bool hasDefaultComponentBroadcast() const { return HasDefaultComponentBroadcast; }


  // Each query below returns the same-named Has* flag unchanged.

  bool hasNoSdstCMPX() const { return HasNoSdstCMPX; }

  bool hasVscnt() const { return HasVscnt; }

  bool hasGetWaveIdInst() const { return HasGetWaveIdInst; }

  bool hasSMemTimeInst() const { return HasSMemTimeInst; }

  bool hasShaderCyclesRegister() const { return HasShaderCyclesRegister; }

  bool hasShaderCyclesHiLoRegisters() const { return HasShaderCyclesHiLoRegisters; }

  bool hasVOP3Literal() const { return HasVOP3Literal; }

  bool hasNoDataDepHazard() const { return HasNoDataDepHazard; }


  bool vmemWriteNeedsExpWaitcnt() const {

    return getGeneration() < SEA_ISLANDS;

  }


  bool hasInstPrefetch() const {

    return getGeneration() == GFX10 || getGeneration() == GFX11;

  }


  bool hasPrefetch() const { return GFX12Insts; }


  bool hasVmemPrefInsts() const { return HasVmemPrefInsts; }


  bool hasSafeSmemPrefetch() const { return HasSafeSmemPrefetch; }


  bool hasSafeCUPrefetch() const { return HasSafeCUPrefetch; }


  // Has s_cmpk_* instructions.

  bool hasSCmpK() const { return getGeneration() < GFX12; }


  /// \returns a fixed 16-byte stack alignment.
  ///
  /// Scratch is allocated in 256 dword per wave blocks for the entire
  /// wavefront. When viewed from the perspective of an arbitrary workitem,
  /// this is 4-byte aligned.
  ///
  /// Only 4-byte alignment is really needed to access anything.
  /// Transformations on the pointer value itself may rely on the alignment /
  /// known low bits of the pointer. Set this to something above the minimum to
  /// avoid needing dynamic realignment in common cases.
  Align getStackAlignment() const {
    return Align(16);
  }


  /// Always true.
  bool enableMachineScheduler() const override { return true; }

  /// Defined out of line.
  bool useAA() const override;

  /// Always true.
  bool enableSubRegLiveness() const override { return true; }

  /// Overwrite the ScalarizeGlobal flag with \p b.
  void setScalarizeGlobalBehavior(bool b) {
    ScalarizeGlobal = b;
  }

  /// \returns the current ScalarizeGlobal flag.
  bool getScalarizeGlobalBehavior() const {
    return ScalarizeGlobal;
  }

  // static wrappers
  static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);

  // XXX - Why is this here if it isn't in the default pass set?
  /// Always true.
  bool enableEarlyIfConversion() const override { return true; }


  // Overrides of base-class hooks; all three are defined out of line.

  /// Takes the scheduling \p Policy by mutable reference together with the
  /// \p Region being scheduled.
  void overrideSchedPolicy(MachineSchedPolicy &Policy,

                           const SchedRegion &Region) const override;


  /// Same parameter shape as overrideSchedPolicy().
  // NOTE(review): presumably the post-register-allocation counterpart --
  // confirm in the .cpp.
  void overridePostRASchedPolicy(MachineSchedPolicy &Policy,

                                 const SchedRegion &Region) const override;


  /// Receives the machine function \p MF by mutable reference.
  void mirFileLoaded(MachineFunction &MF) const override;


  /// Forwards to AMDGPU::getMaxNumUserSGPRs() with this subtarget.
  unsigned getMaxNumUserSGPRs() const { return AMDGPU::getMaxNumUserSGPRs(*this); }

  bool hasSMemRealTime() const { return HasSMemRealTime; }

  bool hasMovrel() const { return HasMovrel; }

  bool hasVGPRIndexMode() const { return HasVGPRIndexMode; }

  /// Defined out of line.
  bool useVGPRIndexMode() const;

  /// \returns true for VOLCANIC_ISLANDS and later generations.
  bool hasScalarCompareEq64() const { return getGeneration() >= VOLCANIC_ISLANDS; }

  bool hasScalarDwordx3Loads() const {
    return HasScalarDwordx3Loads;
  }

  bool hasScalarStores() const { return HasScalarStores; }

  bool hasScalarAtomics() const { return HasScalarAtomics; }

  /// Reports the GFX8Insts flag.
  bool hasLDSFPAtomicAddF32() const {
    return GFX8Insts;
  }

  /// \returns true when either GFX90AInsts or GFX1250Insts is set.
  bool hasLDSFPAtomicAddF64() const {
    return GFX90AInsts || GFX1250Insts;
  }


  /// \returns true if the subtarget has the v_permlanex16_b32 instruction.

  bool hasPermLaneX16() const { return getGeneration() >= GFX10; }


  /// \returns true if the subtarget has the v_permlane64_b32 instruction.

  bool hasPermLane64() const { return getGeneration() >= GFX11; }


  bool hasDPP() const {

    return HasDPP;

  }


  bool hasDPPBroadcasts() const {

    return HasDPP && getGeneration() < GFX10;

  }


  bool hasDPPWavefrontShifts() const {

    return HasDPP && getGeneration() < GFX10;

  }


  bool hasDPP8() const {

    return HasDPP8;

  }


  bool hasDPALU_DPP() const {

    return HasDPALU_DPP;

  }


  bool hasDPPSrc1SGPR() const { return HasDPPSrc1SGPR; }


  /// \returns the \c HasPackedFP32Ops flag.
  bool hasPackedFP32Ops() const { return HasPackedFP32Ops; }


  // Has V_PK_MOV_B32 opcode


  bool hasPkMovB32() const { return GFX90AInsts; }


  /// \returns true when the generation is at least GFX10 or hasGFX940Insts()
  /// is true.
  bool hasFmaakFmamkF32Insts() const {
    if (getGeneration() >= GFX10)
      return true;
    return hasGFX940Insts();
  }


  /// \returns the result of hasGFX1250Insts().
  bool hasFmaakFmamkF64Insts() const {
    return hasGFX1250Insts();
  }


  /// \returns the \c HasImageInsts flag.
  bool hasImageInsts() const { return HasImageInsts; }


  /// \returns the \c HasExtendedImageInsts flag.
  bool hasExtendedImageInsts() const { return HasExtendedImageInsts; }


  /// \returns the \c HasR128A16 flag.
  bool hasR128A16() const { return HasR128A16; }


  /// \returns the \c HasA16 flag.
  bool hasA16() const {
    return HasA16;
  }


  /// \returns the \c HasG16 flag.
  bool hasG16() const {
    return HasG16;
  }


  /// \returns the \c HasOffset3fBug flag.
  bool hasOffset3fBug() const { return HasOffset3fBug; }


  /// \returns the \c HasImageStoreD16Bug flag.
  bool hasImageStoreD16Bug() const {
    return HasImageStoreD16Bug;
  }


  /// \returns the \c HasImageGather4D16Bug flag.
  bool hasImageGather4D16Bug() const {
    return HasImageGather4D16Bug;
  }


  /// \returns the \c HasMADIntraFwdBug flag.
  bool hasMADIntraFwdBug() const {
    return HasMADIntraFwdBug;
  }


  /// \returns the \c HasMSAALoadDstSelBug flag.
  bool hasMSAALoadDstSelBug() const {
    return HasMSAALoadDstSelBug;
  }


  /// \returns the \c HasPrivEnabledTrap2NopBug flag.
  bool hasPrivEnabledTrap2NopBug() const {
    return HasPrivEnabledTrap2NopBug;
  }


  /// \returns the \c HasNSAEncoding flag.
  bool hasNSAEncoding() const {
    return HasNSAEncoding;
  }


  /// \returns true when the subtarget generation is below GFX12.
  bool hasNonNSAEncoding() const {
    return getGeneration() < GFX12;
  }


  /// \returns the \c HasPartialNSAEncoding flag.
  bool hasPartialNSAEncoding() const {
    return HasPartialNSAEncoding;
  }


  /// \returns the result of AMDGPU::getNSAMaxSize() for this subtarget and
  /// \p HasSampler (defaults to false).
  unsigned getNSAMaxSize(bool HasSampler = false) const {
    const unsigned MaxSize = AMDGPU::getNSAMaxSize(*this, HasSampler);
    return MaxSize;
  }


  /// \returns the \c GFX10_AEncoding flag.
  bool hasGFX10_AEncoding() const { return GFX10_AEncoding; }


  /// \returns the \c GFX10_BEncoding flag.
  bool hasGFX10_BEncoding() const { return GFX10_BEncoding; }


  /// \returns the \c GFX10_3Insts flag.
  bool hasGFX10_3Insts() const { return GFX10_3Insts; }


  /// Only declared here; the body is defined out of line.
  bool hasMadF16() const;


  /// \returns true if either the \c GFX940Insts or the \c GFX1250Insts flag is
  /// set.
  bool hasMovB64() const {
    if (GFX940Insts)
      return true;
    return GFX1250Insts;
  }


  /// \returns the \c HasLshlAddU64Inst flag.
  bool hasLshlAddU64Inst() const {
    return HasLshlAddU64Inst;
  }


  // Scalar and global loads support scale_offset bit.

  bool hasScaleOffset() const {
    return GFX1250Insts;
  }


  /// \returns the \c FlatGVSMode flag.
  bool hasFlatGVSMode() const {
    return FlatGVSMode;
  }


  // FLAT GLOBAL VOffset is signed

  bool hasSignedGVSOffset() const {
    return GFX1250Insts;
  }


  /// \returns the \c EnableSIScheduler flag.
  bool enableSIScheduler() const { return EnableSIScheduler; }


  /// \returns the \c EnableLoadStoreOpt flag.
  bool loadStoreOptEnabled() const { return EnableLoadStoreOpt; }


  /// \returns the \c SGPRInitBug flag.
  bool hasSGPRInitBug() const { return SGPRInitBug; }


  /// \returns true if \c UserSGPRInit16Bug is set and isWave32() is true.
  bool hasUserSGPRInit16Bug() const { return UserSGPRInit16Bug && isWave32(); }


  /// \returns the \c NegativeScratchOffsetBug flag.
  bool hasNegativeScratchOffsetBug() const {
    return NegativeScratchOffsetBug;
  }


  /// \returns the \c NegativeUnalignedScratchOffsetBug flag.
  bool hasNegativeUnalignedScratchOffsetBug() const {
    const bool HasBug = NegativeUnalignedScratchOffsetBug;
    return HasBug;
  }


  /// \returns the \c HasMFMAInlineLiteralBug flag.
  bool hasMFMAInlineLiteralBug() const { return HasMFMAInlineLiteralBug; }


  /// \returns true for every generation other than SOUTHERN_ISLANDS.
  bool has12DWordStoreHazard() const {
    const bool IsSI = getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS;
    return !IsSI;
  }


  // \returns true if the subtarget supports DWORDX3 load/store instructions.


  bool hasDwordx3LoadStores() const { return CIInsts; }


  bool hasReadM0MovRelInterpHazard() const {

    return getGeneration() == AMDGPUSubtarget::GFX9;

  }


  /// \returns true when the subtarget generation lies in the inclusive range
  /// VOLCANIC_ISLANDS..GFX9.
  bool hasReadM0SendMsgHazard() const {
    if (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
      return false;
    return getGeneration() <= AMDGPUSubtarget::GFX9;
  }


  bool hasReadM0LdsDmaHazard() const {

    return getGeneration() == AMDGPUSubtarget::GFX9;

  }


  bool hasReadM0LdsDirectHazard() const {

    return getGeneration() == AMDGPUSubtarget::GFX9;

  }


  /// \returns the \c HasVcmpxPermlaneHazard flag.
  bool hasVcmpxPermlaneHazard() const { return HasVcmpxPermlaneHazard; }


  /// \returns the \c HasVMEMtoScalarWriteHazard flag.
  bool hasVMEMtoScalarWriteHazard() const { return HasVMEMtoScalarWriteHazard; }


  /// \returns the \c HasSMEMtoVectorWriteHazard flag.
  bool hasSMEMtoVectorWriteHazard() const { return HasSMEMtoVectorWriteHazard; }


  /// \returns true if \c LDSMisalignedBug is set and \c EnableCuMode is not.
  bool hasLDSMisalignedBug() const { return LDSMisalignedBug && !EnableCuMode; }


  /// \returns the \c HasInstFwdPrefetchBug flag.
  bool hasInstFwdPrefetchBug() const { return HasInstFwdPrefetchBug; }


  /// \returns the \c HasVcmpxExecWARHazard flag.
  bool hasVcmpxExecWARHazard() const { return HasVcmpxExecWARHazard; }


  /// \returns the \c HasLdsBranchVmemWARHazard flag.
  bool hasLdsBranchVmemWARHazard() const { return HasLdsBranchVmemWARHazard; }


  // Shift amount of a 64 bit shift cannot be a highest allocated register

  // if also at the end of the allocation block.


  bool hasShift64HighRegBug() const { return GFX90AInsts && !GFX940Insts; }


  // Has one cycle hazard on transcendental instruction feeding a

  // non transcendental VALU.

  bool hasTransForwardingHazard() const {
    return GFX940Insts;
  }


  // Has one cycle hazard on a VALU instruction partially writing dst with

  // a shift of result bits feeding another VALU instruction.

  bool hasDstSelForwardingHazard() const {
    return GFX940Insts;
  }


  // Cannot use op_sel with v_dot instructions.

  bool hasDOTOpSelHazard() const {
    if (GFX940Insts)
      return true;
    return GFX11Insts;
  }


  // Does not have HW interlocks for VALU writing and then reading SGPRs.


  bool hasVDecCoExecHazard() const { return GFX940Insts; }


  /// \returns the \c HasNSAtoVMEMBug flag.
  bool hasNSAtoVMEMBug() const { return HasNSAtoVMEMBug; }


  /// \returns the \c HasNSAClauseBug flag.
  bool hasNSAClauseBug() const {
    return HasNSAClauseBug;
  }


  /// \returns true if \c MaxHardClauseLength is greater than zero.
  bool hasHardClauses() const {
    return MaxHardClauseLength > 0;
  }


  /// \returns the \c GFX90AInsts flag.
  bool hasGFX90AInsts() const {
    return GFX90AInsts;
  }


  bool hasFPAtomicToDenormModeHazard() const {

    return getGeneration() == GFX10;

  }


  /// \returns true when the subtarget generation is at least GFX11.
  bool hasVOP3DPP() const {
    return getGeneration() >= GFX11;
  }


  /// \returns true when the subtarget generation is at least GFX11.
  bool hasLdsDirect() const {
    return getGeneration() >= GFX11;
  }


  /// \returns true when the subtarget generation is at least GFX12.
  bool hasLdsWaitVMSRC() const {
    return getGeneration() >= GFX12;
  }


  bool hasVALUPartialForwardingHazard() const {

    return getGeneration() == GFX11;

  }


  /// \returns the \c HasVALUTransUseHazard flag.
  bool hasVALUTransUseHazard() const {
    return HasVALUTransUseHazard;
  }


  /// \returns the \c GFX950Insts flag.
  bool hasCvtScaleForwardingHazard() const {
    return GFX950Insts;
  }


  /// \returns the \c RequiresCOV6 flag.
  bool requiresCodeObjectV6() const {
    return RequiresCOV6;
  }


  /// \returns the \c UseBlockVGPROpsForCSR flag.
  bool useVGPRBlockOpsForCSR() const {
    return UseBlockVGPROpsForCSR;
  }


  /// \returns the \c HasGloballyAddressableScratch flag.
  bool hasGloballyAddressableScratch() const {
    const bool Enabled = HasGloballyAddressableScratch;
    return Enabled;
  }


  /// \returns true when the subtarget generation is exactly GFX11.
  bool hasVALUMaskWriteHazard() const {
    return getGeneration() == GFX11;
  }


  /// \returns true if \c GFX12Insts is set and \c GFX1250Insts is not.
  bool hasVALUReadSGPRHazard() const {
    if (!GFX12Insts)
      return false;
    return !GFX1250Insts;
  }


  /// \returns true if \c GFX1250Insts is set and the generation is exactly
  /// GFX12.
  bool setRegModeNeedsVNOPs() const {
    if (!GFX1250Insts)
      return false;
    return getGeneration() == GFX12;
  }


  /// Return true if operations acting on VGPR tuples require even alignment.

  bool needsAlignedVGPRs() const {
    return RequiresAlignVGPR;
  }


  /// Return true if the target has the S_PACK_HL_B32_B16 instruction.

  bool hasSPackHL() const {
    return GFX11Insts;
  }


  /// Return true if the target's EXP instruction has the COMPR flag, which

  /// affects the meaning of the EN (enable) bits.

  bool hasCompressedExport() const {
    return !GFX11Insts;
  }


  /// Return true if the target's EXP instruction supports the NULL export

  /// target.

  bool hasNullExportTarget() const {
    return !GFX11Insts;
  }


  /// \returns the \c Has1_5xVGPRs flag.
  bool has1_5xVGPRs() const {
    return Has1_5xVGPRs;
  }


  /// \returns the \c HasVOPDInsts flag.
  bool hasVOPDInsts() const {
    return HasVOPDInsts;
  }


  /// \returns true when the subtarget generation is exactly GFX11.
  bool hasFlatScratchSVSSwizzleBug() const {
    return getGeneration() == GFX11;
  }


  /// Return true if the target has the S_DELAY_ALU instruction.

  bool hasDelayAlu() const {
    return GFX11Insts;
  }


  /// \returns the \c HasPackedTID flag.
  bool hasPackedTID() const {
    return HasPackedTID;
  }


  // GFX94* is a derivative of GFX90A. hasGFX940Insts() being true implies that

  // hasGFX90AInsts is also true.

  bool hasGFX940Insts() const {
    return GFX940Insts;
  }


  // GFX950 is a derivative of GFX94*. hasGFX950Insts() implies that

  // hasGFX940Insts and hasGFX90AInsts are also true.

  bool hasGFX950Insts() const {
    return GFX950Insts;
  }


  /// Returns true if the target supports

  /// global_load_lds_dwordx3/global_load_lds_dwordx4 or

  /// buffer_load_dwordx3/buffer_load_dwordx4 with the lds bit.


  bool hasLDSLoadB96_B128() const { return hasGFX950Insts(); }


  /// \returns the \c HasVMemToLDSLoad flag.
  bool hasVMemToLDSLoad() const {
    return HasVMemToLDSLoad;
  }


  /// \returns the \c HasSALUFloatInsts flag.
  bool hasSALUFloatInsts() const {
    return HasSALUFloatInsts;
  }


  /// \returns the \c HasPseudoScalarTrans flag.
  bool hasPseudoScalarTrans() const {
    return HasPseudoScalarTrans;
  }


  /// \returns the \c HasRestrictedSOffset flag.
  bool hasRestrictedSOffset() const {
    return HasRestrictedSOffset;
  }


  /// \returns the \c HasRequiredExportPriority flag.
  bool hasRequiredExportPriority() const {
    return HasRequiredExportPriority;
  }


  /// \returns the \c HasVmemWriteVgprInOrder flag.
  bool hasVmemWriteVgprInOrder() const {
    return HasVmemWriteVgprInOrder;
  }


  /// \returns true if the target uses LOADcnt/SAMPLEcnt/BVHcnt, DScnt/KMcnt

  /// and STOREcnt rather than VMcnt, LGKMcnt and VScnt respectively.

  bool hasExtendedWaitCounts() const {
    return getGeneration() >= GFX12;
  }


  /// \returns true if inline constants are not supported for F16 pseudo

  /// scalar transcendentals.


  bool hasNoF16PseudoScalarTransInlineConstants() const {

    return getGeneration() == GFX12;

  }


  /// \returns true if the target has instructions with xf32 format support.

  bool hasXF32Insts() const {
    return HasXF32Insts;
  }


  /// \returns true if the target has packed f32 instructions that only read 32

  /// bits from a scalar operand (SGPR or literal) and replicates the bits to

  /// both channels.


  bool hasPKF32InstsReplicatingLower32BitsOfScalarInput() const {
    // Only generation GFX12 qualifies, and then only with GFX1250Insts set.
    if (getGeneration() != GFX12)
      return false;
    return GFX1250Insts;
  }


  /// \returns the \c HasBitOp3Insts flag.
  bool hasBitOp3Insts() const {
    return HasBitOp3Insts;
  }


  /// \returns the \c HasPermlane16Swap flag.
  bool hasPermlane16Swap() const {
    return HasPermlane16Swap;
  }

  /// \returns the \c HasPermlane32Swap flag.
  bool hasPermlane32Swap() const {
    return HasPermlane32Swap;
  }

  /// \returns the \c HasAshrPkInsts flag.
  bool hasAshrPkInsts() const {
    return HasAshrPkInsts;
  }


  /// \returns the \c HasMinimum3Maximum3F32 flag.
  bool hasMinimum3Maximum3F32() const { return HasMinimum3Maximum3F32; }


  /// \returns the \c HasMinimum3Maximum3F16 flag.
  bool hasMinimum3Maximum3F16() const { return HasMinimum3Maximum3F16; }


  /// \returns the \c HasMin3Max3PKF16 flag.
  bool hasMin3Max3PKF16() const {
    return HasMin3Max3PKF16;
  }


  /// \returns the \c HasTanhInsts flag.
  bool hasTanhInsts() const {
    return HasTanhInsts;
  }


  /// \returns the \c HasTensorCvtLutInsts flag.
  bool hasTensorCvtLutInsts() const {
    return HasTensorCvtLutInsts;
  }


  /// \returns the \c GFX1250Insts flag.
  bool hasAddPC64Inst() const {
    return GFX1250Insts;
  }


  /// \returns the \c Has1024AddressableVGPRs flag.
  bool has1024AddressableVGPRs() const {
    return Has1024AddressableVGPRs;
  }


  /// \returns the \c HasMinimum3Maximum3PKF16 flag.
  bool hasMinimum3Maximum3PKF16() const { return HasMinimum3Maximum3PKF16; }


  /// \returns the \c HasTransposeLoadF4F6Insts flag.
  bool hasTransposeLoadF4F6Insts() const {
    return HasTransposeLoadF4F6Insts;
  }


  /// \returns true if the target has s_wait_xcnt insertion. Supported for

  /// GFX1250.

  bool hasWaitXCnt() const {
    return HasWaitXcnt;
  }


  // A single-DWORD instruction can use a 64-bit literal.

  bool has64BitLiterals() const {
    return Has64BitLiterals;
  }


  /// \returns the \c HasPointSampleAccel flag.
  bool hasPointSampleAccel() const {
    return HasPointSampleAccel;
  }


  /// \returns the \c HasLdsBarrierArriveAtomic flag.
  bool hasLdsBarrierArriveAtomic() const {
    return HasLdsBarrierArriveAtomic;
  }


  /// \returns The maximum number of instructions that can be enclosed in an

  /// S_CLAUSE on the given subtarget, or 0 for targets that do not support that

  /// instruction.

  unsigned maxHardClauseLength() const {
    return MaxHardClauseLength;
  }


  /// \returns the \c HasPrngInst flag.
  bool hasPrngInst() const {
    return HasPrngInst;
  }


  /// \returns the \c HasBVHDualAndBVH8Insts flag.
  bool hasBVHDualAndBVH8Insts() const {
    return HasBVHDualAndBVH8Insts;
  }


  /// Return the maximum number of waves per SIMD for kernels using \p SGPRs

  /// SGPRs

  unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;


  /// Return the maximum number of waves per SIMD for kernels using \p VGPRs

  /// VGPRs

  unsigned getOccupancyWithNumVGPRs(unsigned VGPRs,

                                    unsigned DynamicVGPRBlockSize) const;


  /// Subtarget's minimum/maximum occupancy, in number of waves per EU, that can

  /// be achieved when the only function running on a CU is \p F, each workgroup

  /// uses \p LDSSize bytes of LDS, and each wave uses \p NumSGPRs SGPRs and \p

  /// NumVGPRs VGPRs. The flat workgroup sizes associated to the function are a

  /// range, so this returns a range as well.

  ///

  /// Note that occupancy can be affected by the scratch allocation as well, but

  /// we do not have enough information to compute it.

  std::pair<unsigned, unsigned> computeOccupancy(const Function &F,

                                                 unsigned LDSSize = 0,

                                                 unsigned NumSGPRs = 0,

                                                 unsigned NumVGPRs = 0) const;


  /// \returns true if the flat_scratch register should be initialized with the

  /// pointer to the wave's scratch memory rather than a size and offset.


  bool flatScratchIsPointer() const {

    return getGeneration() >= AMDGPUSubtarget::GFX9;

  }


  /// \returns true if the flat_scratch register is initialized by the HW.

  /// In this case it is readonly.

  bool flatScratchIsArchitected() const {
    return HasArchitectedFlatScratch;
  }


  /// \returns true if the architected SGPRs are enabled.

  bool hasArchitectedSGPRs() const {
    return HasArchitectedSGPRs;
  }


  /// \returns true if Global Data Share is supported.

  bool hasGDS() const {
    return HasGDS;
  }


  /// \returns true if Global Wave Sync is supported.

  bool hasGWS() const {
    return HasGWS;
  }


  /// \returns true if the machine has merged shaders in which s0-s7 are

  /// reserved by the hardware and user SGPRs start at s8


  bool hasMergedShaders() const { return getGeneration() >= GFX9; }


  // \returns true if the target supports the pre-NGG legacy geometry path.

  bool hasLegacyGeometry() const {
    return getGeneration() < GFX11;
  }


  // \returns true if preloading kernel arguments is supported.

  bool hasKernargPreload() const {
    return KernargPreload;
  }


  // \returns true if the target has split barriers feature

  bool hasSplitBarriers() const {
    return getGeneration() >= GFX12;
  }


  // \returns true if FP8/BF8 VOP1 form of conversion to F32 is unreliable.

  bool hasCvtFP8VOP1Bug() const {
    return HasCvtFP8Vop1Bug;
  }


  // \returns true if CSUB (a.k.a. SUB_CLAMP on GFX12) atomics support a

  // no-return form.

  bool hasAtomicCSubNoRtnInsts() const {
    return HasAtomicCSubNoRtnInsts;
  }


  // \returns true if the target has DX10_CLAMP kernel descriptor mode bit

  bool hasDX10ClampMode() const {
    return getGeneration() < GFX12;
  }


  // \returns true if the target has IEEE kernel descriptor mode bit

  bool hasIEEEMode() const {
    return getGeneration() < GFX12;
  }


  // \returns true if the target has IEEE fminimum/fmaximum instructions

  bool hasIEEEMinimumMaximumInsts() const {
    return HasIEEEMinimumMaximumInsts;
  }


  // \returns true if the target has WG_RR_MODE kernel descriptor mode bit

  bool hasRrWGMode() const {
    return getGeneration() >= GFX12;
  }


  /// \returns true if VADDR and SADDR fields in VSCRATCH can use negative

  /// values.

  bool hasSignedScratchOffsets() const {
    return getGeneration() >= GFX12;
  }


  /// \returns the \c GFX1250Insts flag.
  bool hasGFX1250Insts() const {
    return GFX1250Insts;
  }


  /// \returns the \c GFX1250Insts flag.
  bool hasVOPD3() const {
    return GFX1250Insts;
  }


  // \returns true if the target has V_ADD_U64/V_SUB_U64 instructions.

  bool hasAddSubU64Insts() const {
    return HasAddSubU64Insts;
  }


  // \returns true if the target has V_MAD_U32 instruction.

  bool hasMadU32Inst() const {
    return HasMadU32Inst;
  }


  // \returns true if the target has V_MUL_U64/V_MUL_I64 instructions.

  bool hasVectorMulU64() const {
    return GFX1250Insts;
  }


  // \returns true if the target has V_MAD_NC_U64_U32/V_MAD_NC_I64_I32

  // instructions.

  bool hasMadU64U32NoCarry() const {
    return GFX1250Insts;
  }


  // \returns true if the target has V_{MIN|MAX}_{I|U}64 instructions.

  bool hasIntMinMax64() const {
    return GFX1250Insts;
  }


  // \returns true if the target has V_ADD_{MIN|MAX}_{I|U}32 instructions.

  bool hasAddMinMaxInsts() const {
    return HasAddMinMaxInsts;
  }


  // \returns true if the target has V_PK_ADD_{MIN|MAX}_{I|U}16 instructions.

  bool hasPkAddMinMaxInsts() const {
    return HasPkAddMinMaxInsts;
  }


  // \returns true if the target has V_PK_{MIN|MAX}3_{I|U}16 instructions.

  bool hasPkMinMax3Insts() const {
    return GFX1250Insts;
  }


  // \returns true if target has S_GET_SHADER_CYCLES_U64 instruction.

  bool hasSGetShaderCyclesInst() const {
    return GFX1250Insts;
  }


  // \returns true if target has S_SETPRIO_INC_WG instruction.

  bool hasSetPrioIncWgInst() const {
    return HasSetPrioIncWgInst;
  }


  // \returns true if target has S_WAKEUP_BARRIER instruction.

  bool hasSWakeupBarrier() const {
    return HasSWakeupBarrier;
  }


  // \returns true if S_GETPC_B64 zero-extends the result from 48 bits instead

  // of sign-extending. Note that GFX1250 has not only fixed the bug but also

  // extended VA to 57 bits.

  bool hasGetPCZeroExtension() const {
    if (!GFX12Insts)
      return false;
    return !GFX1250Insts;
  }


  // \returns true if the target needs to create a prolog for backward

  // compatibility when preloading kernel arguments.


  bool needsKernArgPreloadProlog() const {
    // No prolog without kernarg preloading; with it, only when GFX1250Insts
    // is clear.
    if (!hasKernargPreload())
      return false;
    return !GFX1250Insts;
  }


  /// \returns the \c GFX12Insts flag.
  bool hasCondSubInsts() const {
    return GFX12Insts;
  }


  /// \returns the result of hasGFX10_3Insts().
  bool hasSubClampInsts() const {
    return hasGFX10_3Insts();
  }


  /// \returns SGPR allocation granularity supported by the subtarget.


  unsigned getSGPRAllocGranule() const {
    const unsigned Granule = AMDGPU::IsaInfo::getSGPRAllocGranule(this);
    return Granule;
  }


  /// \returns SGPR encoding granularity supported by the subtarget.


  unsigned getSGPREncodingGranule() const {
    const unsigned Granule = AMDGPU::IsaInfo::getSGPREncodingGranule(this);
    return Granule;
  }


  /// \returns Total number of SGPRs supported by the subtarget.


  unsigned getTotalNumSGPRs() const {
    const unsigned NumSGPRs = AMDGPU::IsaInfo::getTotalNumSGPRs(this);
    return NumSGPRs;
  }


  /// \returns Addressable number of SGPRs supported by the subtarget.


  unsigned getAddressableNumSGPRs() const {
    const unsigned NumSGPRs = AMDGPU::IsaInfo::getAddressableNumSGPRs(this);
    return NumSGPRs;
  }


  /// \returns Minimum number of SGPRs that meets the given number of waves per

  /// execution unit requirement supported by the subtarget.


  unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
    const unsigned MinSGPRs = AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
    return MinSGPRs;
  }


  /// \returns Maximum number of SGPRs that meets the given number of waves per

  /// execution unit requirement supported by the subtarget.


  unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
    const unsigned MaxSGPRs =
        AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
    return MaxSGPRs;
  }


  /// \returns Reserved number of SGPRs. This is common

  /// utility function called by MachineFunction and

  /// Function variants of getReservedNumSGPRs.

  unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const;

  /// \returns Reserved number of SGPRs for given machine function \p MF.

  unsigned getReservedNumSGPRs(const MachineFunction &MF) const;


  /// \returns Reserved number of SGPRs for given function \p F.

  unsigned getReservedNumSGPRs(const Function &F) const;


  /// \returns Maximum number of preloaded SGPRs for the subtarget.

  unsigned getMaxNumPreloadedSGPRs() const;


  /// \returns max num SGPRs. This is the common utility

  /// function called by MachineFunction and Function

  /// variants of getMaxNumSGPRs.

  unsigned getBaseMaxNumSGPRs(const Function &F,

                              std::pair<unsigned, unsigned> WavesPerEU,

                              unsigned PreloadedSGPRs,

                              unsigned ReservedNumSGPRs) const;


  /// \returns Maximum number of SGPRs that meets number of waves per execution

  /// unit requirement for function \p MF, or number of SGPRs explicitly

  /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.

  ///

  /// \returns Value that meets number of waves per execution unit requirement

  /// if explicitly requested value cannot be converted to integer, violates

  /// subtarget's specifications, or does not meet number of waves per execution

  /// unit requirement.

  unsigned getMaxNumSGPRs(const MachineFunction &MF) const;


  /// \returns Maximum number of SGPRs that meets number of waves per execution

  /// unit requirement for function \p F, or number of SGPRs explicitly

  /// requested using "amdgpu-num-sgpr" attribute attached to function \p F.

  ///

  /// \returns Value that meets number of waves per execution unit requirement

  /// if explicitly requested value cannot be converted to integer, violates

  /// subtarget's specifications, or does not meet number of waves per execution

  /// unit requirement.

  unsigned getMaxNumSGPRs(const Function &F) const;


  /// \returns VGPR allocation granularity supported by the subtarget.


  unsigned getVGPRAllocGranule(unsigned DynamicVGPRBlockSize) const {
    const unsigned Granule =
        AMDGPU::IsaInfo::getVGPRAllocGranule(this, DynamicVGPRBlockSize);
    return Granule;
  }


  /// \returns VGPR encoding granularity supported by the subtarget.


  unsigned getVGPREncodingGranule() const {
    const unsigned Granule = AMDGPU::IsaInfo::getVGPREncodingGranule(this);
    return Granule;
  }


  /// \returns Total number of VGPRs supported by the subtarget.


  unsigned getTotalNumVGPRs() const {
    const unsigned NumVGPRs = AMDGPU::IsaInfo::getTotalNumVGPRs(this);
    return NumVGPRs;
  }


  /// \returns Addressable number of architectural VGPRs supported by the

  /// subtarget.


  unsigned getAddressableNumArchVGPRs() const {
    const unsigned NumVGPRs = AMDGPU::IsaInfo::getAddressableNumArchVGPRs(this);
    return NumVGPRs;
  }


  /// \returns Addressable number of VGPRs supported by the subtarget.


  unsigned getAddressableNumVGPRs(unsigned DynamicVGPRBlockSize) const {
    const unsigned NumVGPRs =
        AMDGPU::IsaInfo::getAddressableNumVGPRs(this, DynamicVGPRBlockSize);
    return NumVGPRs;
  }


  /// \returns the minimum number of VGPRs that will prevent achieving more than

  /// the specified number of waves \p WavesPerEU.


  unsigned getMinNumVGPRs(unsigned WavesPerEU,
                          unsigned DynamicVGPRBlockSize) const {
    const unsigned MinVGPRs =
        AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU, DynamicVGPRBlockSize);
    return MinVGPRs;
  }


  /// \returns the maximum number of VGPRs that can be used and still achieve

  /// at least the specified number of waves \p WavesPerEU.


  unsigned getMaxNumVGPRs(unsigned WavesPerEU,
                          unsigned DynamicVGPRBlockSize) const {
    const unsigned MaxVGPRs =
        AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU, DynamicVGPRBlockSize);
    return MaxVGPRs;
  }


  /// \returns max num VGPRs. This is the common utility function

  /// called by MachineFunction and Function variants of getMaxNumVGPRs.

  unsigned

  getBaseMaxNumVGPRs(const Function &F,

                     std::pair<unsigned, unsigned> NumVGPRBounds) const;


  /// \returns Maximum number of VGPRs that meets number of waves per execution

  /// unit requirement for function \p F, or number of VGPRs explicitly

  /// requested using "amdgpu-num-vgpr" attribute attached to function \p F.

  ///

  /// \returns Value that meets number of waves per execution unit requirement

  /// if explicitly requested value cannot be converted to integer, violates

  /// subtarget's specifications, or does not meet number of waves per execution

  /// unit requirement.

  unsigned getMaxNumVGPRs(const Function &F) const;


  /// \returns the same value as getMaxNumVGPRs() for \p F.
  unsigned getMaxNumAGPRs(const Function &F) const { return getMaxNumVGPRs(F); }


  /// Return a pair of maximum numbers of VGPRs and AGPRs that meet the number

  /// of waves per execution unit required for the function \p F.

  std::pair<unsigned, unsigned> getMaxNumVectorRegs(const Function &F) const;


  /// \returns Maximum number of VGPRs that meets number of waves per execution

  /// unit requirement for function \p MF, or number of VGPRs explicitly

  /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.

  ///

  /// \returns Value that meets number of waves per execution unit requirement

  /// if explicitly requested value cannot be converted to integer, violates

  /// subtarget's specifications, or does not meet number of waves per execution

  /// unit requirement.

  unsigned getMaxNumVGPRs(const MachineFunction &MF) const;


  /// \returns true when the subtarget generation is at least GFX10.
  bool supportsWave32() const {
    return getGeneration() >= GFX10;
  }


  /// \returns true unless hasGFX1250Insts() is true.
  bool supportsWave64() const {
    return !hasGFX1250Insts();
  }


  /// \returns true if getWavefrontSize() is 32.
  bool isWave32() const { return getWavefrontSize() == 32; }


  /// \returns true if getWavefrontSize() is 64.
  bool isWave64() const { return getWavefrontSize() == 64; }


  /// Returns if the wavesize of this subtarget is known reliable. This is false

  /// only for a default target-cpu that does not have an explicit

  /// +wavefrontsize target feature.


  bool isWaveSizeKnown() const {
    // Either explicit wavefront-size feature makes the size known.
    if (hasFeature(AMDGPU::FeatureWavefrontSize32))
      return true;
    return hasFeature(AMDGPU::FeatureWavefrontSize64);
  }


  /// \returns the register class reported by getRegisterInfo()->getBoolRC().
  const TargetRegisterClass *getBoolRC() const {
    const TargetRegisterClass *BoolRC = getRegisterInfo()->getBoolRC();
    return BoolRC;
  }


  /// \returns Maximum number of work groups per compute unit supported by the

  /// subtarget and limited by given \p FlatWorkGroupSize.


  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
    const unsigned MaxGroups =
        AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
    return MaxGroups;
  }


  /// \returns Minimum flat work group size supported by the subtarget.


  unsigned getMinFlatWorkGroupSize() const override {
    const unsigned MinSize = AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this);
    return MinSize;
  }


  /// \returns Maximum flat work group size supported by the subtarget.


  unsigned getMaxFlatWorkGroupSize() const override {
    const unsigned MaxSize = AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this);
    return MaxSize;
  }


  /// \returns Number of waves per execution unit required to support the given

  /// \p FlatWorkGroupSize.

  unsigned getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override {
    const unsigned Waves =
        AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize);
    return Waves;
  }


  /// \returns Minimum number of waves per execution unit supported by the

  /// subtarget.


  unsigned getMinWavesPerEU() const override {
    const unsigned MinWaves = AMDGPU::IsaInfo::getMinWavesPerEU(this);
    return MinWaves;
  }


  /// Overrides a base-class virtual (marked \c override). Only declared here;
  /// the body is defined out of line.
  void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,

                             SDep &Dep,

                             const TargetSchedModel *SchedModel) const override;


  // \returns true if it's beneficial on this subtarget for the scheduler to

  // cluster stores as well as loads.

  bool shouldClusterStores() const {
    return getGeneration() >= GFX11;
  }


  // \returns the number of address arguments from which to enable MIMG NSA

  // on supported architectures.

  unsigned getNSAThreshold(const MachineFunction &MF) const;


  // \returns true if the subtarget has a hazard requiring an "s_nop 0"

  // instruction before "s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)".

  bool requiresNopBeforeDeallocVGPRs() const {
    return !GFX1250Insts;
  }


  // \returns true if the subtarget needs S_WAIT_ALU 0 before S_GETREG_B32 on

  // STATUS, STATE_PRIV, EXCP_FLAG_PRIV, or EXCP_FLAG_USER.

  bool requiresWaitIdleBeforeGetReg() const {
    return GFX1250Insts;
  }


  /// \returns the \c DynamicVGPR flag.
  bool isDynamicVGPREnabled() const {
    return DynamicVGPR;
  }


  /// \returns 32 if the \c DynamicVGPRBlockSize32 flag is set, otherwise 16.
  unsigned getDynamicVGPRBlockSize() const {
    if (DynamicVGPRBlockSize32)
      return 32;
    return 16;
  }


  /// AMDGPU does not care whether early-clobber and undef operands are
  /// allocated to the same register, so this always returns false.
  bool requiresDisjointEarlyClobberAndUndef() const override { return false; }


  // DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 shall not be claused with anything

  // and surrounded by S_WAIT_ALU(0xFFE3).


  bool hasDsAtomicAsyncBarrierArriveB64PipeBug() const {

    return getGeneration() == GFX12;

  }


  // Requires s_wait_alu(0) after s102/s103 write and src_flat_scratch_base

  // read.


  bool hasScratchBaseForwardingHazard() const {
    if (!GFX1250Insts)
      return false;
    return getGeneration() == GFX12;
  }


  // src_flat_scratch_hi cannot be used as a source in SALU producing a 64-bit

  // result.


  bool hasFlatScratchHiInB64InstHazard() const {
    if (!GFX1250Insts)
      return false;
    return getGeneration() == GFX12;
  }


  /// \returns true if the subtarget supports clusters of workgroups.

  bool hasClusters() const {
    return HasClusters;
  }


  /// \returns true if the subtarget requires a wait for xcnt before VMEM

  /// accesses that must never be repeated in the event of a page fault/re-try.

  /// Atomic stores/rmw and all volatile accesses fall under this criteria.


  bool requiresWaitXCntForSingleAccessInstructions() const {
    const bool NeedsWait = GFX1250Insts;
    return NeedsWait;
  }


  /// \returns the number of significant bits in the immediate field of the

  /// S_NOP instruction.


  unsigned getSNopBits() const {
    // Checked from the oldest bracket upwards: below VOLCANIC_ISLANDS -> 3,
    // VOLCANIC_ISLANDS up to (not including) GFX12 -> 4, GFX12 and later -> 7.
    if (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
      return 3;
    return getGeneration() < AMDGPUSubtarget::GFX12 ? 4 : 7;
  }


  /// \returns true if the sub-target supports buffer resource (V#) with 45-bit

  /// num_records.


  bool has45BitNumRecordsBufferResource() const {
    const bool Has45Bit = Has45BitNumRecordsBufferResource;
    return Has45Bit;
  }


  /// \returns the \c RequiresWaitsBeforeSystemScopeStores flag.
  bool requiresWaitsBeforeSystemScopeStores() const {
    const bool NeedsWaits = RequiresWaitsBeforeSystemScopeStores;
    return NeedsWaits;
  }


};


/// Records which user SGPR fields are flagged as present and how many user
/// SGPRs are accounted for. The inline queries below only read stored state;
/// the constructor, getNumFreeUserSGPRs() and allocKernargPreloadSGPRs() are
/// declared here and defined out of line.
class GCNUserSGPRUsageInfo {
public:
  /// Identifiers for the individual user SGPR fields.
  enum UserSGPRID : unsigned {
    ImplicitBufferPtrID = 0,
    PrivateSegmentBufferID = 1,
    DispatchPtrID = 2,
    QueuePtrID = 3,
    KernargSegmentPtrID = 4,
    DispatchIdID = 5,
    FlatScratchInitID = 6,
    PrivateSegmentSizeID = 7
  };

  GCNUserSGPRUsageInfo(const Function &F, const GCNSubtarget &ST);

  // Per-field presence queries; each returns the matching stored flag.
  bool hasImplicitBufferPtr() const {
    return ImplicitBufferPtr;
  }

  bool hasPrivateSegmentBuffer() const {
    return PrivateSegmentBuffer;
  }

  bool hasDispatchPtr() const {
    return DispatchPtr;
  }

  bool hasQueuePtr() const {
    return QueuePtr;
  }

  bool hasKernargSegmentPtr() const {
    return KernargSegmentPtr;
  }

  bool hasDispatchID() const {
    return DispatchID;
  }

  bool hasFlatScratchInit() const {
    return FlatScratchInit;
  }

  bool hasPrivateSegmentSize() const {
    return PrivateSegmentSize;
  }

  /// \returns the stored count of kernarg preload SGPRs.
  unsigned getNumKernargPreloadSGPRs() const {
    return NumKernargPreloadSGPRs;
  }

  /// \returns the stored count of used user SGPRs.
  unsigned getNumUsedUserSGPRs() const {
    return NumUsedUserSGPRs;
  }

  unsigned getNumFreeUserSGPRs();

  void allocKernargPreloadSGPRs(unsigned NumSGPRs);

  /// \returns the number of SGPRs occupied by the user SGPR field \p ID.
  static unsigned getNumUserSGPRForField(UserSGPRID ID) {
    switch (ID) {
    case PrivateSegmentBufferID:
      return 4;
    case PrivateSegmentSizeID:
      return 1;
    // Every remaining field takes two SGPRs.
    case ImplicitBufferPtrID:
    case DispatchPtrID:
    case QueuePtrID:
    case KernargSegmentPtrID:
    case DispatchIdID:
    case FlatScratchInitID:
      return 2;
    }
    llvm_unreachable("Unknown UserSGPRID.");
  }

private:
  const GCNSubtarget &ST;

  // Private memory buffer
  // Compute directly in sgpr[0:1]
  // Other shaders indirect 64-bits at sgpr[0:1]
  bool ImplicitBufferPtr = false;

  bool PrivateSegmentBuffer = false;

  bool DispatchPtr = false;

  bool QueuePtr = false;

  bool KernargSegmentPtr = false;

  bool DispatchID = false;

  bool FlatScratchInit = false;

  bool PrivateSegmentSize = false;

  unsigned NumKernargPreloadSGPRs = 0;

  unsigned NumUsedUserSGPRs = 0;
};


} // end namespace llvm


#endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H

AMDGPUBaseInfo.h

AMDGPUCallLowering.h
This file describes how to lower LLVM calls to machine code calls.

AMDGPURegisterBankInfo.h
This file declares the targeting of the RegisterBankInfo class for AMDGPU.

AMDGPUSubtarget.h
Base class for AMDGPU specific classes of TargetSubtarget.

InlinePriorityMode::Size
@ Size
Definition InlineOrder.cpp:25

F
#define F(x, y, z)
Definition MD5.cpp:54

SIFrameLowering.h

SIISelLowering.h
SI DAG Lowering interface definition.

SIInstrInfo.h
Interface definition for SIInstrInfo.

AMDGPUGenSubtargetInfo

llvm::AMDGPURegisterBankInfo
Definition AMDGPURegisterBankInfo.h:42

llvm::AMDGPUSubtarget::isMesa3DOS
bool isMesa3DOS() const
Definition AMDGPUSubtarget.h:197

llvm::AMDGPUSubtarget::Generation
Generation
Definition AMDGPUSubtarget.h:32

llvm::AMDGPUSubtarget::GFX10
@ GFX10
Definition AMDGPUSubtarget.h:42

llvm::AMDGPUSubtarget::GFX9
@ GFX9
Definition AMDGPUSubtarget.h:41

llvm::AMDGPUSubtarget::GFX12
@ GFX12
Definition AMDGPUSubtarget.h:44

llvm::AMDGPUSubtarget::INVALID
@ INVALID
Definition AMDGPUSubtarget.h:33

llvm::AMDGPUSubtarget::SEA_ISLANDS
@ SEA_ISLANDS
Definition AMDGPUSubtarget.h:39

llvm::AMDGPUSubtarget::SOUTHERN_ISLANDS
@ SOUTHERN_ISLANDS
Definition AMDGPUSubtarget.h:38

llvm::AMDGPUSubtarget::VOLCANIC_ISLANDS
@ VOLCANIC_ISLANDS
Definition AMDGPUSubtarget.h:40

llvm::AMDGPUSubtarget::GFX11
@ GFX11
Definition AMDGPUSubtarget.h:43

llvm::AMDGPUSubtarget::getWavefrontSizeLog2
unsigned getWavefrontSizeLog2() const
Definition AMDGPUSubtarget.h:314

llvm::AMDGPUSubtarget::AMDGPUSubtarget
AMDGPUSubtarget(Triple TT)
Definition AMDGPUSubtarget.cpp:35

llvm::AMDGPUSubtarget::getMaxWavesPerEU
unsigned getMaxWavesPerEU() const
Definition AMDGPUSubtarget.h:382

llvm::AMDGPUSubtarget::isAmdHsaOS
bool isAmdHsaOS() const
Definition AMDGPUSubtarget.h:189

llvm::AMDGPUSubtarget::getWavefrontSize
unsigned getWavefrontSize() const
Definition AMDGPUSubtarget.h:310

llvm::AMDGPU::IsaInfo::AMDGPUTargetID
Definition AMDGPUBaseInfo.h:152

llvm::CallLowering
Definition CallLowering.h:45

llvm::Function
Definition Function.h:64

llvm::GCNSubtarget
Definition GCNSubtarget.h:34

llvm::GCNSubtarget::hasPrefetch
bool hasPrefetch() const
Definition GCNSubtarget.h:1040

llvm::GCNSubtarget::hasMemoryAtomicFaddF32DenormalSupport
bool hasMemoryAtomicFaddF32DenormalSupport() const
Definition GCNSubtarget.h:977

llvm::GCNSubtarget::GFX10Insts
bool GFX10Insts
Definition GCNSubtarget.h:113

llvm::GCNSubtarget::hasFlat
bool hasFlat() const
Definition GCNSubtarget.h:443

llvm::GCNSubtarget::hasD16Images
bool hasD16Images() const
Definition GCNSubtarget.h:766

llvm::GCNSubtarget::hasMinimum3Maximum3F32
bool hasMinimum3Maximum3F32() const
Definition GCNSubtarget.h:1458

llvm::GCNSubtarget::InstrItins
InstrItineraryData InstrItins
Definition GCNSubtarget.h:65

llvm::GCNSubtarget::useVGPRIndexMode
bool useVGPRIndexMode() const
Definition GCNSubtarget.cpp:391

llvm::GCNSubtarget::HasIEEEMinimumMaximumInsts
bool HasIEEEMinimumMaximumInsts
Definition GCNSubtarget.h:279

llvm::GCNSubtarget::HasDot6Insts
bool HasDot6Insts
Definition GCNSubtarget.h:158

llvm::GCNSubtarget::hasAtomicDsPkAdd16Insts
bool hasAtomicDsPkAdd16Insts() const
Definition GCNSubtarget.h:939

llvm::GCNSubtarget::hasSDWAOmod
bool hasSDWAOmod() const
Definition GCNSubtarget.h:815

llvm::GCNSubtarget::hasFlatGVSMode
bool hasFlatGVSMode() const
Definition GCNSubtarget.h:1229

llvm::GCNSubtarget::HasLdsBranchVmemWARHazard
bool HasLdsBranchVmemWARHazard
Definition GCNSubtarget.h:263

llvm::GCNSubtarget::hasPermlane32Swap
bool hasPermlane32Swap() const
Definition GCNSubtarget.h:1455

llvm::GCNSubtarget::HasCvtPkNormVOP2Insts
bool HasCvtPkNormVOP2Insts
Definition GCNSubtarget.h:174

llvm::GCNSubtarget::HasDot11Insts
bool HasDot11Insts
Definition GCNSubtarget.h:163

llvm::GCNSubtarget::partialVCCWritesUpdateVCCZ
bool partialVCCWritesUpdateVCCZ() const
Writes to VCC_LO/VCC_HI update the VCCZ flag.
Definition GCNSubtarget.h:548

llvm::GCNSubtarget::Has1_5xVGPRs
bool Has1_5xVGPRs
Definition GCNSubtarget.h:272

llvm::GCNSubtarget::hasSwap
bool hasSwap() const
Definition GCNSubtarget.h:503

llvm::GCNSubtarget::hasPkFmacF16Inst
bool hasPkFmacF16Inst() const
Definition GCNSubtarget.h:919

llvm::GCNSubtarget::HasClusters
bool HasClusters
Definition GCNSubtarget.h:300

llvm::GCNSubtarget::HasAtomicFMinFMaxF64FlatInsts
bool HasAtomicFMinFMaxF64FlatInsts
Definition GCNSubtarget.h:182

llvm::GCNSubtarget::hasPkMinMax3Insts
bool hasPkMinMax3Insts() const
Definition GCNSubtarget.h:1608

llvm::GCNSubtarget::hasDot2Insts
bool hasDot2Insts() const
Definition GCNSubtarget.h:845

llvm::GCNSubtarget::hasD16LoadStore
bool hasD16LoadStore() const
Definition GCNSubtarget.h:758

llvm::GCNSubtarget::HasExtendedImageInsts
bool HasExtendedImageInsts
Definition GCNSubtarget.h:143

llvm::GCNSubtarget::hasMergedShaders
bool hasMergedShaders() const
Definition GCNSubtarget.h:1545

llvm::GCNSubtarget::hasA16
bool hasA16() const
Definition GCNSubtarget.h:1180

llvm::GCNSubtarget::hasSDWAScalar
bool hasSDWAScalar() const
Definition GCNSubtarget.h:819

llvm::GCNSubtarget::hasRrWGMode
bool hasRrWGMode() const
Definition GCNSubtarget.h:1575

llvm::GCNSubtarget::supportsBackOffBarrier
bool supportsBackOffBarrier() const
Definition GCNSubtarget.h:629

llvm::GCNSubtarget::hasScalarCompareEq64
bool hasScalarCompareEq64() const
Definition GCNSubtarget.h:1108

llvm::GCNSubtarget::EnableXNACK
bool EnableXNACK
Definition GCNSubtarget.h:87

llvm::GCNSubtarget::has1_5xVGPRs
bool has1_5xVGPRs() const
Definition GCNSubtarget.h:1394

llvm::GCNSubtarget::getLDSBankCount
int getLDSBankCount() const
Definition GCNSubtarget.h:394

llvm::GCNSubtarget::HasG16
bool HasG16
Definition GCNSubtarget.h:146

llvm::GCNSubtarget::hasSafeCUPrefetch
bool hasSafeCUPrefetch() const
Definition GCNSubtarget.h:1046

llvm::GCNSubtarget::hasOnlyRevVALUShifts
bool hasOnlyRevVALUShifts() const
Definition GCNSubtarget.h:449

llvm::GCNSubtarget::hasImageStoreD16Bug
bool hasImageStoreD16Bug() const
Definition GCNSubtarget.h:1188

llvm::GCNSubtarget::hasNonNSAEncoding
bool hasNonNSAEncoding() const
Definition GCNSubtarget.h:1200

llvm::GCNSubtarget::HasSadInsts
bool HasSadInsts
Definition GCNSubtarget.h:171

llvm::GCNSubtarget::hasUsableDivScaleConditionOutput
bool hasUsableDivScaleConditionOutput() const
Condition output from div_scale is usable.
Definition GCNSubtarget.h:537

llvm::GCNSubtarget::mirFileLoaded
void mirFileLoaded(MachineFunction &MF) const override
Definition GCNSubtarget.cpp:376

llvm::GCNSubtarget::hasUsableDSOffset
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
Definition GCNSubtarget.h:528

llvm::GCNSubtarget::loadStoreOptEnabled
bool loadStoreOptEnabled() const
Definition GCNSubtarget.h:1238

llvm::GCNSubtarget::enableSubRegLiveness
bool enableSubRegLiveness() const override
Definition GCNSubtarget.h:1067

llvm::GCNSubtarget::hasDPPWavefrontShifts
bool hasDPPWavefrontShifts() const
Definition GCNSubtarget.h:1139

llvm::GCNSubtarget::getSGPRAllocGranule
unsigned getSGPRAllocGranule() const
Definition GCNSubtarget.h:1635

llvm::GCNSubtarget::hasAtomicFMinFMaxF64FlatInsts
bool hasAtomicFMinFMaxF64FlatInsts() const
Definition GCNSubtarget.h:935

llvm::GCNSubtarget::hasLdsAtomicAddF64
bool hasLdsAtomicAddF64() const
Definition GCNSubtarget.h:742

llvm::GCNSubtarget::HasSALUFloatInsts
bool HasSALUFloatInsts
Definition GCNSubtarget.h:242

llvm::GCNSubtarget::hasFlatLgkmVMemCountInOrder
bool hasFlatLgkmVMemCountInOrder() const
Definition GCNSubtarget.h:754

llvm::GCNSubtarget::GFX940Insts
bool GFX940Insts
Definition GCNSubtarget.h:111

llvm::GCNSubtarget::Has45BitNumRecordsBufferResource
bool Has45BitNumRecordsBufferResource
Definition GCNSubtarget.h:298

llvm::GCNSubtarget::flatScratchIsPointer
bool flatScratchIsPointer() const
Definition GCNSubtarget.h:1526

llvm::GCNSubtarget::hasSDWAMac
bool hasSDWAMac() const
Definition GCNSubtarget.h:827

llvm::GCNSubtarget::hasFP8ConversionInsts
bool hasFP8ConversionInsts() const
Definition GCNSubtarget.h:901

llvm::GCNSubtarget::hasShift64HighRegBug
bool hasShift64HighRegBug() const
Definition GCNSubtarget.h:1316

llvm::GCNSubtarget::hasDot7Insts
bool hasDot7Insts() const
Definition GCNSubtarget.h:865

llvm::GCNSubtarget::HasScalarDwordx3Loads
bool HasScalarDwordx3Loads
Definition GCNSubtarget.h:129

llvm::GCNSubtarget::hasApertureRegs
bool hasApertureRegs() const
Definition GCNSubtarget.h:663

llvm::GCNSubtarget::MaxPrivateElementSize
unsigned MaxPrivateElementSize
Definition GCNSubtarget.h:67

llvm::GCNSubtarget::HasCubeInsts
bool HasCubeInsts
Definition GCNSubtarget.h:169

llvm::GCNSubtarget::LDSMisalignedBug
bool LDSMisalignedBug
Definition GCNSubtarget.h:236

llvm::GCNSubtarget::unsafeDSOffsetFoldingEnabled
bool unsafeDSOffsetFoldingEnabled() const
Definition GCNSubtarget.h:532

llvm::GCNSubtarget::hasBitOp3Insts
bool hasBitOp3Insts() const
Definition GCNSubtarget.h:1452

llvm::GCNSubtarget::hasFPAtomicToDenormModeHazard
bool hasFPAtomicToDenormModeHazard() const
Definition GCNSubtarget.h:1346

llvm::GCNSubtarget::getAddressableNumArchVGPRs
unsigned getAddressableNumArchVGPRs() const
Definition GCNSubtarget.h:1724

llvm::GCNSubtarget::DumpCode
bool DumpCode
Definition GCNSubtarget.h:100

llvm::GCNSubtarget::hasFlatInstOffsets
bool hasFlatInstOffsets() const
Definition GCNSubtarget.h:693

llvm::GCNSubtarget::HasSetPrioIncWgInst
bool HasSetPrioIncWgInst
Definition GCNSubtarget.h:291

llvm::GCNSubtarget::vmemWriteNeedsExpWaitcnt
bool vmemWriteNeedsExpWaitcnt() const
Definition GCNSubtarget.h:1032

llvm::GCNSubtarget::hasAtomicFMinFMaxF32FlatInsts
bool hasAtomicFMinFMaxF32FlatInsts() const
Definition GCNSubtarget.h:931

llvm::GCNSubtarget::HasDot13Insts
bool HasDot13Insts
Definition GCNSubtarget.h:165

llvm::GCNSubtarget::shouldClusterStores
bool shouldClusterStores() const
Definition GCNSubtarget.h:1842

llvm::GCNSubtarget::HasXF32Insts
bool HasXF32Insts
Definition GCNSubtarget.h:199

llvm::GCNSubtarget::getMinNumSGPRs
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
Definition GCNSubtarget.h:1656

llvm::GCNSubtarget::getSGPREncodingGranule
unsigned getSGPREncodingGranule() const
Definition GCNSubtarget.h:1640

llvm::GCNSubtarget::HasSDWAScalar
bool HasSDWAScalar
Definition GCNSubtarget.h:133

llvm::GCNSubtarget::hasIEEEMinimumMaximumInsts
bool hasIEEEMinimumMaximumInsts() const
Definition GCNSubtarget.h:1572

llvm::GCNSubtarget::ParseSubtargetFeatures
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS)

llvm::GCNSubtarget::hasLdsBranchVmemWARHazard
bool hasLdsBranchVmemWARHazard() const
Definition GCNSubtarget.h:1310

llvm::GCNSubtarget::hasDefaultComponentZero
bool hasDefaultComponentZero() const
Definition GCNSubtarget.h:994

llvm::GCNSubtarget::hasGetWaveIdInst
bool hasGetWaveIdInst() const
Definition GCNSubtarget.h:1008

llvm::GCNSubtarget::hasCompressedExport
bool hasCompressedExport() const
Return true if the target's EXP instruction has the COMPR flag, which affects the meaning of the EN (...
Definition GCNSubtarget.h:1388

llvm::GCNSubtarget::hasGFX90AInsts
bool hasGFX90AInsts() const
Definition GCNSubtarget.h:1344

llvm::GCNSubtarget::hasFlatScratchHiInB64InstHazard
bool hasFlatScratchHiInB64InstHazard() const
Definition GCNSubtarget.h:1881

llvm::GCNSubtarget::hasDstSelForwardingHazard
bool hasDstSelForwardingHazard() const
Definition GCNSubtarget.h:1326

llvm::GCNSubtarget::HasMadU32Inst
bool HasMadU32Inst
Definition GCNSubtarget.h:286

llvm::GCNSubtarget::HasVmemPrefInsts
bool HasVmemPrefInsts
Definition GCNSubtarget.h:259

llvm::GCNSubtarget::UserSGPRInit16Bug
bool UserSGPRInit16Bug
Definition GCNSubtarget.h:120

llvm::GCNSubtarget::setScalarizeGlobalBehavior
void setScalarizeGlobalBehavior(bool b)
Definition GCNSubtarget.h:1071

llvm::GCNSubtarget::hasRelaxedBufferOOBMode
bool hasRelaxedBufferOOBMode() const
Definition GCNSubtarget.h:661

llvm::GCNSubtarget::UnalignedDSAccess
bool UnalignedDSAccess
Definition GCNSubtarget.h:239

llvm::GCNSubtarget::HasDPP8
bool HasDPP8
Definition GCNSubtarget.h:138

llvm::GCNSubtarget::GFX11Insts
bool GFX11Insts
Definition GCNSubtarget.h:114

llvm::GCNSubtarget::AddNoCarryInsts
bool AddNoCarryInsts
Definition GCNSubtarget.h:234

llvm::GCNSubtarget::hasPkAddMinMaxInsts
bool hasPkAddMinMaxInsts() const
Definition GCNSubtarget.h:1605

llvm::GCNSubtarget::HasScalarStores
bool HasScalarStores
Definition GCNSubtarget.h:130

llvm::GCNSubtarget::HasDPALU_DPP
bool HasDPALU_DPP
Definition GCNSubtarget.h:139

llvm::GCNSubtarget::hasDLInsts
bool hasDLInsts() const
Definition GCNSubtarget.h:835

llvm::GCNSubtarget::hasExtendedImageInsts
bool hasExtendedImageInsts() const
Definition GCNSubtarget.h:1172

llvm::GCNSubtarget::GFX10_BEncoding
bool GFX10_BEncoding
Definition GCNSubtarget.h:150

llvm::GCNSubtarget::hasVmemWriteVgprInOrder
bool hasVmemWriteVgprInOrder() const
Definition GCNSubtarget.h:1430

llvm::GCNSubtarget::hasBCNT
bool hasBCNT(unsigned Size) const
Definition GCNSubtarget.h:469

llvm::GCNSubtarget::getSNopBits
unsigned getSNopBits() const
Definition GCNSubtarget.h:1897

llvm::GCNSubtarget::HasTransposeLoadF4F6Insts
bool HasTransposeLoadF4F6Insts
Definition GCNSubtarget.h:250

llvm::GCNSubtarget::HasMADIntraFwdBug
bool HasMADIntraFwdBug
Definition GCNSubtarget.h:273

llvm::GCNSubtarget::hasMAIInsts
bool hasMAIInsts() const
Definition GCNSubtarget.h:893

llvm::GCNSubtarget::HasFlatAtomicFaddF32Inst
bool HasFlatAtomicFaddF32Inst
Definition GCNSubtarget.h:193

llvm::GCNSubtarget::hasLDSLoadB96_B128
bool hasLDSLoadB96_B128() const
Returns true if the target supports global_load_lds_dwordx3/global_load_lds_dwordx4 or buffer_load_dw...
Definition GCNSubtarget.h:1416

llvm::GCNSubtarget::has1024AddressableVGPRs
bool has1024AddressableVGPRs() const
Definition GCNSubtarget.h:1474

llvm::GCNSubtarget::supportsAgentScopeFineGrainedRemoteMemoryAtomics
bool supportsAgentScopeFineGrainedRemoteMemoryAtomics() const
Definition GCNSubtarget.h:984

llvm::GCNSubtarget::hasFlatScratchInsts
bool hasFlatScratchInsts() const
Definition GCNSubtarget.h:701

llvm::GCNSubtarget::HasFlatSegmentOffsetBug
bool HasFlatSegmentOffsetBug
Definition GCNSubtarget.h:267

llvm::GCNSubtarget::hasMultiDwordFlatScratchAddressing
bool hasMultiDwordFlatScratchAddressing() const
Definition GCNSubtarget.h:746

llvm::GCNSubtarget::hasArchitectedSGPRs
bool hasArchitectedSGPRs() const
Definition GCNSubtarget.h:1535

llvm::GCNSubtarget::hasFmaakFmamkF64Insts
bool hasFmaakFmamkF64Insts() const
Definition GCNSubtarget.h:1166

llvm::GCNSubtarget::hasTanhInsts
bool hasTanhInsts() const
Definition GCNSubtarget.h:1468

llvm::GCNSubtarget::hasHWFP64
bool hasHWFP64() const
Definition GCNSubtarget.h:427

llvm::GCNSubtarget::hasScaleOffset
bool hasScaleOffset() const
Definition GCNSubtarget.h:1227

llvm::GCNSubtarget::hasDenormModeInst
bool hasDenormModeInst() const
Definition GCNSubtarget.h:587

llvm::GCNSubtarget::hasPrivEnabledTrap2NopBug
bool hasPrivEnabledTrap2NopBug() const
Definition GCNSubtarget.h:1196

llvm::GCNSubtarget::HasPkFmacF16Inst
bool HasPkFmacF16Inst
Definition GCNSubtarget.h:178

llvm::GCNSubtarget::hasMFMAInlineLiteralBug
bool hasMFMAInlineLiteralBug() const
Definition GCNSubtarget.h:1256

llvm::GCNSubtarget::NegativeScratchOffsetBug
bool NegativeScratchOffsetBug
Definition GCNSubtarget.h:121

llvm::GCNSubtarget::hasCvtScaleForwardingHazard
bool hasCvtScaleForwardingHazard() const
Definition GCNSubtarget.h:1362

llvm::GCNSubtarget::getTotalNumVGPRs
unsigned getTotalNumVGPRs() const
Definition GCNSubtarget.h:1718

llvm::GCNSubtarget::getMinWavesPerEU
unsigned getMinWavesPerEU() const override
Definition GCNSubtarget.h:1832

llvm::GCNSubtarget::hasSMemTimeInst
bool hasSMemTimeInst() const
Definition GCNSubtarget.h:1012

llvm::GCNSubtarget::HasVGPRIndexMode
bool HasVGPRIndexMode
Definition GCNSubtarget.h:128

llvm::GCNSubtarget::HasMAIInsts
bool HasMAIInsts
Definition GCNSubtarget.h:166

llvm::GCNSubtarget::hasUnalignedDSAccessEnabled
bool hasUnalignedDSAccessEnabled() const
Definition GCNSubtarget.h:645

llvm::GCNSubtarget::hasTensorCvtLutInsts
bool hasTensorCvtLutInsts() const
Definition GCNSubtarget.h:1470

llvm::GCNSubtarget::hasNegativeScratchOffsetBug
bool hasNegativeScratchOffsetBug() const
Definition GCNSubtarget.h:1250

llvm::GCNSubtarget::getInstrInfo
const SIInstrInfo * getInstrInfo() const override
Definition GCNSubtarget.h:323

llvm::GCNSubtarget::hasSWakeupBarrier
bool hasSWakeupBarrier() const
Definition GCNSubtarget.h:1617

llvm::GCNSubtarget::getMaxWorkGroupsPerCU
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
Definition GCNSubtarget.h:1809

llvm::GCNSubtarget::AutoWaitcntBeforeBarrier
bool AutoWaitcntBeforeBarrier
Definition GCNSubtarget.h:76

llvm::GCNSubtarget::HasVscnt
bool HasVscnt
Definition GCNSubtarget.h:215

llvm::GCNSubtarget::hasDot1Insts
bool hasDot1Insts() const
Definition GCNSubtarget.h:841

llvm::GCNSubtarget::hasDot3Insts
bool hasDot3Insts() const
Definition GCNSubtarget.h:849

llvm::GCNSubtarget::getConstantBusLimit
unsigned getConstantBusLimit(unsigned Opcode) const
Definition GCNSubtarget.cpp:196

llvm::GCNSubtarget::hasMADIntraFwdBug
bool hasMADIntraFwdBug() const
Definition GCNSubtarget.h:1192

llvm::GCNSubtarget::hasVALUMaskWriteHazard
bool hasVALUMaskWriteHazard() const
Definition GCNSubtarget.h:1372

llvm::GCNSubtarget::hasCondSubInsts
bool hasCondSubInsts() const
Definition GCNSubtarget.h:1630

llvm::GCNSubtarget::TrapHandlerAbi
TrapHandlerAbi
Definition GCNSubtarget.h:40

llvm::GCNSubtarget::TrapHandlerAbi::NONE
@ NONE
Definition GCNSubtarget.h:41

llvm::GCNSubtarget::TrapHandlerAbi::AMDHSA
@ AMDHSA
Definition GCNSubtarget.h:42

llvm::GCNSubtarget::getInlineAsmLowering
const InlineAsmLowering * getInlineAsmLowering() const override
Definition GCNSubtarget.h:345

llvm::GCNSubtarget::HasDot3Insts
bool HasDot3Insts
Definition GCNSubtarget.h:155

llvm::GCNSubtarget::hasAutoWaitcntBeforeBarrier
bool hasAutoWaitcntBeforeBarrier() const
Definition GCNSubtarget.h:623

llvm::GCNSubtarget::hasNSAClauseBug
bool hasNSAClauseBug() const
Definition GCNSubtarget.h:1340

llvm::GCNSubtarget::EnableSRAMECC
bool EnableSRAMECC
Definition GCNSubtarget.h:212

llvm::GCNSubtarget::HasSDWAOmod
bool HasSDWAOmod
Definition GCNSubtarget.h:132

llvm::GCNSubtarget::hasAtomicFaddRtnInsts
bool hasAtomicFaddRtnInsts() const
Definition GCNSubtarget.h:947

llvm::GCNSubtarget::getTotalNumSGPRs
unsigned getTotalNumSGPRs() const
Definition GCNSubtarget.h:1645

llvm::GCNSubtarget::hasGFX1250Insts
bool hasGFX1250Insts() const
Definition GCNSubtarget.h:1581

llvm::GCNSubtarget::HasLdsBarrierArriveAtomic
bool HasLdsBarrierArriveAtomic
Definition GCNSubtarget.h:290

llvm::GCNSubtarget::getInstrItineraryData
const InstrItineraryData * getInstrItineraryData() const override
Definition GCNSubtarget.h:365

llvm::GCNSubtarget::HasDPP
bool HasDPP
Definition GCNSubtarget.h:137

llvm::GCNSubtarget::hasSafeSmemPrefetch
bool hasSafeSmemPrefetch() const
Definition GCNSubtarget.h:1044

llvm::GCNSubtarget::adjustSchedDependency
void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep, const TargetSchedModel *SchedModel) const override
Definition GCNSubtarget.cpp:630

llvm::GCNSubtarget::overridePostRASchedPolicy
void overridePostRASchedPolicy(MachineSchedPolicy &Policy, const SchedRegion &Region) const override
Definition GCNSubtarget.cpp:339

llvm::GCNSubtarget::HasShaderCyclesHiLoRegisters
bool HasShaderCyclesHiLoRegisters
Definition GCNSubtarget.h:220

llvm::GCNSubtarget::EnableLoadStoreOpt
bool EnableLoadStoreOpt
Definition GCNSubtarget.h:95

llvm::GCNSubtarget::getMaxLocalMemSizeWithWaveCount
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used without restricting the occupancy to lower than WaveCount.

llvm::GCNSubtarget::hasPkMovB32
bool hasPkMovB32() const
Definition GCNSubtarget.h:1158

llvm::GCNSubtarget::needsAlignedVGPRs
bool needsAlignedVGPRs() const
Return true if operations acting on VGPR tuples require even alignment.
Definition GCNSubtarget.h:1381

llvm::GCNSubtarget::hasGFX10_3Insts
bool hasGFX10_3Insts() const
Definition GCNSubtarget.h:1216

llvm::GCNSubtarget::getStackAlignment
Align getStackAlignment() const
Definition GCNSubtarget.h:1059

llvm::GCNSubtarget::privateMemoryResourceIsRangeChecked
bool privateMemoryResourceIsRangeChecked() const
Definition GCNSubtarget.h:613

llvm::GCNSubtarget::hasScalarSubwordLoads
bool hasScalarSubwordLoads() const
Definition GCNSubtarget.h:515

llvm::GCNSubtarget::hasDot11Insts
bool hasDot11Insts() const
Definition GCNSubtarget.h:881

llvm::GCNSubtarget::HasTanhInsts
bool HasTanhInsts
Definition GCNSubtarget.h:248

llvm::GCNSubtarget::enableFlatScratch
bool enableFlatScratch() const
Definition GCNSubtarget.h:718

llvm::GCNSubtarget::KernargPreload
bool KernargPreload
Definition GCNSubtarget.h:83

llvm::GCNSubtarget::hasMadF16
bool hasMadF16() const
Definition GCNSubtarget.cpp:387

llvm::GCNSubtarget::hasDsAtomicAsyncBarrierArriveB64PipeBug
bool hasDsAtomicAsyncBarrierArriveB64PipeBug() const
Definition GCNSubtarget.h:1869

llvm::GCNSubtarget::HasGWS
bool HasGWS
Definition GCNSubtarget.h:233

llvm::GCNSubtarget::hasMin3Max3PKF16
bool hasMin3Max3PKF16() const
Definition GCNSubtarget.h:1466

llvm::GCNSubtarget::DynamicVGPR
bool DynamicVGPR
Definition GCNSubtarget.h:205

llvm::GCNSubtarget::EnablePreciseMemory
bool EnablePreciseMemory
Definition GCNSubtarget.h:92

llvm::GCNSubtarget::hasUnalignedBufferAccess
bool hasUnalignedBufferAccess() const
Definition GCNSubtarget.h:633

llvm::GCNSubtarget::GFX10_3Insts
bool GFX10_3Insts
Definition GCNSubtarget.h:117

llvm::GCNSubtarget::HasGetWaveIdInst
bool HasGetWaveIdInst
Definition GCNSubtarget.h:217

llvm::GCNSubtarget::hasR128A16
bool hasR128A16() const
Definition GCNSubtarget.h:1176

llvm::GCNSubtarget::hasCvtPkNormVOP3Insts
bool hasCvtPkNormVOP3Insts() const
Definition GCNSubtarget.h:915

llvm::GCNSubtarget::hasOffset3fBug
bool hasOffset3fBug() const
Definition GCNSubtarget.h:1184

llvm::GCNSubtarget::hasDwordx3LoadStores
bool hasDwordx3LoadStores() const
Definition GCNSubtarget.h:1265

llvm::GCNSubtarget::hasPrngInst
bool hasPrngInst() const
Definition GCNSubtarget.h:1498

llvm::GCNSubtarget::hasSignedScratchOffsets
bool hasSignedScratchOffsets() const
Definition GCNSubtarget.h:1579

llvm::GCNSubtarget::GFX9Insts
bool GFX9Insts
Definition GCNSubtarget.h:109

llvm::GCNSubtarget::HasPrivEnabledTrap2NopBug
bool HasPrivEnabledTrap2NopBug
Definition GCNSubtarget.h:271

llvm::GCNSubtarget::hasGlobalAddTidInsts
bool hasGlobalAddTidInsts() const
Definition GCNSubtarget.h:723

llvm::GCNSubtarget::hasSGPRInitBug
bool hasSGPRInitBug() const
Definition GCNSubtarget.h:1242

llvm::GCNSubtarget::hasFlatScrRegister
bool hasFlatScrRegister() const
Definition GCNSubtarget.h:689

llvm::GCNSubtarget::hasFmaMixBF16Insts
bool hasFmaMixBF16Insts() const
Definition GCNSubtarget.h:493

llvm::GCNSubtarget::hasGetPCZeroExtension
bool hasGetPCZeroExtension() const
Definition GCNSubtarget.h:1622

llvm::GCNSubtarget::hasPermLane64
bool hasPermLane64() const
Definition GCNSubtarget.h:1129

llvm::GCNSubtarget::HasBVHDualAndBVH8Insts
bool HasBVHDualAndBVH8Insts
Definition GCNSubtarget.h:252

llvm::GCNSubtarget::requiresNopBeforeDeallocVGPRs
bool requiresNopBeforeDeallocVGPRs() const
Definition GCNSubtarget.h:1850

llvm::GCNSubtarget::getMinNumVGPRs
unsigned getMinNumVGPRs(unsigned WavesPerEU, unsigned DynamicVGPRBlockSize) const
Definition GCNSubtarget.h:1735

llvm::GCNSubtarget::hasVMemToLDSLoad
bool hasVMemToLDSLoad() const
Definition GCNSubtarget.h:1420

llvm::GCNSubtarget::supportsGetDoorbellID
bool supportsGetDoorbellID() const
Definition GCNSubtarget.h:521

llvm::GCNSubtarget::supportsWave32
bool supportsWave32() const
Definition GCNSubtarget.h:1783

llvm::GCNSubtarget::FlatGVSMode
bool FlatGVSMode
Definition GCNSubtarget.h:227

llvm::GCNSubtarget::HasNSAClauseBug
bool HasNSAClauseBug
Definition GCNSubtarget.h:265

llvm::GCNSubtarget::hasVcmpxExecWARHazard
bool hasVcmpxExecWARHazard() const
Definition GCNSubtarget.h:1306

llvm::GCNSubtarget::isTgSplitEnabled
bool isTgSplitEnabled() const
Definition GCNSubtarget.h:675

llvm::GCNSubtarget::hasFlatAtomicFaddF32Inst
bool hasFlatAtomicFaddF32Inst() const
Definition GCNSubtarget.h:967

llvm::GCNSubtarget::hasKernargPreload
bool hasKernargPreload() const
Definition GCNSubtarget.h:1553

llvm::GCNSubtarget::HasMin3Max3PKF16
bool HasMin3Max3PKF16
Definition GCNSubtarget.h:282

llvm::GCNSubtarget::FlatGlobalInsts
bool FlatGlobalInsts
Definition GCNSubtarget.h:225

llvm::GCNSubtarget::hasFP8Insts
bool hasFP8Insts() const
Definition GCNSubtarget.h:897

llvm::GCNSubtarget::getMaxNumAGPRs
unsigned getMaxNumAGPRs(const Function &F) const
Definition GCNSubtarget.h:1765

llvm::GCNSubtarget::hasReadM0MovRelInterpHazard
bool hasReadM0MovRelInterpHazard() const
Definition GCNSubtarget.h:1269

llvm::GCNSubtarget::isDynamicVGPREnabled
bool isDynamicVGPREnabled() const
Definition GCNSubtarget.h:1856

llvm::GCNSubtarget::getRegisterInfo
const SIRegisterInfo * getRegisterInfo() const override
Definition GCNSubtarget.h:335

llvm::GCNSubtarget::ScalarizeGlobal
bool ScalarizeGlobal
Definition GCNSubtarget.h:241

llvm::GCNSubtarget::hasRequiredExportPriority
bool hasRequiredExportPriority() const
Definition GCNSubtarget.h:1428

llvm::GCNSubtarget::hasDOTOpSelHazard
bool hasDOTOpSelHazard() const
Definition GCNSubtarget.h:1329

llvm::GCNSubtarget::hasLdsWaitVMSRC
bool hasLdsWaitVMSRC() const
Definition GCNSubtarget.h:1354

llvm::GCNSubtarget::hasMSAALoadDstSelBug
bool hasMSAALoadDstSelBug() const
Definition GCNSubtarget.h:1194

llvm::GCNSubtarget::getBoolRC
const TargetRegisterClass * getBoolRC() const
Definition GCNSubtarget.h:1803

llvm::GCNSubtarget::HasPermlane16Swap
bool HasPermlane16Swap
Definition GCNSubtarget.h:253

llvm::GCNSubtarget::getBaseMaxNumVGPRs
unsigned getBaseMaxNumVGPRs(const Function &F, std::pair< unsigned, unsigned > NumVGPRBounds) const
Definition GCNSubtarget.cpp:541

llvm::GCNSubtarget::hasFmaakFmamkF32Insts
bool hasFmaakFmamkF32Insts() const
Definition GCNSubtarget.h:1162

llvm::GCNSubtarget::hasClusters
bool hasClusters() const
Definition GCNSubtarget.h:1886

llvm::GCNSubtarget::hasVscnt
bool hasVscnt() const
Definition GCNSubtarget.h:1004

llvm::GCNSubtarget::HasMinimum3Maximum3F32
bool HasMinimum3Maximum3F32
Definition GCNSubtarget.h:280

llvm::GCNSubtarget::hasMad64_32
bool hasMad64_32() const
Definition GCNSubtarget.h:811

llvm::GCNSubtarget::getInstructionSelector
InstructionSelector * getInstructionSelector() const override
Definition GCNSubtarget.h:349

llvm::GCNSubtarget::getVGPREncodingGranule
unsigned getVGPREncodingGranule() const
Definition GCNSubtarget.h:1713

llvm::GCNSubtarget::NegativeUnalignedScratchOffsetBug
bool NegativeUnalignedScratchOffsetBug
Definition GCNSubtarget.h:122

llvm::GCNSubtarget::hasHardClauses
bool hasHardClauses() const
Definition GCNSubtarget.h:1342

llvm::GCNSubtarget::useDS128
bool useDS128() const
Definition GCNSubtarget.h:597

llvm::GCNSubtarget::hasExtendedWaitCounts
bool hasExtendedWaitCounts() const
Definition GCNSubtarget.h:1434

llvm::GCNSubtarget::HasImageInsts
bool HasImageInsts
Definition GCNSubtarget.h:142

llvm::GCNSubtarget::hasBVHDualAndBVH8Insts
bool hasBVHDualAndBVH8Insts() const
Definition GCNSubtarget.h:1500

llvm::GCNSubtarget::hasMinimum3Maximum3PKF16
bool hasMinimum3Maximum3PKF16() const
Definition GCNSubtarget.h:1476

llvm::GCNSubtarget::hasLshlAddU64Inst
bool hasLshlAddU64Inst() const
Definition GCNSubtarget.h:1224

llvm::GCNSubtarget::hasLDSMisalignedBug
bool hasLDSMisalignedBug() const
Definition GCNSubtarget.h:1298

llvm::GCNSubtarget::HasPartialNSAEncoding
bool HasPartialNSAEncoding
Definition GCNSubtarget.h:148

llvm::GCNSubtarget::d16PreservesUnusedBits
bool d16PreservesUnusedBits() const
Definition GCNSubtarget.h:762

llvm::GCNSubtarget::hasFmacF64Inst
bool hasFmacF64Inst() const
Definition GCNSubtarget.h:839

llvm::GCNSubtarget::EnablePRTStrictNull
bool EnablePRTStrictNull
Definition GCNSubtarget.h:99

llvm::GCNSubtarget::RequiresWaitsBeforeSystemScopeStores
bool RequiresWaitsBeforeSystemScopeStores
Definition GCNSubtarget.h:301

llvm::GCNSubtarget::hasXF32Insts
bool hasXF32Insts() const
Definition GCNSubtarget.h:1443

llvm::GCNSubtarget::hasInstPrefetch
bool hasInstPrefetch() const
Definition GCNSubtarget.h:1036

llvm::GCNSubtarget::hasAddPC64Inst
bool hasAddPC64Inst() const
Definition GCNSubtarget.h:1472

llvm::GCNSubtarget::maxHardClauseLength
unsigned maxHardClauseLength() const
Definition GCNSubtarget.h:1496

llvm::GCNSubtarget::hasAshrPkInsts
bool hasAshrPkInsts() const
Definition GCNSubtarget.h:1456

llvm::GCNSubtarget::isMesaGfxShader
bool isMesaGfxShader(const Function &F) const
Definition GCNSubtarget.h:807

llvm::GCNSubtarget::hasVcmpxPermlaneHazard
bool hasVcmpxPermlaneHazard() const
Definition GCNSubtarget.h:1286

llvm::GCNSubtarget::HasDLInsts
bool HasDLInsts
Definition GCNSubtarget.h:151

llvm::GCNSubtarget::hasUserSGPRInit16Bug
bool hasUserSGPRInit16Bug() const
Definition GCNSubtarget.h:1246

llvm::GCNSubtarget::DynamicVGPRBlockSize32
bool DynamicVGPRBlockSize32
Definition GCNSubtarget.h:206

llvm::GCNSubtarget::hasExportInsts
bool hasExportInsts() const
Definition GCNSubtarget.h:735

llvm::GCNSubtarget::hasDPP
bool hasDPP() const
Definition GCNSubtarget.h:1131

llvm::GCNSubtarget::hasVINTERPEncoding
bool hasVINTERPEncoding() const
Definition GCNSubtarget.h:739

llvm::GCNSubtarget::hasGloballyAddressableScratch
bool hasGloballyAddressableScratch() const
Definition GCNSubtarget.h:1368

llvm::GCNSubtarget::getRegBankInfo
const AMDGPURegisterBankInfo * getRegBankInfo() const override
Definition GCNSubtarget.h:357

llvm::GCNSubtarget::hasAddSubU64Insts
bool hasAddSubU64Insts() const
Definition GCNSubtarget.h:1586

llvm::GCNSubtarget::hasLegacyGeometry
bool hasLegacyGeometry() const
Definition GCNSubtarget.h:1550

llvm::GCNSubtarget::has64BitLiterals
bool has64BitLiterals() const
Definition GCNSubtarget.h:1487

llvm::GCNSubtarget::LDSBankCount
int LDSBankCount
Definition GCNSubtarget.h:66

llvm::GCNSubtarget::FullRate64Ops
bool FullRate64Ops
Definition GCNSubtarget.h:72

llvm::GCNSubtarget::getTrapHandlerAbi
TrapHandlerAbi getTrapHandlerAbi() const
Definition GCNSubtarget.h:517

llvm::GCNSubtarget::isCuModeEnabled
bool isCuModeEnabled() const
Definition GCNSubtarget.h:679

llvm::GCNSubtarget::hasScalarAtomics
bool hasScalarAtomics() const
Definition GCNSubtarget.h:1118

llvm::GCNSubtarget::getFrameLowering
const SIFrameLowering * getFrameLowering() const override
Definition GCNSubtarget.h:327

llvm::GCNSubtarget::HasR128A16
bool HasR128A16
Definition GCNSubtarget.h:144

llvm::GCNSubtarget::hasUnalignedScratchAccess
bool hasUnalignedScratchAccess() const
Definition GCNSubtarget.h:649

llvm::GCNSubtarget::zeroesHigh16BitsOfDest
bool zeroesHigh16BitsOfDest(unsigned Opcode) const
Returns if the result of this instruction with a 16-bit result returned in a 32-bit register implicit...
Definition GCNSubtarget.cpp:224

llvm::GCNSubtarget::hasMinimum3Maximum3F16
bool hasMinimum3Maximum3F16() const
Definition GCNSubtarget.h:1462

llvm::GCNSubtarget::HasMinimum3Maximum3F16
bool HasMinimum3Maximum3F16
Definition GCNSubtarget.h:281

llvm::GCNSubtarget::hasSDWAOutModsVOPC
bool hasSDWAOutModsVOPC() const
Definition GCNSubtarget.h:831

llvm::GCNSubtarget::hasAtomicFMinFMaxF32GlobalInsts
bool hasAtomicFMinFMaxF32GlobalInsts() const
Definition GCNSubtarget.h:923

llvm::GCNSubtarget::HasAddSubU64Insts
bool HasAddSubU64Insts
Definition GCNSubtarget.h:285

llvm::GCNSubtarget::HasFmacF64Inst
bool HasFmacF64Inst
Definition GCNSubtarget.h:152

llvm::GCNSubtarget::getBaseMaxNumSGPRs
unsigned getBaseMaxNumSGPRs(const Function &F, std::pair< unsigned, unsigned > WavesPerEU, unsigned PreloadedSGPRs, unsigned ReservedNumSGPRs) const
Definition GCNSubtarget.cpp:457

llvm::GCNSubtarget::hasLdsBarrierArriveAtomic
bool hasLdsBarrierArriveAtomic() const
Definition GCNSubtarget.h:1491

llvm::GCNSubtarget::hasGFX950Insts
bool hasGFX950Insts() const
Definition GCNSubtarget.h:1411

llvm::GCNSubtarget::HasNoDataDepHazard
bool HasNoDataDepHazard
Definition GCNSubtarget.h:222

llvm::GCNSubtarget::hasCvtNormInsts
bool hasCvtNormInsts() const
Definition GCNSubtarget.h:911

llvm::GCNSubtarget::HasVOPDInsts
bool HasVOPDInsts
Definition GCNSubtarget.h:274

llvm::GCNSubtarget::HasDot7Insts
bool HasDot7Insts
Definition GCNSubtarget.h:159

llvm::GCNSubtarget::has45BitNumRecordsBufferResource
bool has45BitNumRecordsBufferResource() const
Definition GCNSubtarget.h:1907

llvm::GCNSubtarget::Has64BitLiterals
bool Has64BitLiterals
Definition GCNSubtarget.h:245

llvm::GCNSubtarget::getTargetID
const AMDGPU::IsaInfo::AMDGPUTargetID & getTargetID() const
Definition GCNSubtarget.h:361

llvm::GCNSubtarget::getMaxNumPreloadedSGPRs
unsigned getMaxNumPreloadedSGPRs() const
Definition GCNSubtarget.cpp:511

llvm::GCNSubtarget::hasAtomicCSubNoRtnInsts
bool hasAtomicCSubNoRtnInsts() const
Definition GCNSubtarget.h:1563

llvm::GCNSubtarget::HasSDWAOutModsVOPC
bool HasSDWAOutModsVOPC
Definition GCNSubtarget.h:136

llvm::GCNSubtarget::EnableCuMode
bool EnableCuMode
Definition GCNSubtarget.h:90

llvm::GCNSubtarget::hasScalarFlatScratchInsts
bool hasScalarFlatScratchInsts() const
Definition GCNSubtarget.h:714

llvm::GCNSubtarget::initializeSubtargetDependencies
GCNSubtarget & initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS)
Definition GCNSubtarget.cpp:57

llvm::GCNSubtarget::Gen
unsigned Gen
Definition GCNSubtarget.h:64

llvm::GCNSubtarget::has12DWordStoreHazard
bool has12DWordStoreHazard() const
Definition GCNSubtarget.h:1260

llvm::GCNSubtarget::hasVALUPartialForwardingHazard
bool hasVALUPartialForwardingHazard() const
Definition GCNSubtarget.h:1356

llvm::GCNSubtarget::dumpCode
bool dumpCode() const
Definition GCNSubtarget.h:573

llvm::GCNSubtarget::hasNoDataDepHazard
bool hasNoDataDepHazard() const
Definition GCNSubtarget.h:1028

llvm::GCNSubtarget::overrideSchedPolicy
void overrideSchedPolicy(MachineSchedPolicy &Policy, const SchedRegion &Region) const override
Definition GCNSubtarget.cpp:322

llvm::GCNSubtarget::HasWaitXcnt
bool HasWaitXcnt
Definition GCNSubtarget.h:216

llvm::GCNSubtarget::HasMovrel
bool HasMovrel
Definition GCNSubtarget.h:127

llvm::GCNSubtarget::useVGPRBlockOpsForCSR
bool useVGPRBlockOpsForCSR() const
Definition GCNSubtarget.h:1366

llvm::GCNSubtarget::HasTensorCvtLutInsts
bool HasTensorCvtLutInsts
Definition GCNSubtarget.h:249

llvm::GCNSubtarget::computeOccupancy
std::pair< unsigned, unsigned > computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only ...
Definition GCNSubtarget.cpp:440

llvm::GCNSubtarget::hasUnalignedDSAccess
bool hasUnalignedDSAccess() const
Definition GCNSubtarget.h:641

llvm::GCNSubtarget::hasAddMinMaxInsts
bool hasAddMinMaxInsts() const
Definition GCNSubtarget.h:1602

llvm::GCNSubtarget::needsKernArgPreloadProlog
bool needsKernArgPreloadProlog() const
Definition GCNSubtarget.h:1626

llvm::GCNSubtarget::hasRestrictedSOffset
bool hasRestrictedSOffset() const
Definition GCNSubtarget.h:1426

llvm::GCNSubtarget::hasMin3Max3_16
bool hasMin3Max3_16() const
Definition GCNSubtarget.h:485

llvm::GCNSubtarget::hasIntClamp
bool hasIntClamp() const
Definition GCNSubtarget.h:415

llvm::GCNSubtarget::hasGFX10_AEncoding
bool hasGFX10_AEncoding() const
Definition GCNSubtarget.h:1208

llvm::GCNSubtarget::hasFP8E5M3Insts
bool hasFP8E5M3Insts() const
Definition GCNSubtarget.h:917

llvm::GCNSubtarget::hasFlatSegmentOffsetBug
bool hasFlatSegmentOffsetBug() const
Definition GCNSubtarget.h:750

llvm::GCNSubtarget::GFX10_AEncoding
bool GFX10_AEncoding
Definition GCNSubtarget.h:149

llvm::GCNSubtarget::getMaxNumVGPRs
unsigned getMaxNumVGPRs(unsigned WavesPerEU, unsigned DynamicVGPRBlockSize) const
Definition GCNSubtarget.h:1743

llvm::GCNSubtarget::getVGPRAllocGranule
unsigned getVGPRAllocGranule(unsigned DynamicVGPRBlockSize) const
Definition GCNSubtarget.h:1708

llvm::GCNSubtarget::MIMG_R128
bool MIMG_R128
Definition GCNSubtarget.h:106

llvm::GCNSubtarget::hasEmulatedSystemScopeAtomics
bool hasEmulatedSystemScopeAtomics() const
Definition GCNSubtarget.h:990

llvm::GCNSubtarget::hasMadU64U32NoCarry
bool hasMadU64U32NoCarry() const
Definition GCNSubtarget.h:1596

llvm::GCNSubtarget::getSetRegWaitStates
unsigned getSetRegWaitStates() const
Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
Definition GCNSubtarget.h:569

llvm::GCNSubtarget::getTargetLowering
const SITargetLowering * getTargetLowering() const override
Definition GCNSubtarget.h:331

llvm::GCNSubtarget::HasVcmpxPermlaneHazard
bool HasVcmpxPermlaneHazard
Definition GCNSubtarget.h:255

llvm::GCNSubtarget::hasPackedFP32Ops
bool hasPackedFP32Ops() const
Definition GCNSubtarget.h:1153

llvm::GCNSubtarget::SupportsXNACK
bool SupportsXNACK
Definition GCNSubtarget.h:82

llvm::GCNSubtarget::hasTransForwardingHazard
bool hasTransForwardingHazard() const
Definition GCNSubtarget.h:1322

llvm::GCNSubtarget::hasDot6Insts
bool hasDot6Insts() const
Definition GCNSubtarget.h:861

llvm::GCNSubtarget::HasFmaMixInsts
bool HasFmaMixInsts
Definition GCNSubtarget.h:125

llvm::GCNSubtarget::BackOffBarrier
bool BackOffBarrier
Definition GCNSubtarget.h:77

llvm::GCNSubtarget::TrapID
TrapID
Definition GCNSubtarget.h:45

llvm::GCNSubtarget::TrapID::LLVMAMDHSADebugTrap
@ LLVMAMDHSADebugTrap
Definition GCNSubtarget.h:47

llvm::GCNSubtarget::TrapID::LLVMAMDHSATrap
@ LLVMAMDHSATrap
Definition GCNSubtarget.h:46

llvm::GCNSubtarget::hasGFX940Insts
bool hasGFX940Insts() const
Definition GCNSubtarget.h:1407

llvm::GCNSubtarget::HasScalarAtomics
bool HasScalarAtomics
Definition GCNSubtarget.h:131

llvm::GCNSubtarget::hasFullRate64Ops
bool hasFullRate64Ops() const
Definition GCNSubtarget.h:435

llvm::GCNSubtarget::HasSafeCUPrefetch
bool HasSafeCUPrefetch
Definition GCNSubtarget.h:261

llvm::GCNSubtarget::EnableSIScheduler
bool EnableSIScheduler
Definition GCNSubtarget.h:97

llvm::GCNSubtarget::hasScalarStores
bool hasScalarStores() const
Definition GCNSubtarget.h:1114

llvm::GCNSubtarget::isTrapHandlerEnabled
bool isTrapHandlerEnabled() const
Definition GCNSubtarget.h:667

llvm::GCNSubtarget::enableMachineScheduler
bool enableMachineScheduler() const override
Definition GCNSubtarget.h:1061

llvm::GCNSubtarget::hasLDSFPAtomicAddF64
bool hasLDSFPAtomicAddF64() const
Definition GCNSubtarget.h:1123

llvm::GCNSubtarget::HasAtomicFlatPkAdd16Insts
bool HasAtomicFlatPkAdd16Insts
Definition GCNSubtarget.h:184

llvm::GCNSubtarget::hasFlatGlobalInsts
bool hasFlatGlobalInsts() const
Definition GCNSubtarget.h:697

llvm::GCNSubtarget::HasGloballyAddressableScratch
bool HasGloballyAddressableScratch
Definition GCNSubtarget.h:296

llvm::GCNSubtarget::hasDX10ClampMode
bool hasDX10ClampMode() const
Definition GCNSubtarget.h:1566

llvm::GCNSubtarget::HasOffset3fBug
bool HasOffset3fBug
Definition GCNSubtarget.h:266

llvm::GCNSubtarget::getNSAThreshold
unsigned getNSAThreshold(const MachineFunction &MF) const
Definition GCNSubtarget.cpp:675

llvm::GCNSubtarget::HasDot4Insts
bool HasDot4Insts
Definition GCNSubtarget.h:156

llvm::GCNSubtarget::HasDot9Insts
bool HasDot9Insts
Definition GCNSubtarget.h:161

llvm::GCNSubtarget::HasAtomicFMinFMaxF32GlobalInsts
bool HasAtomicFMinFMaxF32GlobalInsts
Definition GCNSubtarget.h:179

llvm::GCNSubtarget::HasDPPSrc1SGPR
bool HasDPPSrc1SGPR
Definition GCNSubtarget.h:140

llvm::GCNSubtarget::getScalarizeGlobalBehavior
bool getScalarizeGlobalBehavior() const
Definition GCNSubtarget.h:1072

llvm::GCNSubtarget::hasPKF32InstsReplicatingLower32BitsOfScalarInput
bool hasPKF32InstsReplicatingLower32BitsOfScalarInput() const
Definition GCNSubtarget.h:1448

llvm::GCNSubtarget::HasAtomicFMinFMaxF32FlatInsts
bool HasAtomicFMinFMaxF32FlatInsts
Definition GCNSubtarget.h:181

llvm::GCNSubtarget::HasPseudoScalarTrans
bool HasPseudoScalarTrans
Definition GCNSubtarget.h:243

llvm::GCNSubtarget::hasReadM0LdsDmaHazard
bool hasReadM0LdsDmaHazard() const
Definition GCNSubtarget.h:1278

llvm::GCNSubtarget::hasScalarSMulU64
bool hasScalarSMulU64() const
Definition GCNSubtarget.h:800

llvm::GCNSubtarget::HasPointSampleAccel
bool HasPointSampleAccel
Definition GCNSubtarget.h:289

llvm::GCNSubtarget::HasDot8Insts
bool HasDot8Insts
Definition GCNSubtarget.h:160

llvm::GCNSubtarget::getKnownHighZeroBitsForFrameIndex
unsigned getKnownHighZeroBitsForFrameIndex() const
Return the number of high bits known to be zero for a frame index.
Definition GCNSubtarget.h:390

llvm::GCNSubtarget::hasScratchBaseForwardingHazard
bool hasScratchBaseForwardingHazard() const
Definition GCNSubtarget.h:1875

llvm::GCNSubtarget::HasShaderCyclesRegister
bool HasShaderCyclesRegister
Definition GCNSubtarget.h:219

llvm::GCNSubtarget::FlatInstOffsets
bool FlatInstOffsets
Definition GCNSubtarget.h:224

llvm::GCNSubtarget::hasLerpInst
bool hasLerpInst() const
Definition GCNSubtarget.h:905

llvm::GCNSubtarget::hasIntMinMax64
bool hasIntMinMax64() const
Definition GCNSubtarget.h:1599

llvm::GCNSubtarget::hasShaderCyclesHiLoRegisters
bool hasShaderCyclesHiLoRegisters() const
Definition GCNSubtarget.h:1020

llvm::GCNSubtarget::hasSDWASdst
bool hasSDWASdst() const
Definition GCNSubtarget.h:823

llvm::GCNSubtarget::HasDefaultComponentBroadcast
bool HasDefaultComponentBroadcast
Definition GCNSubtarget.h:198

llvm::GCNSubtarget::HasNoSdstCMPX
bool HasNoSdstCMPX
Definition GCNSubtarget.h:214

llvm::GCNSubtarget::hasScalarPackInsts
bool hasScalarPackInsts() const
Definition GCNSubtarget.h:507

llvm::GCNSubtarget::hasFFBL
bool hasFFBL() const
Definition GCNSubtarget.h:473

llvm::GCNSubtarget::hasNSAEncoding
bool hasNSAEncoding() const
Definition GCNSubtarget.h:1198

llvm::GCNSubtarget::requiresDisjointEarlyClobberAndUndef
bool requiresDisjointEarlyClobberAndUndef() const override
Definition GCNSubtarget.h:1861

llvm::GCNSubtarget::hasVALUReadSGPRHazard
bool hasVALUReadSGPRHazard() const
Definition GCNSubtarget.h:1374

llvm::GCNSubtarget::hasSMemRealTime
bool hasSMemRealTime() const
Definition GCNSubtarget.h:1094

llvm::GCNSubtarget::EnableTgSplit
bool EnableTgSplit
Definition GCNSubtarget.h:89

llvm::GCNSubtarget::hasFlatAddressSpace
bool hasFlatAddressSpace() const
Definition GCNSubtarget.h:685

llvm::GCNSubtarget::hasDPPBroadcasts
bool hasDPPBroadcasts() const
Definition GCNSubtarget.h:1135

llvm::GCNSubtarget::HasFP8Insts
bool HasFP8Insts
Definition GCNSubtarget.h:167

llvm::GCNSubtarget::usePRTStrictNull
bool usePRTStrictNull() const
Definition GCNSubtarget.h:619

llvm::GCNSubtarget::hasMovB64
bool hasMovB64() const
Definition GCNSubtarget.h:1222

llvm::GCNSubtarget::hasVmemPrefInsts
bool hasVmemPrefInsts() const
Definition GCNSubtarget.h:1042

llvm::GCNSubtarget::getAddressableNumVGPRs
unsigned getAddressableNumVGPRs(unsigned DynamicVGPRBlockSize) const
Definition GCNSubtarget.h:1729

llvm::GCNSubtarget::HasFmaMixBF16Insts
bool HasFmaMixBF16Insts
Definition GCNSubtarget.h:126

llvm::GCNSubtarget::hasCubeInsts
bool hasCubeInsts() const
Definition GCNSubtarget.h:903

llvm::GCNSubtarget::HalfRate64Ops
bool HalfRate64Ops
Definition GCNSubtarget.h:71

llvm::GCNSubtarget::HasSafeSmemPrefetch
bool HasSafeSmemPrefetch
Definition GCNSubtarget.h:260

llvm::GCNSubtarget::HasSMemTimeInst
bool HasSMemTimeInst
Definition GCNSubtarget.h:218

llvm::GCNSubtarget::HasFP8ConversionInsts
bool HasFP8ConversionInsts
Definition GCNSubtarget.h:168

llvm::GCNSubtarget::GFX8Insts
bool GFX8Insts
Definition GCNSubtarget.h:108

llvm::GCNSubtarget::hasInstFwdPrefetchBug
bool hasInstFwdPrefetchBug() const
Definition GCNSubtarget.h:1302

llvm::GCNSubtarget::HasDot2Insts
bool HasDot2Insts
Definition GCNSubtarget.h:154

llvm::GCNSubtarget::hasAtomicFMinFMaxF64GlobalInsts
bool hasAtomicFMinFMaxF64GlobalInsts() const
Definition GCNSubtarget.h:927

llvm::GCNSubtarget::HasLerpInst
bool HasLerpInst
Definition GCNSubtarget.h:170

llvm::GCNSubtarget::hasMed3_16
bool hasMed3_16() const
Definition GCNSubtarget.h:481

llvm::GCNSubtarget::HasVMemToLDSLoad
bool HasVMemToLDSLoad
Definition GCNSubtarget.h:207

llvm::GCNSubtarget::getReservedNumSGPRs
unsigned getReservedNumSGPRs(const MachineFunction &MF) const
Definition GCNSubtarget.cpp:426

llvm::GCNSubtarget::hasUnalignedScratchAccessEnabled
bool hasUnalignedScratchAccessEnabled() const
Definition GCNSubtarget.h:653

llvm::GCNSubtarget::hasMovrel
bool hasMovrel() const
Definition GCNSubtarget.h:1098

llvm::GCNSubtarget::HasVOP3Literal
bool HasVOP3Literal
Definition GCNSubtarget.h:221

llvm::GCNSubtarget::hasNullExportTarget
bool hasNullExportTarget() const
Return true if the target's EXP instruction supports the NULL export target.
Definition GCNSubtarget.h:1392

llvm::GCNSubtarget::UnalignedScratchAccess
bool UnalignedScratchAccess
Definition GCNSubtarget.h:78

llvm::GCNSubtarget::hasAtomicFlatPkAdd16Insts
bool hasAtomicFlatPkAdd16Insts() const
Definition GCNSubtarget.h:941

llvm::GCNSubtarget::HasPrngInst
bool HasPrngInst
Definition GCNSubtarget.h:251

llvm::GCNSubtarget::HasImageGather4D16Bug
bool HasImageGather4D16Bug
Definition GCNSubtarget.h:269

llvm::GCNSubtarget::hasBFI
bool hasBFI() const
Definition GCNSubtarget.h:461

llvm::GCNSubtarget::hasDot13Insts
bool hasDot13Insts() const
Definition GCNSubtarget.h:889

llvm::GCNSubtarget::ldsRequiresM0Init
bool ldsRequiresM0Init() const
Return true if most LDS instructions have an m0 use that requires m0 to be initialized.
Definition GCNSubtarget.h:772

llvm::GCNSubtarget::HasSMEMtoVectorWriteHazard
bool HasSMEMtoVectorWriteHazard
Definition GCNSubtarget.h:257

llvm::GCNSubtarget::HasAshrPkInsts
bool HasAshrPkInsts
Definition GCNSubtarget.h:278

llvm::GCNSubtarget::HasAtomicFaddNoRtnInsts
bool HasAtomicFaddNoRtnInsts
Definition GCNSubtarget.h:186

llvm::GCNSubtarget::hasSMEMtoVectorWriteHazard
bool hasSMEMtoVectorWriteHazard() const
Definition GCNSubtarget.h:1294

llvm::GCNSubtarget::useAA
bool useAA() const override
Definition GCNSubtarget.cpp:395

llvm::GCNSubtarget::isWave32
bool isWave32() const
Definition GCNSubtarget.h:1787

llvm::GCNSubtarget::hasVGPRIndexMode
bool hasVGPRIndexMode() const
Definition GCNSubtarget.h:1102

llvm::GCNSubtarget::HasGDS
bool HasGDS
Definition GCNSubtarget.h:232

llvm::GCNSubtarget::HasAtomicBufferGlobalPkAddF16Insts
bool HasAtomicBufferGlobalPkAddF16Insts
Definition GCNSubtarget.h:189

llvm::GCNSubtarget::getOccupancyWithNumVGPRs
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs, unsigned DynamicVGPRBlockSize) const
Return the maximum number of waves per SIMD for kernels using `VGPRs` VGPRs.
Definition GCNSubtarget.cpp:403

llvm::GCNSubtarget::hasUnalignedBufferAccessEnabled
bool hasUnalignedBufferAccessEnabled() const
Definition GCNSubtarget.h:637

llvm::GCNSubtarget::isWaveSizeKnown
bool isWaveSizeKnown() const
Returns true if the wavesize of this subtarget is reliably known.
Definition GCNSubtarget.h:1798

llvm::GCNSubtarget::getMaxPrivateElementSize
unsigned getMaxPrivateElementSize(bool ForBufferRSrc=false) const
Definition GCNSubtarget.h:398

llvm::GCNSubtarget::getMinFlatWorkGroupSize
unsigned getMinFlatWorkGroupSize() const override
Definition GCNSubtarget.h:1814

llvm::GCNSubtarget::HasAtomicCSubNoRtnInsts
bool HasAtomicCSubNoRtnInsts
Definition GCNSubtarget.h:190

llvm::GCNSubtarget::hasImageInsts
bool hasImageInsts() const
Definition GCNSubtarget.h:1168

llvm::GCNSubtarget::HasAtomicDsPkAdd16Insts
bool HasAtomicDsPkAdd16Insts
Definition GCNSubtarget.h:183

llvm::GCNSubtarget::hasImageGather4D16Bug
bool hasImageGather4D16Bug() const
Definition GCNSubtarget.h:1190

llvm::GCNSubtarget::FMA
bool FMA
Definition GCNSubtarget.h:105

llvm::GCNSubtarget::HasRequiredExportPriority
bool HasRequiredExportPriority
Definition GCNSubtarget.h:276

llvm::GCNSubtarget::hasFMA
bool hasFMA() const
Definition GCNSubtarget.h:499

llvm::GCNSubtarget::hasDot10Insts
bool hasDot10Insts() const
Definition GCNSubtarget.h:877

llvm::GCNSubtarget::hasSPackHL
bool hasSPackHL() const
Return true if the target has the S_PACK_HL_B32_B16 instruction.
Definition GCNSubtarget.h:1384

llvm::GCNSubtarget::hasVMEMtoScalarWriteHazard
bool hasVMEMtoScalarWriteHazard() const
Definition GCNSubtarget.h:1290

llvm::GCNSubtarget::hasCvtFP8VOP1Bug
bool hasCvtFP8VOP1Bug() const
Definition GCNSubtarget.h:1559

llvm::GCNSubtarget::supportsMinMaxDenormModes
bool supportsMinMaxDenormModes() const
Definition GCNSubtarget.h:582

llvm::GCNSubtarget::supportsWave64
bool supportsWave64() const
Definition GCNSubtarget.h:1785

llvm::GCNSubtarget::RelaxedBufferOOBMode
bool RelaxedBufferOOBMode
Definition GCNSubtarget.h:80

llvm::GCNSubtarget::HasAtomicBufferPkAddBF16Inst
bool HasAtomicBufferPkAddBF16Inst
Definition GCNSubtarget.h:192

llvm::GCNSubtarget::hasNegativeUnalignedScratchOffsetBug
bool hasNegativeUnalignedScratchOffsetBug() const
Definition GCNSubtarget.h:1252

llvm::GCNSubtarget::hasFFBH
bool hasFFBH() const
Definition GCNSubtarget.h:477

llvm::GCNSubtarget::hasFormattedMUBUFInsts
bool hasFormattedMUBUFInsts() const
Definition GCNSubtarget.h:733

llvm::GCNSubtarget::HasQsadInsts
bool HasQsadInsts
Definition GCNSubtarget.h:172

llvm::GCNSubtarget::hasFlatScratchSVSMode
bool hasFlatScratchSVSMode() const
Definition GCNSubtarget.h:712

llvm::GCNSubtarget::supportsWGP
bool supportsWGP() const
Definition GCNSubtarget.h:409

llvm::GCNSubtarget::hasG16
bool hasG16() const
Definition GCNSubtarget.h:1182

llvm::GCNSubtarget::GFX90AInsts
bool GFX90AInsts
Definition GCNSubtarget.h:110

llvm::GCNSubtarget::HasMSAALoadDstSelBug
bool HasMSAALoadDstSelBug
Definition GCNSubtarget.h:270

llvm::GCNSubtarget::HasPackedFP32Ops
bool HasPackedFP32Ops
Definition GCNSubtarget.h:141

llvm::GCNSubtarget::hasHalfRate64Ops
bool hasHalfRate64Ops() const
Definition GCNSubtarget.h:431

llvm::GCNSubtarget::hasAtomicFaddInsts
bool hasAtomicFaddInsts() const
Definition GCNSubtarget.h:943

llvm::GCNSubtarget::HasAtomicBufferGlobalPkAddF16NoRtnInsts
bool HasAtomicBufferGlobalPkAddF16NoRtnInsts
Definition GCNSubtarget.h:188

llvm::GCNSubtarget::HasMinimum3Maximum3PKF16
bool HasMinimum3Maximum3PKF16
Definition GCNSubtarget.h:283

llvm::GCNSubtarget::hasSubClampInsts
bool hasSubClampInsts() const
Definition GCNSubtarget.h:1632

llvm::GCNSubtarget::hasPermlane16Swap
bool hasPermlane16Swap() const
Definition GCNSubtarget.h:1454

llvm::GCNSubtarget::hasNSAtoVMEMBug
bool hasNSAtoVMEMBug() const
Definition GCNSubtarget.h:1336

llvm::GCNSubtarget::requiresWaitXCntForSingleAccessInstructions
bool requiresWaitXCntForSingleAccessInstructions() const
Definition GCNSubtarget.h:1891

llvm::GCNSubtarget::RequiresAlignVGPR
bool RequiresAlignVGPR
Definition GCNSubtarget.h:208

llvm::GCNSubtarget::HasArchitectedFlatScratch
bool HasArchitectedFlatScratch
Definition GCNSubtarget.h:229

llvm::GCNSubtarget::CIInsts
bool CIInsts
Definition GCNSubtarget.h:107

llvm::GCNSubtarget::getNSAMaxSize
unsigned getNSAMaxSize(bool HasSampler=false) const
Definition GCNSubtarget.h:1204

llvm::GCNSubtarget::hasAtomicBufferGlobalPkAddF16NoRtnInsts
bool hasAtomicBufferGlobalPkAddF16NoRtnInsts() const
Definition GCNSubtarget.h:951

llvm::GCNSubtarget::hasSadInsts
bool hasSadInsts() const
Definition GCNSubtarget.h:907

llvm::GCNSubtarget::hasMIMG_R128
bool hasMIMG_R128() const
Definition GCNSubtarget.h:423

llvm::GCNSubtarget::getOccupancyWithNumSGPRs
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const
Return the maximum number of waves per SIMD for kernels using `SGPRs` SGPRs.
Definition GCNSubtarget.cpp:397

llvm::GCNSubtarget::HasCvtNormInsts
bool HasCvtNormInsts
Definition GCNSubtarget.h:173

llvm::GCNSubtarget::hasVOP3DPP
bool hasVOP3DPP() const
Definition GCNSubtarget.h:1350

llvm::GCNSubtarget::hasAtomicBufferPkAddBF16Inst
bool hasAtomicBufferPkAddBF16Inst() const
Definition GCNSubtarget.h:963

llvm::GCNSubtarget::HasAgentScopeFineGrainedRemoteMemoryAtomics
bool HasAgentScopeFineGrainedRemoteMemoryAtomics
Definition GCNSubtarget.h:196

llvm::GCNSubtarget::getMaxFlatWorkGroupSize
unsigned getMaxFlatWorkGroupSize() const override
Definition GCNSubtarget.h:1819

llvm::GCNSubtarget::hasDPP8
bool hasDPP8() const
Definition GCNSubtarget.h:1143

llvm::GCNSubtarget::hasDot5Insts
bool hasDot5Insts() const
Definition GCNSubtarget.h:857

llvm::GCNSubtarget::getMaxNumUserSGPRs
unsigned getMaxNumUserSGPRs() const
Definition GCNSubtarget.h:1090

llvm::GCNSubtarget::hasTransposeLoadF4F6Insts
bool hasTransposeLoadF4F6Insts() const
Definition GCNSubtarget.h:1480

llvm::GCNSubtarget::hasMadU32Inst
bool hasMadU32Inst() const
Definition GCNSubtarget.h:1589

llvm::GCNSubtarget::hasAtomicFaddNoRtnInsts
bool hasAtomicFaddNoRtnInsts() const
Definition GCNSubtarget.h:949

llvm::GCNSubtarget::MaxHardClauseLength
unsigned MaxHardClauseLength
The maximum number of instructions that may be placed within an S_CLAUSE, which is one greater than t...
Definition GCNSubtarget.h:203

llvm::GCNSubtarget::hasPermLaneX16
bool hasPermLaneX16() const
Definition GCNSubtarget.h:1126

llvm::GCNSubtarget::hasFlatScratchSVSSwizzleBug
bool hasFlatScratchSVSSwizzleBug() const
Definition GCNSubtarget.h:1398

llvm::GCNSubtarget::hasFlatBufferGlobalAtomicFaddF64Inst
bool hasFlatBufferGlobalAtomicFaddF64Inst() const
Definition GCNSubtarget.h:971

llvm::GCNSubtarget::HasEmulatedSystemScopeAtomics
bool HasEmulatedSystemScopeAtomics
Definition GCNSubtarget.h:197

llvm::GCNSubtarget::hasNoF16PseudoScalarTransInlineConstants
bool hasNoF16PseudoScalarTransInlineConstants() const
Definition GCNSubtarget.h:1438

llvm::GCNSubtarget::hasIEEEMode
bool hasIEEEMode() const
Definition GCNSubtarget.h:1569

llvm::GCNSubtarget::hasScalarDwordx3Loads
bool hasScalarDwordx3Loads() const
Definition GCNSubtarget.h:1112

llvm::GCNSubtarget::hasVDecCoExecHazard
bool hasVDecCoExecHazard() const
Definition GCNSubtarget.h:1332

llvm::GCNSubtarget::FastDenormalF32
bool FastDenormalF32
Definition GCNSubtarget.h:70

llvm::GCNSubtarget::hasSignedGVSOffset
bool hasSignedGVSOffset() const
Definition GCNSubtarget.h:1232

llvm::GCNSubtarget::hasCvtPkNormVOP2Insts
bool hasCvtPkNormVOP2Insts() const
Definition GCNSubtarget.h:913

llvm::GCNSubtarget::HasImageStoreD16Bug
bool HasImageStoreD16Bug
Definition GCNSubtarget.h:268

llvm::GCNSubtarget::UnalignedAccessMode
bool UnalignedAccessMode
Definition GCNSubtarget.h:79

llvm::GCNSubtarget::hasLDSFPAtomicAddF32
bool hasLDSFPAtomicAddF32() const
Definition GCNSubtarget.h:1122

llvm::GCNSubtarget::getWavesPerEUForWorkGroup
unsigned getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override
Definition GCNSubtarget.h:1826

llvm::GCNSubtarget::HasLshlAddU64Inst
bool HasLshlAddU64Inst
Definition GCNSubtarget.h:284

llvm::GCNSubtarget::EnableDS128
bool EnableDS128
Definition GCNSubtarget.h:98

llvm::GCNSubtarget::HasApertureRegs
bool HasApertureRegs
Definition GCNSubtarget.h:81

llvm::GCNSubtarget::hasBFM
bool hasBFM() const
Definition GCNSubtarget.h:465

llvm::GCNSubtarget::haveRoundOpsF64
bool haveRoundOpsF64() const
Have v_trunc_f64, v_ceil_f64, v_rndne_f64.
Definition GCNSubtarget.h:607

llvm::GCNSubtarget::hasDelayAlu
bool hasDelayAlu() const
Return true if the target has the S_DELAY_ALU instruction.
Definition GCNSubtarget.h:1401

llvm::GCNSubtarget::HasNSAtoVMEMBug
bool HasNSAtoVMEMBug
Definition GCNSubtarget.h:264

llvm::GCNSubtarget::GFX950Insts
bool GFX950Insts
Definition GCNSubtarget.h:112

llvm::GCNSubtarget::hasReadM0SendMsgHazard
bool hasReadM0SendMsgHazard() const
Definition GCNSubtarget.h:1273

llvm::GCNSubtarget::HasA16
bool HasA16
Definition GCNSubtarget.h:145

llvm::GCNSubtarget::AssemblerPermissiveWavesize
bool AssemblerPermissiveWavesize
Definition GCNSubtarget.h:101

llvm::GCNSubtarget::hasDot8Insts
bool hasDot8Insts() const
Definition GCNSubtarget.h:869

llvm::GCNSubtarget::hasVectorMulU64
bool hasVectorMulU64() const
Definition GCNSubtarget.h:1592

llvm::GCNSubtarget::hasScalarMulHiInsts
bool hasScalarMulHiInsts() const
Definition GCNSubtarget.h:511

llvm::GCNSubtarget::hasSCmpK
bool hasSCmpK() const
Definition GCNSubtarget.h:1049

llvm::GCNSubtarget::HasFP8E5M3Insts
bool HasFP8E5M3Insts
Definition GCNSubtarget.h:176

llvm::GCNSubtarget::hasPseudoScalarTrans
bool hasPseudoScalarTrans() const
Definition GCNSubtarget.h:1424

llvm::GCNSubtarget::getLegalizerInfo
const LegalizerInfo * getLegalizerInfo() const override
Definition GCNSubtarget.h:353

llvm::GCNSubtarget::requiresWaitIdleBeforeGetReg
bool requiresWaitIdleBeforeGetReg() const
Definition GCNSubtarget.h:1854

llvm::GCNSubtarget::HasSDWASdst
bool HasSDWASdst
Definition GCNSubtarget.h:134

llvm::GCNSubtarget::hasPointSampleAccel
bool hasPointSampleAccel() const
Definition GCNSubtarget.h:1489

llvm::GCNSubtarget::hasDot12Insts
bool hasDot12Insts() const
Definition GCNSubtarget.h:885

llvm::GCNSubtarget::hasDS96AndDS128
bool hasDS96AndDS128() const
Definition GCNSubtarget.h:602

llvm::GCNSubtarget::hasGWS
bool hasGWS() const
Definition GCNSubtarget.h:1541

llvm::GCNSubtarget::HasCvtPkNormVOP3Insts
bool HasCvtPkNormVOP3Insts
Definition GCNSubtarget.h:175

llvm::GCNSubtarget::HasAtomicFMinFMaxF64GlobalInsts
bool HasAtomicFMinFMaxF64GlobalInsts
Definition GCNSubtarget.h:180

llvm::GCNSubtarget::HasBitOp3Insts
bool HasBitOp3Insts
Definition GCNSubtarget.h:247

llvm::GCNSubtarget::hasReadM0LdsDirectHazard
bool hasReadM0LdsDirectHazard() const
Definition GCNSubtarget.h:1282

llvm::GCNSubtarget::useFlatForGlobal
bool useFlatForGlobal() const
Definition GCNSubtarget.h:591

llvm::GCNSubtarget::EnableFlatScratch
bool EnableFlatScratch
Definition GCNSubtarget.h:230

llvm::GCNSubtarget::hasHalfRate64Ops
static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI)

llvm::GCNSubtarget::hasVOPDInsts
bool hasVOPDInsts() const
Definition GCNSubtarget.h:1396

llvm::GCNSubtarget::hasGFX10_BEncoding
bool hasGFX10_BEncoding() const
Definition GCNSubtarget.h:1212

llvm::GCNSubtarget::getGeneration
Generation getGeneration() const
Definition GCNSubtarget.h:371

llvm::GCNSubtarget::GFX7GFX8GFX9Insts
bool GFX7GFX8GFX9Insts
Definition GCNSubtarget.h:118

llvm::GCNSubtarget::GCNSubtarget
GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, const GCNTargetMachine &TM)
Definition GCNSubtarget.cpp:167

llvm::GCNSubtarget::getMaxNumSGPRs
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
Definition GCNSubtarget.h:1662

llvm::GCNSubtarget::hasVOP3Literal
bool hasVOP3Literal() const
Definition GCNSubtarget.h:1024

llvm::GCNSubtarget::HasDot10Insts
bool HasDot10Insts
Definition GCNSubtarget.h:162

llvm::GCNSubtarget::hasAtomicBufferGlobalPkAddF16Insts
bool hasAtomicBufferGlobalPkAddF16Insts() const
Definition GCNSubtarget.h:955

llvm::GCNSubtarget::getMaxNumVectorRegs
std::pair< unsigned, unsigned > getMaxNumVectorRegs(const Function &F) const
Return a pair of maximum numbers of VGPRs and AGPRs that meet the number of waves per execution unit ...
Definition GCNSubtarget.cpp:574

llvm::GCNSubtarget::HasSDWAMac
bool HasSDWAMac
Definition GCNSubtarget.h:135

llvm::GCNSubtarget::hasNoSdstCMPX
bool hasNoSdstCMPX() const
Definition GCNSubtarget.h:1000

llvm::GCNSubtarget::HasDot12Insts
bool HasDot12Insts
Definition GCNSubtarget.h:164

llvm::GCNSubtarget::isXNACKEnabled
bool isXNACKEnabled() const
Definition GCNSubtarget.h:671

llvm::GCNSubtarget::hasScalarAddSub64
bool hasScalarAddSub64() const
Definition GCNSubtarget.h:798

llvm::GCNSubtarget::hasSplitBarriers
bool hasSplitBarriers() const
Definition GCNSubtarget.h:1556

llvm::GCNSubtarget::hasUnpackedD16VMem
bool hasUnpackedD16VMem() const
Definition GCNSubtarget.h:802

llvm::GCNSubtarget::enableEarlyIfConversion
bool enableEarlyIfConversion() const override
Definition GCNSubtarget.h:1078

llvm::GCNSubtarget::hasSMRDReadVALUDefHazard
bool hasSMRDReadVALUDefHazard() const
A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR was written by a VALU inst...
Definition GCNSubtarget.h:554

llvm::GCNSubtarget::hasSGetShaderCyclesInst
bool hasSGetShaderCyclesInst() const
Definition GCNSubtarget.h:1611

llvm::GCNSubtarget::hasRFEHazards
bool hasRFEHazards() const
Definition GCNSubtarget.h:564

llvm::GCNSubtarget::hasVMEMReadSGPRVALUDefHazard
bool hasVMEMReadSGPRVALUDefHazard() const
A read of an SGPR by a VMEM instruction requires 5 wait states when the SGPR was written by a VALU In...
Definition GCNSubtarget.h:560

llvm::GCNSubtarget::FlatForGlobal
bool FlatForGlobal
Definition GCNSubtarget.h:75

llvm::GCNSubtarget::hasFlatScratchSTMode
bool hasFlatScratchSTMode() const
Definition GCNSubtarget.h:708

llvm::GCNSubtarget::getBaseReservedNumSGPRs
unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const
Definition GCNSubtarget.cpp:410

llvm::GCNSubtarget::hasGWSSemaReleaseAll
bool hasGWSSemaReleaseAll() const
Definition GCNSubtarget.h:786

llvm::GCNSubtarget::hasDPALU_DPP
bool hasDPALU_DPP() const
Definition GCNSubtarget.h:1147

llvm::GCNSubtarget::enableSIScheduler
bool enableSIScheduler() const
Definition GCNSubtarget.h:1234

llvm::GCNSubtarget::hasAtomicGlobalPkAddBF16Inst
bool hasAtomicGlobalPkAddBF16Inst() const
Definition GCNSubtarget.h:959

llvm::GCNSubtarget::FlatScratchInsts
bool FlatScratchInsts
Definition GCNSubtarget.h:226

llvm::GCNSubtarget::hasAddr64
bool hasAddr64() const
Definition GCNSubtarget.h:439

llvm::GCNSubtarget::HasVMEMtoScalarWriteHazard
bool HasVMEMtoScalarWriteHazard
Definition GCNSubtarget.h:256

llvm::GCNSubtarget::HasAddMinMaxInsts
bool HasAddMinMaxInsts
Definition GCNSubtarget.h:287

llvm::GCNSubtarget::SGPRInitBug
bool SGPRInitBug
Definition GCNSubtarget.h:119

llvm::GCNSubtarget::HasAtomicGlobalPkAddBF16Inst
bool HasAtomicGlobalPkAddBF16Inst
Definition GCNSubtarget.h:191

llvm::GCNSubtarget::hasUnalignedAccessMode
bool hasUnalignedAccessMode() const
Definition GCNSubtarget.h:657

llvm::GCNSubtarget::FP64
bool FP64
Definition GCNSubtarget.h:104

llvm::GCNSubtarget::getAddressableNumSGPRs
unsigned getAddressableNumSGPRs() const
Definition GCNSubtarget.h:1650

llvm::GCNSubtarget::HasDot1Insts
bool HasDot1Insts
Definition GCNSubtarget.h:153

llvm::GCNSubtarget::hasReadVCCZBug
bool hasReadVCCZBug() const
Extra wait hazard is needed in some cases before s_cbranch_vccnz/s_cbranch_vccz.
Definition GCNSubtarget.h:543

llvm::GCNSubtarget::HasAtomicFaddRtnInsts
bool HasAtomicFaddRtnInsts
Definition GCNSubtarget.h:185

llvm::GCNSubtarget::isWave64
bool isWave64() const
Definition GCNSubtarget.h:1791

llvm::GCNSubtarget::HasRestrictedSOffset
bool HasRestrictedSOffset
Definition GCNSubtarget.h:244

llvm::GCNSubtarget::getDynamicVGPRBlockSize
unsigned getDynamicVGPRBlockSize() const
Definition GCNSubtarget.h:1857

llvm::GCNSubtarget::RequiresCOV6
bool RequiresCOV6
Definition GCNSubtarget.h:294

llvm::GCNSubtarget::HasArchitectedSGPRs
bool HasArchitectedSGPRs
Definition GCNSubtarget.h:231

llvm::GCNSubtarget::TrapHandler
bool TrapHandler
Definition GCNSubtarget.h:91

llvm::GCNSubtarget::hasFmaMixInsts
bool hasFmaMixInsts() const
Definition GCNSubtarget.h:489

llvm::GCNSubtarget::HasVALUTransUseHazard
bool HasVALUTransUseHazard
Definition GCNSubtarget.h:275

llvm::GCNSubtarget::hasCARRY
bool hasCARRY() const
Definition GCNSubtarget.h:495

llvm::GCNSubtarget::hasQsadInsts
bool hasQsadInsts() const
Definition GCNSubtarget.h:909

llvm::GCNSubtarget::HasSMemRealTime
bool HasSMemRealTime
Definition GCNSubtarget.h:123

llvm::GCNSubtarget::hasPackedTID
bool hasPackedTID() const
Definition GCNSubtarget.h:1403

llvm::GCNSubtarget::setRegModeNeedsVNOPs
bool setRegModeNeedsVNOPs() const
Definition GCNSubtarget.h:1376

llvm::GCNSubtarget::HasVcmpxExecWARHazard
bool HasVcmpxExecWARHazard
Definition GCNSubtarget.h:262

llvm::GCNSubtarget::HasNSAEncoding
bool HasNSAEncoding
Definition GCNSubtarget.h:147

llvm::GCNSubtarget::hasFP64
bool hasFP64() const
Definition GCNSubtarget.h:419

llvm::GCNSubtarget::hasAddNoCarry
bool hasAddNoCarry() const
Definition GCNSubtarget.h:794

llvm::GCNSubtarget::ScalarFlatScratchInsts
bool ScalarFlatScratchInsts
Definition GCNSubtarget.h:228

llvm::GCNSubtarget::requiresWaitsBeforeSystemScopeStores
bool requiresWaitsBeforeSystemScopeStores() const
Definition GCNSubtarget.h:1911

llvm::GCNSubtarget::hasVALUTransUseHazard
bool hasVALUTransUseHazard() const
Definition GCNSubtarget.h:1360

llvm::GCNSubtarget::HasDot5Insts
bool HasDot5Insts
Definition GCNSubtarget.h:157

llvm::GCNSubtarget::hasShaderCyclesRegister
bool hasShaderCyclesRegister() const
Definition GCNSubtarget.h:1016

llvm::GCNSubtarget::HasCvtFP8Vop1Bug
bool HasCvtFP8Vop1Bug
Definition GCNSubtarget.h:177

llvm::GCNSubtarget::HasMFMAInlineLiteralBug
bool HasMFMAInlineLiteralBug
Definition GCNSubtarget.h:237

llvm::GCNSubtarget::HasIntClamp
bool HasIntClamp
Definition GCNSubtarget.h:124

llvm::GCNSubtarget::HasVmemWriteVgprInOrder
bool HasVmemWriteVgprInOrder
Definition GCNSubtarget.h:277

llvm::GCNSubtarget::UnalignedBufferAccess
bool UnalignedBufferAccess
Definition GCNSubtarget.h:238

llvm::GCNSubtarget::HasUnpackedD16VMem
bool HasUnpackedD16VMem
Definition GCNSubtarget.h:235

llvm::GCNSubtarget::hasSALUFloatInsts
bool hasSALUFloatInsts() const
Definition GCNSubtarget.h:1422

llvm::GCNSubtarget::EnableUnsafeDSOffsetFolding
bool EnableUnsafeDSOffsetFolding
Definition GCNSubtarget.h:96

llvm::GCNSubtarget::hasFractBug
bool hasFractBug() const
Definition GCNSubtarget.h:453

llvm::GCNSubtarget::isPreciseMemoryEnabled
bool isPreciseMemoryEnabled() const
Definition GCNSubtarget.h:683

llvm::GCNSubtarget::hasDPPSrc1SGPR
bool hasDPPSrc1SGPR() const
Definition GCNSubtarget.h:1151

llvm::GCNSubtarget::hasGDS
bool hasGDS() const
Definition GCNSubtarget.h:1538

llvm::GCNSubtarget::getMaxWaveScratchSize
unsigned getMaxWaveScratchSize() const
Definition GCNSubtarget.h:375

llvm::GCNSubtarget::GFX12Insts
bool GFX12Insts
Definition GCNSubtarget.h:115

llvm::GCNSubtarget::HasDefaultComponentZero
bool HasDefaultComponentZero
Definition GCNSubtarget.h:195

llvm::GCNSubtarget::HasMemoryAtomicFaddF32DenormalSupport
bool HasMemoryAtomicFaddF32DenormalSupport
Definition GCNSubtarget.h:187

llvm::GCNSubtarget::HasPkAddMinMaxInsts
bool HasPkAddMinMaxInsts
Definition GCNSubtarget.h:288

llvm::GCNSubtarget::hasMTBUFInsts
bool hasMTBUFInsts() const
Definition GCNSubtarget.h:731

llvm::GCNSubtarget::hasDot4Insts
bool hasDot4Insts() const
Definition GCNSubtarget.h:853

llvm::GCNSubtarget::HasSWakeupBarrier
bool HasSWakeupBarrier
Definition GCNSubtarget.h:292

llvm::GCNSubtarget::FeatureDisable
bool FeatureDisable
Definition GCNSubtarget.h:304

llvm::GCNSubtarget::GFX1250Insts
bool GFX1250Insts
Definition GCNSubtarget.h:116

llvm::GCNSubtarget::flatScratchIsArchitected
bool flatScratchIsArchitected() const
Definition GCNSubtarget.h:1532

llvm::GCNSubtarget::hasPartialNSAEncoding
bool hasPartialNSAEncoding() const
Definition GCNSubtarget.h:1202

llvm::GCNSubtarget::HasInstFwdPrefetchBug
bool HasInstFwdPrefetchBug
Definition GCNSubtarget.h:258

llvm::GCNSubtarget::hasWaitXCnt
bool hasWaitXCnt() const
Definition GCNSubtarget.h:1484

llvm::GCNSubtarget::checkSubtargetFeatures
void checkSubtargetFeatures(const Function &F) const
Diagnose inconsistent subtarget features before attempting to codegen function F.
Definition GCNSubtarget.cpp:158

llvm::GCNSubtarget::HasPermlane32Swap
bool HasPermlane32Swap
Definition GCNSubtarget.h:254

llvm::GCNSubtarget::Has1024AddressableVGPRs
bool Has1024AddressableVGPRs
Definition GCNSubtarget.h:246

llvm::GCNSubtarget::hasSetPrioIncWgInst
bool hasSetPrioIncWgInst() const
Definition GCNSubtarget.h:1614

llvm::GCNSubtarget::~GCNSubtarget
~GCNSubtarget() override

llvm::GCNSubtarget::getSelectionDAGInfo
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
Definition GCNSubtarget.cpp:192

llvm::GCNSubtarget::FlatAddressSpace
bool FlatAddressSpace
Definition GCNSubtarget.h:223

llvm::GCNSubtarget::hasDot9Insts
bool hasDot9Insts() const
Definition GCNSubtarget.h:873

llvm::GCNSubtarget::HasPackedTID
bool HasPackedTID
Definition GCNSubtarget.h:240

llvm::GCNSubtarget::hasVOPD3
bool hasVOPD3() const
Definition GCNSubtarget.h:1583

llvm::GCNSubtarget::UseBlockVGPROpsForCSR
bool UseBlockVGPROpsForCSR
Definition GCNSubtarget.h:295

llvm::GCNSubtarget::hasAtomicCSub
bool hasAtomicCSub() const
Definition GCNSubtarget.h:727

llvm::GCNSubtarget::TargetID
AMDGPU::IsaInfo::AMDGPUTargetID TargetID
Definition GCNSubtarget.h:63

llvm::GCNSubtarget::hasDefaultComponentBroadcast
bool hasDefaultComponentBroadcast() const
Definition GCNSubtarget.h:996

llvm::GCNSubtarget::requiresCodeObjectV6
bool requiresCodeObjectV6() const
Definition GCNSubtarget.h:1364

llvm::GCNSubtarget::SupportsSRAMECC
bool SupportsSRAMECC
Definition GCNSubtarget.h:204

llvm::GCNSubtarget::getCallLowering
const CallLowering * getCallLowering() const override
Definition GCNSubtarget.h:341

llvm::GCNSubtarget::hasBFE
bool hasBFE() const
Definition GCNSubtarget.h:457

llvm::GCNSubtarget::hasLdsDirect
bool hasLdsDirect() const
Definition GCNSubtarget.h:1352

llvm::GCNSubtarget::hasGWSAutoReplay
bool hasGWSAutoReplay() const
Definition GCNSubtarget.h:781

llvm::GCNSubtarget::HasFlatBufferGlobalAtomicFaddF64Inst
bool HasFlatBufferGlobalAtomicFaddF64Inst
Definition GCNSubtarget.h:194

llvm::GCNTargetMachine
Definition AMDGPUTargetMachine.h:81

llvm::GCNUserSGPRUsageInfo::getNumUserSGPRForField
static unsigned getNumUserSGPRForField(UserSGPRID ID)
Definition GCNSubtarget.h:1954

llvm::GCNUserSGPRUsageInfo::hasQueuePtr
bool hasQueuePtr() const
Definition GCNSubtarget.h:1924

llvm::GCNUserSGPRUsageInfo::hasKernargSegmentPtr
bool hasKernargSegmentPtr() const
Definition GCNSubtarget.h:1926

llvm::GCNUserSGPRUsageInfo::allocKernargPreloadSGPRs
void allocKernargPreloadSGPRs(unsigned NumSGPRs)
Definition GCNSubtarget.cpp:754

llvm::GCNUserSGPRUsageInfo::hasDispatchID
bool hasDispatchID() const
Definition GCNSubtarget.h:1928

llvm::GCNUserSGPRUsageInfo::UserSGPRID
UserSGPRID
Definition GCNSubtarget.h:1942

llvm::GCNUserSGPRUsageInfo::ImplicitBufferPtrID
@ ImplicitBufferPtrID
Definition GCNSubtarget.h:1943

llvm::GCNUserSGPRUsageInfo::DispatchIdID
@ DispatchIdID
Definition GCNSubtarget.h:1948

llvm::GCNUserSGPRUsageInfo::QueuePtrID
@ QueuePtrID
Definition GCNSubtarget.h:1946

llvm::GCNUserSGPRUsageInfo::DispatchPtrID
@ DispatchPtrID
Definition GCNSubtarget.h:1945

llvm::GCNUserSGPRUsageInfo::FlatScratchInitID
@ FlatScratchInitID
Definition GCNSubtarget.h:1949

llvm::GCNUserSGPRUsageInfo::PrivateSegmentBufferID
@ PrivateSegmentBufferID
Definition GCNSubtarget.h:1944

llvm::GCNUserSGPRUsageInfo::PrivateSegmentSizeID
@ PrivateSegmentSizeID
Definition GCNSubtarget.h:1950

llvm::GCNUserSGPRUsageInfo::KernargSegmentPtrID
@ KernargSegmentPtrID
Definition GCNSubtarget.h:1947

llvm::GCNUserSGPRUsageInfo::hasPrivateSegmentBuffer
bool hasPrivateSegmentBuffer() const
Definition GCNSubtarget.h:1920

llvm::GCNUserSGPRUsageInfo::getNumFreeUserSGPRs
unsigned getNumFreeUserSGPRs()
Definition GCNSubtarget.cpp:760

llvm::GCNUserSGPRUsageInfo::hasImplicitBufferPtr
bool hasImplicitBufferPtr() const
Definition GCNSubtarget.h:1918

llvm::GCNUserSGPRUsageInfo::getNumKernargPreloadSGPRs
unsigned getNumKernargPreloadSGPRs() const
Definition GCNSubtarget.h:1934

llvm::GCNUserSGPRUsageInfo::hasPrivateSegmentSize
bool hasPrivateSegmentSize() const
Definition GCNSubtarget.h:1932

llvm::GCNUserSGPRUsageInfo::getNumUsedUserSGPRs
unsigned getNumUsedUserSGPRs() const
Definition GCNSubtarget.h:1936

llvm::GCNUserSGPRUsageInfo::hasDispatchPtr
bool hasDispatchPtr() const
Definition GCNSubtarget.h:1922

llvm::GCNUserSGPRUsageInfo::GCNUserSGPRUsageInfo
GCNUserSGPRUsageInfo(const Function &F, const GCNSubtarget &ST)
Definition GCNSubtarget.cpp:690

llvm::GCNUserSGPRUsageInfo::hasFlatScratchInit
bool hasFlatScratchInit() const
Definition GCNSubtarget.h:1930

llvm::InlineAsmLowering
Definition InlineAsmLowering.h:28

llvm::InstrItineraryData
Itinerary data supplied by a subtarget to be used by a target.
Definition MCInstrItineraries.h:110

llvm::InstructionSelector
Definition InstructionSelector.h:22

llvm::LegalizerInfo
Definition LegalizerInfo.h:1341

llvm::Legalizer
Definition Legalizer.h:38

llvm::MachineFunction
Definition MachineFunction.h:286

llvm::Region
Definition RegionInfo.h:887

llvm::SDep
Scheduling dependency.
Definition ScheduleDAG.h:51

llvm::SIFrameLowering
Definition SIFrameLowering.h:17

llvm::SIInstrInfo
Definition SIInstrInfo.h:90

llvm::SIRegisterInfo
Definition SIRegisterInfo.h:40

llvm::SIRegisterInfo::getBoolRC
const TargetRegisterClass * getBoolRC() const
Definition SIRegisterInfo.h:376

llvm::SITargetLowering
Definition SIISelLowering.h:32

llvm::SUnit
Scheduling unit. This is a node in the scheduling DAG.
Definition ScheduleDAG.h:249

llvm::SelectionDAGTargetInfo
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
Definition SelectionDAGTargetInfo.h:33

llvm::StringRef
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition StringRef.h:55

llvm::TargetRegisterClass
Definition TargetRegisterInfo.h:45

llvm::TargetSchedModel
Provide an instruction scheduling machine model to CodeGen passes.
Definition TargetSchedule.h:31

llvm::TargetSubtargetInfo
TargetSubtargetInfo - Generic base class for all target subtargets.
Definition TargetSubtargetInfo.h:66

llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47

llvm::Use
A Use represents the edge between a Value definition and its users.
Definition Use.h:35

ErrorHandling.h

llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition ErrorHandling.h:164

llvm::AMDGPU::IsaInfo::getVGPREncodingGranule
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
Definition AMDGPUBaseInfo.cpp:1372

llvm::AMDGPU::IsaInfo::getTotalNumVGPRs
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
Definition AMDGPUBaseInfo.cpp:1389

llvm::AMDGPU::IsaInfo::getWavesPerEUForWorkGroup
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
Definition AMDGPUBaseInfo.cpp:1226

llvm::AMDGPU::IsaInfo::getMaxWorkGroupsPerCU
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
Definition AMDGPUBaseInfo.cpp:1196

llvm::AMDGPU::IsaInfo::getMaxFlatWorkGroupSize
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI)
Definition AMDGPUBaseInfo.cpp:1234

llvm::AMDGPU::IsaInfo::getSGPREncodingGranule
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
Definition AMDGPUBaseInfo.cpp:1253

llvm::AMDGPU::IsaInfo::getAddressableNumSGPRs
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
Definition AMDGPUBaseInfo.cpp:1262

llvm::AMDGPU::IsaInfo::getMinNumSGPRs
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
Definition AMDGPUBaseInfo.cpp:1274

llvm::AMDGPU::IsaInfo::getMinFlatWorkGroupSize
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
Definition AMDGPUBaseInfo.cpp:1232

llvm::AMDGPU::IsaInfo::getVGPRAllocGranule
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
Definition AMDGPUBaseInfo.cpp:1350

llvm::AMDGPU::IsaInfo::getMaxNumSGPRs
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
Definition AMDGPUBaseInfo.cpp:1291

llvm::AMDGPU::IsaInfo::getMinWavesPerEU
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
Definition AMDGPUBaseInfo.cpp:1215

llvm::AMDGPU::IsaInfo::getMaxNumVGPRs
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, unsigned DynamicVGPRBlockSize)
Definition AMDGPUBaseInfo.cpp:1490

llvm::AMDGPU::IsaInfo::getSGPRAllocGranule
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
Definition AMDGPUBaseInfo.cpp:1244

llvm::AMDGPU::IsaInfo::getMinNumVGPRs
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, unsigned DynamicVGPRBlockSize)
Definition AMDGPUBaseInfo.cpp:1463

llvm::AMDGPU::IsaInfo::getAddressableNumArchVGPRs
unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI)
Definition AMDGPUBaseInfo.cpp:1400

llvm::AMDGPU::IsaInfo::getTotalNumSGPRs
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
Definition AMDGPUBaseInfo.cpp:1255

llvm::AMDGPU::IsaInfo::getAddressableNumVGPRs
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize)
Definition AMDGPUBaseInfo.cpp:1407

llvm::AMDGPU::isShader
LLVM_READNONE constexpr bool isShader(CallingConv::ID CC)
Definition AMDGPUBaseInfo.h:1433

llvm::AMDGPU::getMaxNumUserSGPRs
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2462

llvm::AMDGPU::getNSAMaxSize
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
Definition AMDGPUBaseInfo.cpp:2451

llvm::CallingConv::ID
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition AddressRanges.h:18

llvm::countl_zero
int countl_zero(T Val)
Count the number of 0's from the most significant bit to the least significant bit, stopping at the first 1.
Definition bit.h:236

llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39

llvm::MachineSchedPolicy
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.
Definition MachineScheduler.h:199

llvm::SchedRegion
A region of an MBB for scheduling.
Definition MachineScheduler.h:222