GCNSubtarget.cpp
//===-- GCNSubtarget.cpp - GCN Subtarget Information ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Implements the GCN specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//

#include "GCNSubtarget.h"
#include "AMDGPUCallLowering.h"
#include "AMDGPUInstructionSelector.h"
#include "AMDGPULegalizerInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "AMDGPUSelectionDAGInfo.h"
#include "AMDGPUTargetMachine.h"
#include "SIMachineFunctionInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/MDBuilder.h"
#include <algorithm>

using namespace llvm;

#define DEBUG_TYPE "gcn-subtarget"

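// The tablegen-generated target description and subtarget constructor below
// are emitted for a class named after the AMDGPU target (AMDGPUSubtarget);
// the temporary #define redirects those definitions onto GCNSubtarget.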
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenSubtargetInfo.inc"
#undef AMDGPUSubtarget

static cl::opt<bool> EnableVGPRIndexMode(
    "amdgpu-vgpr-index-mode",
    cl::desc("Use GPR indexing mode instead of movrel for vector indexing"),
    cl::init(false));

static cl::opt<bool> UseAA("amdgpu-use-aa-in-codegen",
                           cl::desc("Enable the use of AA during codegen."),
                           cl::init(true));

static cl::opt<unsigned>
    NSAThreshold("amdgpu-nsa-threshold",
                 cl::desc("Number of addresses from which to enable MIMG NSA."),
                 cl::init(2), cl::Hidden);

GCNSubtarget::~GCNSubtarget() = default;

GCNSubtarget &GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
                                                            StringRef GPU,
                                                            StringRef FS) {
  // Determine default and user-specified characteristics
  //
  // We want to be able to turn these off, but making this a subtarget feature
  // for SI has the unhelpful behavior that it unsets everything else if you
  // disable it.
  //
  // Similarly, we want enable-prt-strict-null to be on by default and not to
  // unset everything else if it is disabled.

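  // Because the user feature string FS is appended after these defaults (see
  // "FullFS += FS" below), an explicit "-load-store-opt" or similar in FS
  // still overrides the corresponding default added here.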
  SmallString<256> FullFS("+promote-alloca,+load-store-opt,+enable-ds128,");

  // Turn on features that HSA ABI requires. Also turn on FlatForGlobal by
  // default
  if (isAmdHsaOS())
    FullFS += "+flat-for-global,+unaligned-access-mode,+trap-handler,";

  FullFS += "+enable-prt-strict-null,"; // This is overridden by a disable in FS

  // Disable mutually exclusive bits.
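  // For example, an FS that requests "+wavefrontsize32" gets
  // "-wavefrontsize16,-wavefrontsize64," added ahead of it, so only the
  // requested wavefront size remains enabled.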
  if (FS.contains_insensitive("+wavefrontsize")) {
    if (!FS.contains_insensitive("wavefrontsize16"))
      FullFS += "-wavefrontsize16,";
    if (!FS.contains_insensitive("wavefrontsize32"))
      FullFS += "-wavefrontsize32,";
    if (!FS.contains_insensitive("wavefrontsize64"))
      FullFS += "-wavefrontsize64,";
  }

  FullFS += FS;

  ParseSubtargetFeatures(GPU, /*TuneCPU*/ GPU, FullFS);

  // Implement the "generic" processors, which act as the default when no
  // generation features are enabled (e.g. for -mcpu=''). HSA OS defaults to
  // the first amdgcn target that supports flat addressing. Other OSes default
  // to the first amdgcn target.
  if (Gen == AMDGPUSubtarget::INVALID) {
    Gen = TT.getOS() == Triple::AMDHSA ? AMDGPUSubtarget::SEA_ISLANDS
                                       : AMDGPUSubtarget::SOUTHERN_ISLANDS;
    // Assume wave64 for the unknown target, if not explicitly set.
    if (getWavefrontSizeLog2() == 0)
      WavefrontSizeLog2 = 6;
  } else if (!hasFeature(AMDGPU::FeatureWavefrontSize32) &&
             !hasFeature(AMDGPU::FeatureWavefrontSize64)) {
    // If there is no default wave size, it must be a generation before gfx10:
    // those targets have FeatureWavefrontSize64 in their definition already.
    // For gfx10+, set wave32 as the default.
    ToggleFeature(AMDGPU::FeatureWavefrontSize32);
  }

  // We don't support FP64 for EG/NI atm.
  assert(!hasFP64() || (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS));

  // Targets must support 64-bit offsets for MUBUF instructions and/or flat
  // operations; otherwise they cannot access a 64-bit global address space.
  assert(hasAddr64() || hasFlat());
  // Unless +-flat-for-global is specified, turn on FlatForGlobal for targets
  // that do not support ADDR64 variants of MUBUF instructions. Such targets
  // cannot use a 64-bit offset with a MUBUF instruction to access the global
  // address space.
  if (!hasAddr64() && !FS.contains("flat-for-global") && !FlatForGlobal) {
    ToggleFeature(AMDGPU::FeatureFlatForGlobal);
    FlatForGlobal = true;
  }
  // Unless +-flat-for-global is specified, use MUBUF instructions for global
  // address space access if flat operations are not available.
  if (!hasFlat() && !FS.contains("flat-for-global") && FlatForGlobal) {
    ToggleFeature(AMDGPU::FeatureFlatForGlobal);
    FlatForGlobal = false;
  }

  // Set defaults if needed.
  if (MaxPrivateElementSize == 0)
    MaxPrivateElementSize = 4;

  if (LDSBankCount == 0)
    LDSBankCount = 32;

  if (TT.getArch() == Triple::amdgcn && AddressableLocalMemorySize == 0)
    AddressableLocalMemorySize = 32768;

  TargetID.setTargetIDFromFeaturesString(FS);

  LLVM_DEBUG(dbgs() << "xnack setting for subtarget: "
                    << TargetID.getXnackSetting() << '\n');
  LLVM_DEBUG(dbgs() << "sramecc setting for subtarget: "
                    << TargetID.getSramEccSetting() << '\n');

  return *this;
}

void GCNSubtarget::checkSubtargetFeatures(const Function &F) const {
  LLVMContext &Ctx = F.getContext();
  if (hasFeature(AMDGPU::FeatureWavefrontSize32) &&
      hasFeature(AMDGPU::FeatureWavefrontSize64)) {
    Ctx.diagnose(DiagnosticInfoUnsupported(
        F, "must specify exactly one of wavefrontsize32 and wavefrontsize64"));
  }
}

GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
                           const GCNTargetMachine &TM)
    : // clang-format off
    AMDGPUGenSubtargetInfo(TT, GPU, /*TuneCPU*/ GPU, FS),
    AMDGPUSubtarget(TT),
    TargetID(*this),
    InstrItins(getInstrItineraryForCPU(GPU)),
    InstrInfo(initializeSubtargetDependencies(TT, GPU, FS)),
    TLInfo(TM, *this),
    FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0) {
  // clang-format on
  MaxWavesPerEU = AMDGPU::IsaInfo::getMaxWavesPerEU(this);
  EUsPerCU = AMDGPU::IsaInfo::getEUsPerCU(this);

  TSInfo = std::make_unique<AMDGPUSelectionDAGInfo>();

  CallLoweringInfo = std::make_unique<AMDGPUCallLowering>(*getTargetLowering());
  InlineAsmLoweringInfo =
      std::make_unique<InlineAsmLowering>(getTargetLowering());
  Legalizer = std::make_unique<AMDGPULegalizerInfo>(*this, TM);
  RegBankInfo = std::make_unique<AMDGPURegisterBankInfo>(*this);
  InstSelector =
      std::make_unique<AMDGPUInstructionSelector>(*this, *RegBankInfo, TM);
}

const SelectionDAGTargetInfo *GCNSubtarget::getSelectionDAGInfo() const {
  return TSInfo.get();
}

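// Number of scalar operands (SGPRs / literal constants) a single VALU
// instruction may read: one before GFX10, two from GFX10 onwards, except for
// the 64-bit shift opcodes listed below, which remain limited to one.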
unsigned GCNSubtarget::getConstantBusLimit(unsigned Opcode) const {
  if (getGeneration() < GFX10)
    return 1;

  switch (Opcode) {
  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_e64_gfx11:
  case AMDGPU::V_LSHLREV_B64_e32_gfx12:
  case AMDGPU::V_LSHLREV_B64_e64_gfx12:
  case AMDGPU::V_LSHL_B64_e64:
  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_e64_gfx11:
  case AMDGPU::V_LSHRREV_B64_e64_gfx12:
  case AMDGPU::V_LSHR_B64_e64:
  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_e64_gfx11:
  case AMDGPU::V_ASHRREV_I64_e64_gfx12:
  case AMDGPU::V_ASHR_I64_e64:
    return 1;
  }

  return 2;
}

/// This list was mostly derived from experimentation.
bool GCNSubtarget::zeroesHigh16BitsOfDest(unsigned Opcode) const {
  switch (Opcode) {
  case AMDGPU::V_CVT_F16_F32_e32:
  case AMDGPU::V_CVT_F16_F32_e64:
  case AMDGPU::V_CVT_F16_U16_e32:
  case AMDGPU::V_CVT_F16_U16_e64:
  case AMDGPU::V_CVT_F16_I16_e32:
  case AMDGPU::V_CVT_F16_I16_e64:
  case AMDGPU::V_RCP_F16_e64:
  case AMDGPU::V_RCP_F16_e32:
  case AMDGPU::V_RSQ_F16_e64:
  case AMDGPU::V_RSQ_F16_e32:
  case AMDGPU::V_SQRT_F16_e64:
  case AMDGPU::V_SQRT_F16_e32:
  case AMDGPU::V_LOG_F16_e64:
  case AMDGPU::V_LOG_F16_e32:
  case AMDGPU::V_EXP_F16_e64:
  case AMDGPU::V_EXP_F16_e32:
  case AMDGPU::V_SIN_F16_e64:
  case AMDGPU::V_SIN_F16_e32:
  case AMDGPU::V_COS_F16_e64:
  case AMDGPU::V_COS_F16_e32:
  case AMDGPU::V_FLOOR_F16_e64:
  case AMDGPU::V_FLOOR_F16_e32:
  case AMDGPU::V_CEIL_F16_e64:
  case AMDGPU::V_CEIL_F16_e32:
  case AMDGPU::V_TRUNC_F16_e64:
  case AMDGPU::V_TRUNC_F16_e32:
  case AMDGPU::V_RNDNE_F16_e64:
  case AMDGPU::V_RNDNE_F16_e32:
  case AMDGPU::V_FRACT_F16_e64:
  case AMDGPU::V_FRACT_F16_e32:
  case AMDGPU::V_FREXP_MANT_F16_e64:
  case AMDGPU::V_FREXP_MANT_F16_e32:
  case AMDGPU::V_FREXP_EXP_I16_F16_e64:
  case AMDGPU::V_FREXP_EXP_I16_F16_e32:
  case AMDGPU::V_LDEXP_F16_e64:
  case AMDGPU::V_LDEXP_F16_e32:
  case AMDGPU::V_LSHLREV_B16_e64:
  case AMDGPU::V_LSHLREV_B16_e32:
  case AMDGPU::V_LSHRREV_B16_e64:
  case AMDGPU::V_LSHRREV_B16_e32:
  case AMDGPU::V_ASHRREV_I16_e64:
  case AMDGPU::V_ASHRREV_I16_e32:
  case AMDGPU::V_ADD_U16_e64:
  case AMDGPU::V_ADD_U16_e32:
  case AMDGPU::V_SUB_U16_e64:
  case AMDGPU::V_SUB_U16_e32:
  case AMDGPU::V_SUBREV_U16_e64:
  case AMDGPU::V_SUBREV_U16_e32:
  case AMDGPU::V_MUL_LO_U16_e64:
  case AMDGPU::V_MUL_LO_U16_e32:
  case AMDGPU::V_ADD_F16_e64:
  case AMDGPU::V_ADD_F16_e32:
  case AMDGPU::V_SUB_F16_e64:
  case AMDGPU::V_SUB_F16_e32:
  case AMDGPU::V_SUBREV_F16_e64:
  case AMDGPU::V_SUBREV_F16_e32:
  case AMDGPU::V_MUL_F16_e64:
  case AMDGPU::V_MUL_F16_e32:
  case AMDGPU::V_MAX_F16_e64:
  case AMDGPU::V_MAX_F16_e32:
  case AMDGPU::V_MIN_F16_e64:
  case AMDGPU::V_MIN_F16_e32:
  case AMDGPU::V_MAX_U16_e64:
  case AMDGPU::V_MAX_U16_e32:
  case AMDGPU::V_MIN_U16_e64:
  case AMDGPU::V_MIN_U16_e32:
  case AMDGPU::V_MAX_I16_e64:
  case AMDGPU::V_MAX_I16_e32:
  case AMDGPU::V_MIN_I16_e64:
  case AMDGPU::V_MIN_I16_e32:
  case AMDGPU::V_MAD_F16_e64:
  case AMDGPU::V_MAD_U16_e64:
  case AMDGPU::V_MAD_I16_e64:
  case AMDGPU::V_FMA_F16_e64:
  case AMDGPU::V_DIV_FIXUP_F16_e64:
    // On gfx10, all 16-bit instructions preserve the high bits.
    return getGeneration() <= AMDGPUSubtarget::GFX9;
  case AMDGPU::V_MADAK_F16:
  case AMDGPU::V_MADMK_F16:
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_FMAMK_F16:
  case AMDGPU::V_FMAAK_F16:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_e32:
    // In gfx9, the preferred handling of the unused high 16 bits changed. Most
    // instructions maintain the legacy behavior of zeroing. Some instructions
    // changed to preserving the high bits.
    return getGeneration() == AMDGPUSubtarget::GFX9;
  case AMDGPU::V_MAD_MIXLO_F16:
  case AMDGPU::V_MAD_MIXHI_F16:
  default:
    return false;
  }
}

void GCNSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
                                       const SchedRegion &Region) const {
  // Track register pressure so the scheduler can try to decrease
  // pressure once register usage is above the threshold defined by
  // SIRegisterInfo::getRegPressureSetLimit().
  Policy.ShouldTrackPressure = true;

  // Enabling both top down and bottom up scheduling seems to give us fewer
  // register spills than just using one of these approaches on its own.
  Policy.OnlyTopDown = false;
  Policy.OnlyBottomUp = false;

  // Enabling ShouldTrackLaneMasks crashes the SI Machine Scheduler.
  if (!enableSIScheduler())
    Policy.ShouldTrackLaneMasks = true;
}

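// The post-RA scheduling direction can be overridden per function with the
// "amdgpu-post-ra-direction" attribute; the accepted values are "topdown",
// "bottomup", and "bidirectional". Any other value is diagnosed below.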
void GCNSubtarget::overridePostRASchedPolicy(MachineSchedPolicy &Policy,
                                             const SchedRegion &Region) const {
  const Function &F = Region.RegionBegin->getMF()->getFunction();
  Attribute PostRADirectionAttr = F.getFnAttribute("amdgpu-post-ra-direction");
  if (!PostRADirectionAttr.isValid())
    return;

  StringRef PostRADirectionStr = PostRADirectionAttr.getValueAsString();
  if (PostRADirectionStr == "topdown") {
    Policy.OnlyTopDown = true;
    Policy.OnlyBottomUp = false;
  } else if (PostRADirectionStr == "bottomup") {
    Policy.OnlyTopDown = false;
    Policy.OnlyBottomUp = true;
  } else if (PostRADirectionStr == "bidirectional") {
    Policy.OnlyTopDown = false;
    Policy.OnlyBottomUp = false;
  } else {
    DiagnosticInfoOptimizationFailure Diag(
        F, F.getSubprogram(), "invalid value for postRA direction attribute");
    F.getContext().diagnose(Diag);
  }

  LLVM_DEBUG({
    const char *DirStr = "default";
    if (Policy.OnlyTopDown && !Policy.OnlyBottomUp)
      DirStr = "topdown";
    else if (!Policy.OnlyTopDown && Policy.OnlyBottomUp)
      DirStr = "bottomup";
    else if (!Policy.OnlyTopDown && !Policy.OnlyBottomUp)
      DirStr = "bidirectional";

    dbgs() << "Post-MI-sched direction (" << F.getName() << "): " << DirStr
           << '\n';
  });
}

void GCNSubtarget::mirFileLoaded(MachineFunction &MF) const {
  if (isWave32()) {
    // Fix implicit $vcc operands after MIParser has verified that they match
    // the instruction definitions.
    for (auto &MBB : MF) {
      for (auto &MI : MBB)
        InstrInfo.fixImplicitOperands(MI);
    }
  }
}

bool GCNSubtarget::hasMadF16() const {
  return InstrInfo.pseudoToMCOpcode(AMDGPU::V_MAD_F16_e64) != -1;
}

bool GCNSubtarget::useVGPRIndexMode() const {
  return !hasMovrel() || (EnableVGPRIndexMode && hasVGPRIndexMode());
}

bool GCNSubtarget::useAA() const { return UseAA; }

unsigned GCNSubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
  return AMDGPU::IsaInfo::getOccupancyWithNumSGPRs(SGPRs, getMaxWavesPerEU(),
                                                   getGeneration());
}

unsigned
GCNSubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs,
                                       unsigned DynamicVGPRBlockSize) const {
  return AMDGPU::IsaInfo::getNumWavesPerEUWithNumVGPRs(this, VGPRs,
                                                       DynamicVGPRBlockSize);
}

unsigned
GCNSubtarget::getBaseReservedNumSGPRs(const bool HasFlatScratch) const {
  if (getGeneration() >= AMDGPUSubtarget::GFX10)
    return 2; // VCC. FLAT_SCRATCH and XNACK are no longer in SGPRs.

  if (HasFlatScratch || HasArchitectedFlatScratch) {
    if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
      return 6; // FLAT_SCRATCH, XNACK, VCC (in that order).
    if (getGeneration() == AMDGPUSubtarget::SEA_ISLANDS)
      return 4; // FLAT_SCRATCH, VCC (in that order).
  }

  if (isXNACKEnabled())
    return 4; // XNACK, VCC (in that order).
  return 2; // VCC.
}

unsigned GCNSubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
  return getBaseReservedNumSGPRs(MFI.getUserSGPRInfo().hasFlatScratchInit());
}

unsigned GCNSubtarget::getReservedNumSGPRs(const Function &F) const {
  // In principle we do not need to reserve the SGPR pair used for flat_scratch
  // if we know flat instructions do not access the stack anywhere in the
  // program. For now, assume it's needed if we have flat instructions.
  const bool KernelUsesFlatScratch = hasFlatAddressSpace();
  return getBaseReservedNumSGPRs(KernelUsesFlatScratch);
}

std::pair<unsigned, unsigned>
GCNSubtarget::computeOccupancy(const Function &F, unsigned LDSSize,
                               unsigned NumSGPRs, unsigned NumVGPRs) const {
  unsigned DynamicVGPRBlockSize = AMDGPU::getDynamicVGPRBlockSize(F);
  // Temporarily check both the attribute and the subtarget feature until the
  // latter is removed.
  if (DynamicVGPRBlockSize == 0 && isDynamicVGPREnabled())
    DynamicVGPRBlockSize = getDynamicVGPRBlockSize();

  auto [MinOcc, MaxOcc] = getOccupancyWithWorkGroupSizes(LDSSize, F);
  unsigned SGPROcc = getOccupancyWithNumSGPRs(NumSGPRs);
  unsigned VGPROcc = getOccupancyWithNumVGPRs(NumVGPRs, DynamicVGPRBlockSize);

  // Maximum occupancy may be further limited by high SGPR/VGPR usage.
  MaxOcc = std::min(MaxOcc, std::min(SGPROcc, VGPROcc));
  return {std::min(MinOcc, MaxOcc), MaxOcc};
}

unsigned GCNSubtarget::getBaseMaxNumSGPRs(
    const Function &F, std::pair<unsigned, unsigned> WavesPerEU,
    unsigned PreloadedSGPRs, unsigned ReservedNumSGPRs) const {
  // Compute the maximum number of SGPRs the function can use using the
  // default/requested minimum number of waves per execution unit.
  unsigned MaxNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, false);
  unsigned MaxAddressableNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, true);

  // Check if the maximum number of SGPRs was explicitly requested using the
  // "amdgpu-num-sgpr" attribute.
  unsigned Requested =
      F.getFnAttributeAsParsedInteger("amdgpu-num-sgpr", MaxNumSGPRs);

  if (Requested != MaxNumSGPRs) {
    // Make sure requested value does not violate subtarget's specifications.
    if (Requested && (Requested <= ReservedNumSGPRs))
      Requested = 0;

    // If more SGPRs are required to support the input user/system SGPRs,
    // increase to accommodate them.
    //
    // FIXME: This really ends up using the requested number of SGPRs + number
    // of reserved special registers in total. Theoretically you could re-use
    // the last input registers for these special registers, but this would
    // require a lot of complexity to deal with the weird aliasing.
    unsigned InputNumSGPRs = PreloadedSGPRs;
    if (Requested && Requested < InputNumSGPRs)
      Requested = InputNumSGPRs;

    // Make sure requested value is compatible with values implied by
    // default/requested minimum/maximum number of waves per execution unit.
    if (Requested && Requested > getMaxNumSGPRs(WavesPerEU.first, false))
      Requested = 0;
    if (WavesPerEU.second && Requested &&
        Requested < getMinNumSGPRs(WavesPerEU.second))
      Requested = 0;

    if (Requested)
      MaxNumSGPRs = Requested;
  }

  if (hasSGPRInitBug())
    MaxNumSGPRs = AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;

  return std::min(MaxNumSGPRs - ReservedNumSGPRs, MaxAddressableNumSGPRs);
}

unsigned GCNSubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
  const Function &F = MF.getFunction();
  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
  return getBaseMaxNumSGPRs(F, MFI.getWavesPerEU(), MFI.getNumPreloadedSGPRs(),
                            getReservedNumSGPRs(MF));
}

unsigned GCNSubtarget::getMaxNumPreloadedSGPRs() const {
  using USI = GCNUserSGPRUsageInfo;
  // Max number of user SGPRs
  const unsigned MaxUserSGPRs =
      USI::getNumUserSGPRForField(USI::PrivateSegmentBufferID) +
      USI::getNumUserSGPRForField(USI::DispatchPtrID) +
      USI::getNumUserSGPRForField(USI::QueuePtrID) +
      USI::getNumUserSGPRForField(USI::KernargSegmentPtrID) +
      USI::getNumUserSGPRForField(USI::DispatchIdID) +
      USI::getNumUserSGPRForField(USI::FlatScratchInitID) +
      USI::getNumUserSGPRForField(USI::ImplicitBufferPtrID);

  // Max number of system SGPRs
  const unsigned MaxSystemSGPRs = 1 + // WorkGroupIDX
                                  1 + // WorkGroupIDY
                                  1 + // WorkGroupIDZ
                                  1 + // WorkGroupInfo
                                  1;  // private segment wave byte offset

  // Max number of synthetic SGPRs
  const unsigned SyntheticSGPRs = 1; // LDSKernelId

  return MaxUserSGPRs + MaxSystemSGPRs + SyntheticSGPRs;
}

unsigned GCNSubtarget::getMaxNumSGPRs(const Function &F) const {
  return getBaseMaxNumSGPRs(F, getWavesPerEU(F), getMaxNumPreloadedSGPRs(),
                            getReservedNumSGPRs(F));
}

unsigned GCNSubtarget::getBaseMaxNumVGPRs(
    const Function &F, std::pair<unsigned, unsigned> NumVGPRBounds) const {
  const auto [Min, Max] = NumVGPRBounds;

  // Check if the maximum number of VGPRs was explicitly requested using the
  // "amdgpu-num-vgpr" attribute.
  unsigned Requested = F.getFnAttributeAsParsedInteger("amdgpu-num-vgpr", Max);
  if (Requested != Max && hasGFX90AInsts())
    Requested *= 2;

  // Make sure requested value is inside the range of possible VGPR usage.
  return std::clamp(Requested, Min, Max);
}

unsigned GCNSubtarget::getMaxNumVGPRs(const Function &F) const {
  // Temporarily check both the attribute and the subtarget feature, until the
  // latter is removed.
  unsigned DynamicVGPRBlockSize = AMDGPU::getDynamicVGPRBlockSize(F);
  if (DynamicVGPRBlockSize == 0 && isDynamicVGPREnabled())
    DynamicVGPRBlockSize = getDynamicVGPRBlockSize();

  std::pair<unsigned, unsigned> Waves = getWavesPerEU(F);
  return getBaseMaxNumVGPRs(
      F, {getMinNumVGPRs(Waves.second, DynamicVGPRBlockSize),
          getMaxNumVGPRs(Waves.first, DynamicVGPRBlockSize)});
}

unsigned GCNSubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
  return getMaxNumVGPRs(MF.getFunction());
}

std::pair<unsigned, unsigned>
GCNSubtarget::getMaxNumVectorRegs(const Function &F) const {
  const unsigned MaxVectorRegs = getMaxNumVGPRs(F);

  unsigned MaxNumVGPRs = MaxVectorRegs;
  unsigned MaxNumAGPRs = 0;
  unsigned NumArchVGPRs = has1024AddressableVGPRs() ? 1024 : 256;

  // On GFX90A, the number of VGPRs and AGPRs need not be equal. Theoretically,
  // a wave may have up to 512 total vector registers combining together both
  // VGPRs and AGPRs. Hence, in an entry function without calls and without
  // AGPRs used within it, it is possible to use the whole vector register
  // budget for VGPRs.
  //
  // TODO: it shall be possible to estimate maximum AGPR/VGPR pressure and
  // split the register file accordingly.
  if (hasGFX90AInsts()) {
    unsigned MinNumAGPRs = 0;
    const unsigned TotalNumAGPRs = AMDGPU::AGPR_32RegClass.getNumRegs();

    const std::pair<unsigned, unsigned> DefaultNumAGPR = {~0u, ~0u};

    // TODO: The lower bound should probably force the number of required
    // registers up, overriding amdgpu-waves-per-eu.
    std::tie(MinNumAGPRs, MaxNumAGPRs) =
        AMDGPU::getIntegerPairAttribute(F, "amdgpu-agpr-alloc", DefaultNumAGPR,
                                        /*OnlyFirstRequired=*/true);

    if (MinNumAGPRs == DefaultNumAGPR.first) {
      // Default to splitting half the registers if AGPRs are required.
      MinNumAGPRs = MaxNumAGPRs = MaxVectorRegs / 2;
    } else {
      // Align to accum_offset's allocation granularity.
      MinNumAGPRs = alignTo(MinNumAGPRs, 4);

      MinNumAGPRs = std::min(MinNumAGPRs, TotalNumAGPRs);
    }

    // Clamp values to be inbounds of our limits, and ensure min <= max.
    MaxNumAGPRs = std::min(std::max(MinNumAGPRs, MaxNumAGPRs), MaxVectorRegs);
    MinNumAGPRs = std::min(std::min(MinNumAGPRs, TotalNumAGPRs), MaxNumAGPRs);

    MaxNumVGPRs = std::min(MaxVectorRegs - MinNumAGPRs, NumArchVGPRs);
    MaxNumAGPRs = std::min(MaxVectorRegs - MaxNumVGPRs, MaxNumAGPRs);

    assert(MaxNumVGPRs + MaxNumAGPRs <= MaxVectorRegs &&
           MaxNumAGPRs <= TotalNumAGPRs && MaxNumVGPRs <= NumArchVGPRs &&
           "invalid register counts");
  } else if (hasMAIInsts()) {
    // On gfx908 the number of AGPRs always equals the number of VGPRs.
    MaxNumAGPRs = MaxNumVGPRs = MaxVectorRegs;
  }

  return std::pair(MaxNumVGPRs, MaxNumAGPRs);
}

void GCNSubtarget::adjustSchedDependency(
    SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep,
    const TargetSchedModel *SchedModel) const {
  if (Dep.getKind() != SDep::Kind::Data || !Dep.getReg() || !Def->isInstr() ||
      !Use->isInstr())
    return;

  MachineInstr *DefI = Def->getInstr();
  MachineInstr *UseI = Use->getInstr();

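  // Bundles have no per-operand latency in the schedule model, so the latency
  // is derived by walking the bundled instructions: for a def bundle, start
  // from the bundled instruction that last writes the register; for a use
  // bundle, shrink the def's latency by the distance to the first bundled
  // reader of the register.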
  if (DefI->isBundle()) {
    const SIRegisterInfo *TRI = getRegisterInfo();
    auto Reg = Dep.getReg();
    MachineBasicBlock::const_instr_iterator I(DefI->getIterator());
    MachineBasicBlock::const_instr_iterator E(DefI->getParent()->instr_end());
    unsigned Lat = 0;
    for (++I; I != E && I->isBundledWithPred(); ++I) {
      if (I->modifiesRegister(Reg, TRI))
        Lat = InstrInfo.getInstrLatency(getInstrItineraryData(), *I);
      else if (Lat)
        --Lat;
    }
    Dep.setLatency(Lat);
  } else if (UseI->isBundle()) {
    const SIRegisterInfo *TRI = getRegisterInfo();
    auto Reg = Dep.getReg();
    MachineBasicBlock::const_instr_iterator I(UseI->getIterator());
    MachineBasicBlock::const_instr_iterator E(UseI->getParent()->instr_end());
    unsigned Lat = InstrInfo.getInstrLatency(getInstrItineraryData(), *DefI);
    for (++I; I != E && I->isBundledWithPred() && Lat; ++I) {
      if (I->readsRegister(Reg, TRI))
        break;
      --Lat;
    }
    Dep.setLatency(Lat);
  } else if (Dep.getLatency() == 0 && Dep.getReg() == AMDGPU::VCC_LO) {
    // Work around the fact that SIInstrInfo::fixImplicitOperands modifies
    // implicit operands which come from the MCInstrDesc, which can fool
    // ScheduleDAGInstrs::addPhysRegDataDeps into treating them as implicit
    // pseudo operands.
    Dep.setLatency(InstrInfo.getSchedModel().computeOperandLatency(
        DefI, DefOpIdx, UseI, UseOpIdx));
  }
}

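// NSA (non-sequential address) threshold resolution order: the
// -amdgpu-nsa-threshold command-line flag wins, then the function's
// "amdgpu-nsa-threshold" attribute, then the option's default. Explicit values
// are clamped to at least 2 addresses, and GFX12+ returns 0 since images no
// longer use the MIMG encoding there.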
unsigned GCNSubtarget::getNSAThreshold(const MachineFunction &MF) const {
  if (getGeneration() >= AMDGPUSubtarget::GFX12)
    return 0; // Not MIMG encoding.

  if (NSAThreshold.getNumOccurrences() > 0)
    return std::max(NSAThreshold.getValue(), 2u);

  int Value = MF.getFunction().getFnAttributeAsParsedInteger(
      "amdgpu-nsa-threshold", -1);
  if (Value > 0)
    return std::max(Value, 2);

  return NSAThreshold;
}

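// GCNUserSGPRUsageInfo inspects the function's calling convention and its
// "amdgpu-no-*" attributes to decide which preloaded user SGPRs (private
// segment buffer, dispatch ptr/id, queue ptr, kernarg segment ptr, flat
// scratch init, ...) are needed, and tallies how many user SGPRs they consume.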
GCNUserSGPRUsageInfo::GCNUserSGPRUsageInfo(const Function &F,
                                           const GCNSubtarget &ST)
    : ST(ST) {
  const CallingConv::ID CC = F.getCallingConv();
  const bool IsKernel =
      CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL;

  if (IsKernel && (!F.arg_empty() || ST.getImplicitArgNumBytes(F) != 0))
    KernargSegmentPtr = true;

  bool IsAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
  if (IsAmdHsaOrMesa && !ST.enableFlatScratch())
    PrivateSegmentBuffer = true;
  else if (ST.isMesaGfxShader(F))
    ImplicitBufferPtr = true;

  if (!AMDGPU::isGraphics(CC)) {
    if (!F.hasFnAttribute("amdgpu-no-dispatch-ptr"))
      DispatchPtr = true;

    // FIXME: Can this always be disabled with < COv5?
    if (!F.hasFnAttribute("amdgpu-no-queue-ptr"))
      QueuePtr = true;

    if (!F.hasFnAttribute("amdgpu-no-dispatch-id"))
      DispatchID = true;
  }

  if (ST.hasFlatAddressSpace() && AMDGPU::isEntryFunctionCC(CC) &&
      (IsAmdHsaOrMesa || ST.enableFlatScratch()) &&
      // FlatScratchInit cannot be true for graphics CC if enableFlatScratch()
      // is false.
      (ST.enableFlatScratch() ||
       (!AMDGPU::isGraphics(CC) &&
        !F.hasFnAttribute("amdgpu-no-flat-scratch-init"))) &&
      !ST.flatScratchIsArchitected()) {
    FlatScratchInit = true;
  }

  if (hasImplicitBufferPtr())
    NumUsedUserSGPRs += getNumUserSGPRForField(ImplicitBufferPtrID);

  if (hasPrivateSegmentBuffer())
    NumUsedUserSGPRs += getNumUserSGPRForField(PrivateSegmentBufferID);

  if (hasDispatchPtr())
    NumUsedUserSGPRs += getNumUserSGPRForField(DispatchPtrID);

  if (hasQueuePtr())
    NumUsedUserSGPRs += getNumUserSGPRForField(QueuePtrID);

  if (hasKernargSegmentPtr())
    NumUsedUserSGPRs += getNumUserSGPRForField(KernargSegmentPtrID);

  if (hasDispatchID())
    NumUsedUserSGPRs += getNumUserSGPRForField(DispatchIdID);

  if (hasFlatScratchInit())
    NumUsedUserSGPRs += getNumUserSGPRForField(FlatScratchInitID);

  if (hasPrivateSegmentSize())
    NumUsedUserSGPRs += getNumUserSGPRForField(PrivateSegmentSizeID);
}

void GCNUserSGPRUsageInfo::allocKernargPreloadSGPRs(unsigned NumSGPRs) {
  assert(NumKernargPreloadSGPRs + NumSGPRs <= AMDGPU::getMaxNumUserSGPRs(ST));
  NumKernargPreloadSGPRs += NumSGPRs;
  NumUsedUserSGPRs += NumSGPRs;
}

unsigned GCNUserSGPRUsageInfo::getNumFreeUserSGPRs() {
  return AMDGPU::getMaxNumUserSGPRs(ST) - NumUsedUserSGPRs;
}