// Extracted from: doxygen/GCNSchedStrategy_8cpp_source.html

//===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

/// \file

/// This contains a MachineSchedStrategy implementation for maximizing wave

/// occupancy on GCN hardware.

///

/// This pass will apply multiple scheduling stages to the same function.

/// Regions are first recorded in GCNScheduleDAGMILive::schedule. The actual

/// entry point for the scheduling of those regions is

/// GCNScheduleDAGMILive::runSchedStages.


/// Generally, the reason for having multiple scheduling stages is to account

/// for the kernel-wide effect of register usage on occupancy.  Usually, only a

/// few scheduling regions will have register pressure high enough to limit

/// occupancy for the kernel, so constraints can be relaxed to improve ILP in

/// other regions.

///

//===----------------------------------------------------------------------===//


#include "GCNSchedStrategy.h"

#include "AMDGPUIGroupLP.h"

#include "GCNRegPressure.h"

#include "SIMachineFunctionInfo.h"

#include "Utils/AMDGPUBaseInfo.h"

#include "llvm/ADT/STLExtras.h"

#include "llvm/CodeGen/CalcSpillWeights.h"

#include "llvm/CodeGen/RegisterClassInfo.h"

#include "llvm/MC/LaneBitmask.h"

#include "llvm/Support/ErrorHandling.h"


#define DEBUG_TYPE "machine-scheduler"


using namespace llvm;


// Hidden command-line switches that tune the GCN scheduling strategies defined
// in this file. Every option below is registered as cl::Hidden.

// When set, disables the unclustered high register pressure reduction
// scheduling stage. Off by default.
static cl::opt<bool> DisableUnclusterHighRP(

    "amdgpu-disable-unclustered-high-rp-reschedule", cl::Hidden,

    cl::desc("Disable unclustered high register pressure "

             "reduction scheduling stage."),

    cl::init(false));


// When set, disables the clustered low occupancy rescheduling (for ILP)
// stage. Off by default.
static cl::opt<bool> DisableClusteredLowOccupancy(

    "amdgpu-disable-clustered-low-occupancy-reschedule", cl::Hidden,

    cl::desc("Disable clustered low occupancy "

             "rescheduling for ILP scheduling stage."),

    cl::init(false));


// Bias that weights occupancy against latency in the schedule metric; the
// option text states that 100 means "chase the occupancy only". Defaults to 10.
static cl::opt<unsigned> ScheduleMetricBias(

    "amdgpu-schedule-metric-bias", cl::Hidden,

    cl::desc(

        "Sets the bias which adds weight to occupancy vs latency. Set it to "

        "100 to chase the occupancy only."),

    cl::init(10));


// Opt-in relaxation of the occupancy target for memory bound or wave limited
// kernels. Off by default.
static cl::opt<bool>

    RelaxedOcc("amdgpu-schedule-relaxed-occupancy", cl::Hidden,

               cl::desc("Relax occupancy targets for kernels which are memory "

                        "bound (amdgpu-membound-threshold), or "

                        "Wave Limited (amdgpu-limit-wave-threshold)."),

               cl::init(false));


// Selects the AMDGPU specific register pressure trackers during scheduling.
// Off by default. Note that this option is also written to at run time by the
// GCNMaxOccupancySchedStrategy constructor further down in this file.
static cl::opt<bool> GCNTrackers(

    "amdgpu-use-amdgpu-trackers", cl::Hidden,

    cl::desc("Use the AMDGPU specific RPTrackers during scheduling"),

    cl::init(false));


// The two options below, and the DUMP_MAX_REG_PRESSURE macro, only exist when
// NDEBUG is not defined or LLVM_ENABLE_DUMP is defined.
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

#define DUMP_MAX_REG_PRESSURE

// Print the live registers (with defs/uses) at the point of maximum register
// pressure before scheduling. Off by default.
static cl::opt<bool> PrintMaxRPRegUsageBeforeScheduler(

    "amdgpu-print-max-reg-pressure-regusage-before-scheduler", cl::Hidden,

    cl::desc("Print a list of live registers along with their def/uses at the "

             "point of maximum register pressure before scheduling."),

    cl::init(false));


// Same report as above, but taken after scheduling. Off by default.
static cl::opt<bool> PrintMaxRPRegUsageAfterScheduler(

    "amdgpu-print-max-reg-pressure-regusage-after-scheduler", cl::Hidden,

    cl::desc("Print a list of live registers along with their def/uses at the "

             "point of maximum register pressure after scheduling."),

    cl::init(false));

#endif


// Out-of-line definition of the static constant ScheduleMetrics::ScaleFactor.
const unsigned ScheduleMetrics::ScaleFactor = 100;


/// Build the strategy on top of GenericScheduler for the scheduling context
/// \p C.
///
/// The target occupancy starts at 0, no machine function is attached yet
/// (both are filled in by initialize()), and no high register pressure has
/// been observed. Both the downward and the upward GCN pressure trackers are
/// constructed from \c C->LIS, so \p C and its LIS must be non-null.
GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C)

    : GenericScheduler(C), TargetOccupancy(0), MF(nullptr),

      DownwardTracker(*C->LIS), UpwardTracker(*C->LIS), HasHighPressure(false) {

}


/// Prepare the strategy for scheduling \p DAG.
///
/// Runs the generic initialization, records the machine function, and derives
/// the four register limits consulted while ranking candidates:
///   - the SGPR/VGPR *excess* limits (number of allocatable 32-bit registers),
///   - the SGPR/VGPR *critical* limits (register budget at TargetOccupancy,
///     capped by the matching excess limit).
/// Finally the per-kind bias plus the error margin is subtracted from every
/// limit, saturating at zero.
void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
  GenericScheduler::initialize(DAG);

  MF = &DAG->MF;
  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();

  auto *RCI = Context->RegClassInfo;
  SGPRExcessLimit = RCI->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
  VGPRExcessLimit = RCI->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);

  SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();

  // Set the initial TargetOccupancy to the maximum occupancy that we can
  // achieve for this function. This effectively sets a lower bound on the
  // 'Critical' register limits in the scheduler.
  // With the relaxed occupancy feature enabled, the minimum allowed occupancy
  // is used instead, which permits lower targets for wave limited or memory
  // bound kernels.
  if (RelaxedOcc)
    TargetOccupancy = MFI.getMinAllowedOccupancy();
  else
    TargetOccupancy = MFI.getOccupancy();

  SGPRCriticalLimit =
      std::min(ST.getMaxNumSGPRs(TargetOccupancy, true), SGPRExcessLimit);

  if (KnownExcessRP) {
    // Comparable to ST.getMaxNumVGPRs(TargetOccupancy), but yields a
    // reasonably small number on targets with lots of VGPRs, such as GFX10
    // and GFX11.
    LLVM_DEBUG(dbgs() << "Region is known to spill, use alternative "
                         "VGPRCriticalLimit calculation method.\n");
    const unsigned DynamicVGPRBlockSize = MFI.getDynamicVGPRBlockSize();
    const unsigned Granule =
        AMDGPU::IsaInfo::getVGPRAllocGranule(&ST, DynamicVGPRBlockSize);
    const unsigned Addressable =
        AMDGPU::IsaInfo::getAddressableNumVGPRs(&ST, DynamicVGPRBlockSize);
    // Per-wave share of the addressable VGPRs, rounded down to the allocation
    // granule but never below a single granule.
    unsigned VGPRBudget = alignDown(Addressable / TargetOccupancy, Granule);
    VGPRBudget = std::max(VGPRBudget, Granule);
    VGPRCriticalLimit = std::min(VGPRBudget, VGPRExcessLimit);
  } else {
    VGPRCriticalLimit = std::min(
        ST.getMaxNumVGPRs(TargetOccupancy, MFI.getDynamicVGPRBlockSize()),
        VGPRExcessLimit);
  }

  // Take the bias and the error margin off every limit. Clamping the amount
  // to the limit itself keeps the unsigned subtraction from wrapping around.
  const unsigned SGPRSlack = SGPRLimitBias + ErrorMargin;
  const unsigned VGPRSlack = VGPRLimitBias + ErrorMargin;
  SGPRCriticalLimit -= std::min(SGPRSlack, SGPRCriticalLimit);
  VGPRCriticalLimit -= std::min(VGPRSlack, VGPRCriticalLimit);
  SGPRExcessLimit -= std::min(SGPRSlack, SGPRExcessLimit);
  VGPRExcessLimit -= std::min(VGPRSlack, VGPRExcessLimit);

  LLVM_DEBUG(dbgs() << "VGPRCriticalLimit = " << VGPRCriticalLimit
                    << ", VGPRExcessLimit = " << VGPRExcessLimit
                    << ", SGPRCriticalLimit = " << SGPRCriticalLimit
                    << ", SGPRExcessLimit = " << SGPRExcessLimit << "\n\n");
}


/// Tells whether the register pressure for \p SU may be derived from the
/// cached DAG pressure diffs instead of the pressure tracker.
///
/// The cached diffs are good enough in the common case, but two exceptions
/// have been observed through trial and error:
///   - explicit physical register operands
///   - subregister definitions
///
/// For those, PressureDiff does not reflect the actual pressure and
/// LiveIntervals must be queried through the RegPressureTracker to obtain an
/// accurate value.
///
/// Eventually only PressureDiff should be used for maximum performance, but
/// this already lets 80% of SUs take the fast path without changing
/// scheduling at all. Going further would either change scheduling or need a
/// lot more logic to recover an accurate estimate from the PressureDiffs.
///
/// \returns false when \p SU is not an instruction or has an explicit operand
/// of one of the two kinds listed above; true otherwise.
static bool canUsePressureDiffs(const SUnit &SU) {
  if (!SU.isInstr())
    return false;

  for (const auto &MO : SU.getInstr()->operands()) {
    // Only explicit register operands can disqualify the fast path.
    const bool IsExplicitReg = MO.isReg() && !MO.isImplicit();
    if (!IsExplicitReg)
      continue;

    // Physical registers make the cached diff imprecise.
    if (MO.getReg().isPhysical())
      return false;

    // So do definitions that only write a subregister.
    if (MO.isDef() && MO.getSubReg() != AMDGPU::NoSubRegister)
      return false;
  }

  return true;
}


static void getRegisterPressures(

    bool AtTop, const RegPressureTracker &RPTracker, SUnit *SU,

    std::vector<unsigned> &Pressure, std::vector<unsigned> &MaxPressure,

    GCNDownwardRPTracker &DownwardTracker, GCNUpwardRPTracker &UpwardTracker,

    ScheduleDAGMI *DAG, const SIRegisterInfo *SRI) {

  // getDownwardPressure() and getUpwardPressure() make temporary changes to

  // the tracker, so we need to pass those function a non-const copy.

  RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);

  if (!GCNTrackers) {

    AtTop

        ? TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure)

        : TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);


    return;

  }


  // GCNTrackers

  Pressure.resize(4, 0);

  MachineInstr *MI = SU->getInstr();

  GCNRegPressure NewPressure;

  if (AtTop) {

    GCNDownwardRPTracker TempDownwardTracker(DownwardTracker);

    NewPressure = TempDownwardTracker.bumpDownwardPressure(MI, SRI);

  } else {

    GCNUpwardRPTracker TempUpwardTracker(UpwardTracker);

    TempUpwardTracker.recede(*MI);

    NewPressure = TempUpwardTracker.getPressure();

  }

  Pressure[AMDGPU::RegisterPressureSets::SReg_32] = NewPressure.getSGPRNum();

  Pressure[AMDGPU::RegisterPressureSets::VGPR_32] =

      NewPressure.getArchVGPRNum();

  Pressure[AMDGPU::RegisterPressureSets::AGPR_32] = NewPressure.getAGPRNum();

}


/// Fill in \p Cand for \p SU and, when the DAG tracks register pressure,
/// record how scheduling \p SU would push SGPR/VGPR pressure relative to the
/// excess and critical limits.
///
/// \param SGPRPressure, VGPRPressure  pressure at the current position of the
///        zone, as computed by the caller.
/// \param IsBottomUp  direction of the zone; selects the sign applied to the
///        cached pressure diffs.
///
/// Sets HasHighPressure whenever an excess or critical limit is reached.
void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
                                     bool AtTop,
                                     const RegPressureTracker &RPTracker,
                                     const SIRegisterInfo *SRI,
                                     unsigned SGPRPressure,
                                     unsigned VGPRPressure, bool IsBottomUp) {
  Cand.SU = SU;
  Cand.AtTop = AtTop;

  if (!DAG->isTrackingPressure())
    return;

  // Indices of the two pressure sets this function cares about.
  const unsigned SGPRSet = AMDGPU::RegisterPressureSets::SReg_32;
  const unsigned VGPRSet = AMDGPU::RegisterPressureSets::VGPR_32;

  Pressure.clear();
  MaxPressure.clear();

  // Whenever possible the cached PressureDiffs of the ScheduleDAG are used
  // rather than the RegPressureTracker: the tracker issues many LIS queries,
  // which are very expensive, whereas getPressureDiff is a trivial array
  // lookup on precomputed data.
  //
  // Under EXPENSIVE_CHECKS the tracker is always queried as well, to verify
  // the result obtained from the PressureDiffs.
  const bool QueryTracker = AtTop || !canUsePressureDiffs(*SU) || GCNTrackers;
  if (QueryTracker) {
    getRegisterPressures(AtTop, RPTracker, SU, Pressure, MaxPressure,
                         DownwardTracker, UpwardTracker, DAG, SRI);
  } else {
    // Reserve 4 slots and seed them with the current pressure.
    Pressure.resize(4, 0);
    Pressure[SGPRSet] = SGPRPressure;
    Pressure[VGPRSet] = VGPRPressure;

    for (const auto &Diff : DAG->getPressureDiff(SU)) {
      if (Diff.isValid()) {
        // PressureDiffs are always bottom-up, so the sign is flipped when
        // working top-down.
        const int Inc = Diff.getUnitInc();
        Pressure[Diff.getPSet()] += IsBottomUp ? Inc : -Inc;
      }
    }

#ifdef EXPENSIVE_CHECKS
    std::vector<unsigned> RefPressure, RefMaxPressure;
    getRegisterPressures(AtTop, RPTracker, SU, RefPressure, RefMaxPressure,
                         DownwardTracker, UpwardTracker, DAG, SRI);
    const bool SGPRMatches = Pressure[SGPRSet] == RefPressure[SGPRSet];
    const bool VGPRMatches = Pressure[VGPRSet] == RefPressure[VGPRSet];
    if (!SGPRMatches || !VGPRMatches) {
      errs() << "Register Pressure is inaccurate when calculated through "
                "PressureDiff\n"
             << "SGPR got " << Pressure[SGPRSet] << ", expected "
             << RefPressure[SGPRSet] << "\n"
             << "VGPR got " << Pressure[VGPRSet] << ", expected "
             << RefPressure[VGPRSet] << "\n";
      report_fatal_error("inaccurate register pressure calculation");
    }
#endif
  }

  const unsigned NewSGPRPressure = Pressure[SGPRSet];
  const unsigned NewVGPRPressure = Pressure[VGPRSet];

  // If two instructions raise the pressure of different register sets by the
  // same amount, the generic scheduler prefers the one that raises the set
  // with the fewest registers, which here would be SGPRs. That is rarely
  // desirable, so excess pressure is reported either only for VGPRs or only
  // for SGPRs.

  // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
  const unsigned MaxVGPRPressureInc = 16;
  const bool ShouldTrackVGPRs =
      VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
  const bool ShouldTrackSGPRs =
      !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;

  // FIXME: We have to enter REG-EXCESS before we reach the actual threshold
  // to increase the likelihood we don't go over the limits.  We should improve
  // the analysis to look through dependencies to find the path with the least
  // register pressure.

  // RPDelta only needs updating for instructions that increase register
  // pressure. Those that lower it or leave it unchanged get marked RegExcess
  // in tryCandidate() once compared against instructions that raise it.
  //
  // The two cases are mutually exclusive: SGPRs are only tracked when VGPRs
  // are not.
  if (ShouldTrackVGPRs) {
    if (NewVGPRPressure >= VGPRExcessLimit) {
      HasHighPressure = true;
      Cand.RPDelta.Excess = PressureChange(VGPRSet);
      Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
    }
  } else if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
    HasHighPressure = true;
    Cand.RPDelta.Excess = PressureChange(SGPRSet);
    Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
  }

  // Register pressure is 'CRITICAL' when it approaches a value that would
  // reduce the wave occupancy of the execution unit. In that state raising
  // SGPR or VGPR pressure costs the same, so neither is preferred.
  const int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
  const int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;
  if (SGPRDelta < 0 && VGPRDelta < 0)
    return;

  HasHighPressure = true;
  // Report whichever set overshoots its critical limit the most; ties go to
  // VGPRs.
  const bool SGPRDominates = SGPRDelta > VGPRDelta;
  Cand.RPDelta.CriticalMax = PressureChange(SGPRDominates ? SGPRSet : VGPRSet);
  Cand.RPDelta.CriticalMax.setUnitInc(SGPRDominates ? SGPRDelta : VGPRDelta);
}


// Adapted from GenericScheduler::pickNodeFromQueue().
//
// Walks the available queue of \p Zone, initializes a candidate for every SU
// with the GCN specific pressure information and keeps the best one in
// \p Cand, as decided by tryCandidate().
void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
                                         const CandPolicy &ZonePolicy,
                                         const RegPressureTracker &RPTracker,
                                         SchedCandidate &Cand,
                                         bool IsBottomUp) {
  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(TRI);
  ArrayRef<unsigned> PosPressure = RPTracker.getRegSetPressureAtPos();

  // Pressure at the current position; stays at zero when the DAG does not
  // track pressure.
  unsigned SGPRPressure = 0;
  unsigned VGPRPressure = 0;
  if (DAG->isTrackingPressure()) {
    if (GCNTrackers) {
      // Read from the GCN tracker that matches the scheduling direction.
      GCNRPTracker *Tracker = &DownwardTracker;
      if (IsBottomUp)
        Tracker = &UpwardTracker;
      SGPRPressure = Tracker->getPressure().getSGPRNum();
      VGPRPressure = Tracker->getPressure().getArchVGPRNum();
    } else {
      SGPRPressure = PosPressure[AMDGPU::RegisterPressureSets::SReg_32];
      VGPRPressure = PosPressure[AMDGPU::RegisterPressureSets::VGPR_32];
    }
  }

  const bool ZoneIsTop = Zone.isTop();
  for (SUnit *SU : Zone.Available) {
    SchedCandidate TryCand(ZonePolicy);
    initCandidate(TryCand, SU, ZoneIsTop, RPTracker, SRI, SGPRPressure,
                  VGPRPressure, IsBottomUp);

    // Pass SchedBoundary only when comparing nodes from the same boundary.
    SchedBoundary *ZoneArg = nullptr;
    if (Cand.AtTop == TryCand.AtTop)
      ZoneArg = &Zone;

    tryCandidate(Cand, TryCand, ZoneArg);
    if (TryCand.Reason == NoCand)
      continue;

    // Initialize resource delta if needed in case future heuristics query it.
    if (TryCand.ResDelta == SchedResourceDelta())
      TryCand.initResourceDelta(Zone.DAG, SchedModel);
    Cand.setBest(TryCand);
    LLVM_DEBUG(traceCandidate(Cand));
  }
}


// Adapted from GenericScheduler::pickNodeBidirectional().
//
// Picks the next SU when both directions are allowed. A zone with a single
// choice is served immediately; otherwise the best bottom candidate and the
// best top candidate are (re)computed and compared. \p IsTopNode reports
// which zone the returned SU belongs to.
SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
  // Schedule as far as possible in the direction of no choice. This is most
  // efficient, but also provides the best heuristics for CriticalPSets.
  if (SUnit *OnlyBot = Bot.pickOnlyChoice()) {
    IsTopNode = false;
    return OnlyBot;
  }
  if (SUnit *OnlyTop = Top.pickOnlyChoice()) {
    IsTopNode = true;
    return OnlyTop;
  }

  // The bottom-up policy depends on the current bottom zone and on the
  // instructions outside of it, including the top zone.
  CandPolicy BotPolicy;
  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
  // Likewise, the top-down policy depends on the current top zone and on the
  // instructions outside of it, including the bottom zone.
  CandPolicy TopPolicy;
  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);

  // BotCand may still be valid if the previous pick came from Top.
  LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
  const bool RepickBot = !BotCand.isValid() || BotCand.SU->isScheduled ||
                         BotCand.Policy != BotPolicy;
  if (!RepickBot) {
    LLVM_DEBUG(traceCandidate(BotCand));
#ifndef NDEBUG
    if (VerifyScheduling) {
      // Re-run the pick from scratch and make sure it agrees with the cache.
      SchedCandidate Check;
      Check.reset(CandPolicy());
      pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), Check,
                        /*IsBottomUp=*/true);
      assert(Check.SU == BotCand.SU &&
             "Last pick result should correspond to re-picking right now");
    }
#endif
  } else {
    BotCand.reset(CandPolicy());
    pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand,
                      /*IsBottomUp=*/true);
    assert(BotCand.Reason != NoCand && "failed to find the first candidate");
  }

  // Same procedure for the top queue.
  LLVM_DEBUG(dbgs() << "Picking from Top:\n");
  const bool RepickTop = !TopCand.isValid() || TopCand.SU->isScheduled ||
                         TopCand.Policy != TopPolicy;
  if (!RepickTop) {
    LLVM_DEBUG(traceCandidate(TopCand));
#ifndef NDEBUG
    if (VerifyScheduling) {
      SchedCandidate Check;
      Check.reset(CandPolicy());
      pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), Check,
                        /*IsBottomUp=*/false);
      assert(Check.SU == TopCand.SU &&
             "Last pick result should correspond to re-picking right now");
    }
#endif
  } else {
    TopCand.reset(CandPolicy());
    pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand,
                      /*IsBottomUp=*/false);
    assert(TopCand.Reason != NoCand && "failed to find the first candidate");
  }

  // Pick best from BotCand and TopCand. The bottom candidate is the default;
  // the top one only replaces it if tryCandidate() gives it a reason.
  LLVM_DEBUG(dbgs() << "Top Cand: "; traceCandidate(TopCand);
             dbgs() << "Bot Cand: "; traceCandidate(BotCand););
  SchedCandidate Best = BotCand;
  TopCand.Reason = NoCand;
  tryCandidate(Best, TopCand, nullptr);
  if (TopCand.Reason != NoCand)
    Best.setBest(TopCand);
  LLVM_DEBUG(dbgs() << "Picking: "; traceCandidate(Best););

  IsTopNode = Best.AtTop;
  return Best.SU;
}


// Adapted from GenericScheduler::pickNode().
//
// Returns the next SU to schedule, or nullptr once the region is exhausted.
// \p IsTopNode reports whether the SU is to be placed at the top or at the
// bottom of the unscheduled zone. The SU is removed from the ready queues
// before it is returned.
SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return nullptr;
  }

  // Keep picking until a node that has not been scheduled yet turns up.
  SUnit *SU = nullptr;
  for (;;) {
    if (RegionPolicy.OnlyTopDown) {
      SU = Top.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        TopCand.reset(NoPolicy);
        pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand,
                          /*IsBottomUp=*/false);
        assert(TopCand.Reason != NoCand && "failed to find a candidate");
        SU = TopCand.SU;
      }
      IsTopNode = true;
    } else if (RegionPolicy.OnlyBottomUp) {
      SU = Bot.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        BotCand.reset(NoPolicy);
        pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand,
                          /*IsBottomUp=*/true);
        assert(BotCand.Reason != NoCand && "failed to find a candidate");
        SU = BotCand.SU;
      }
      IsTopNode = false;
    } else {
      SU = pickNodeBidirectional(IsTopNode);
    }

    if (!SU->isScheduled)
      break;
  }

  // The node may sit in either ready queue, or in both.
  if (SU->isTopReady())
    Top.removeReady(SU);
  if (SU->isBottomReady())
    Bot.removeReady(SU);

  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
                    << *SU->getInstr());
  return SU;
}


/// Notification that \p SU has been scheduled. When the AMDGPU specific
/// trackers are in use, the tracker for the chosen direction is moved over
/// the instruction first; the generic bookkeeping then runs as usual.
void GCNSchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
  if (GCNTrackers) {
    MachineInstr *MI = SU->getInstr();
    if (IsTopNode)
      (void)DownwardTracker.advance(MI, false); // result intentionally ignored
    else
      UpwardTracker.recede(*MI);
  }

  GenericScheduler::schedNode(SU, IsTopNode);
}


/// Return the ID of the stage that CurrentStage points at.
///
/// Asserts that CurrentStage has been set (advanceStage() was called at
/// least once) and does not sit at the end of SchedStages.
GCNSchedStageID GCNSchedStrategy::getCurrentStage() {

  assert(CurrentStage && CurrentStage != SchedStages.end());

  return *CurrentStage;

}


/// Move CurrentStage to the next entry of SchedStages. The first call, when
/// CurrentStage is still unset, positions it on the first stage.
///
/// \returns true if CurrentStage designates a stage afterwards, false when
/// the end of SchedStages has been reached.
bool GCNSchedStrategy::advanceStage() {
  assert(CurrentStage != SchedStages.end());
  CurrentStage = CurrentStage ? std::next(CurrentStage) : SchedStages.begin();
  return CurrentStage != SchedStages.end();
}


/// \returns true if another stage follows the current one in SchedStages.
/// CurrentStage must already have been set.
bool GCNSchedStrategy::hasNextStage() const {
  assert(CurrentStage);
  const auto Following = std::next(CurrentStage);
  return Following != SchedStages.end();
}


/// \returns the ID of the stage that follows the current one. Asserts that
/// CurrentStage is set and that such a following stage exists.
GCNSchedStageID GCNSchedStrategy::getNextStage() const {
  assert(CurrentStage && std::next(CurrentStage) != SchedStages.end());
  const auto Following = std::next(CurrentStage);
  return *Following;
}


/// Set up the max-occupancy strategy: registers its four scheduling stages in
/// execution order and turns the AMDGPU specific trackers off when
/// \p IsLegacyScheduler is set.
GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
    const MachineSchedContext *C, bool IsLegacyScheduler)
    : GCNSchedStrategy(C) {
  for (GCNSchedStageID Stage :
       {GCNSchedStageID::OccInitialSchedule,
        GCNSchedStageID::UnclusteredHighRPReschedule,
        GCNSchedStageID::ClusteredLowOccupancyReschedule,
        GCNSchedStageID::PreRARematerialize})
    SchedStages.push_back(Stage);

  // Note: this writes to the file-level command-line option itself.
  GCNTrackers = GCNTrackers && !IsLegacyScheduler;
}


/// Set up the max-ILP strategy. Its only scheduling stage is
/// ILPInitialSchedule.
GCNMaxILPSchedStrategy::GCNMaxILPSchedStrategy(const MachineSchedContext *C)

    : GCNSchedStrategy(C) {

  SchedStages.push_back(GCNSchedStageID::ILPInitialSchedule);

}


/// Candidate comparison for the max-ILP strategy.
///
/// The heuristics are tried in a fixed order and the first one that tells the
/// two candidates apart settles the comparison.
///
/// \param Cand provides the policy and current best candidate.
/// \param TryCand refers to the next SUnit candidate.
/// \param Zone describes the scheduled zone that is being extended, or nullptr
///             if Cand is from a different zone than TryCand.
/// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
bool GCNMaxILPSchedStrategy::tryCandidate(SchedCandidate &Cand,
                                          SchedCandidate &TryCand,
                                          SchedBoundary *Zone) const {
  // Once a heuristic has discriminated, the outcome is read off TryCand.
  const auto Verdict = [&TryCand] { return TryCand.Reason != NoCand; };

  // Initialize the candidate if needed.
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return true;
  }

  const bool TrackingPressure = DAG->isTrackingPressure();

  // Avoid spilling by exceeding the register limit.
  if (TrackingPressure &&
      tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,
                  RegExcess, TRI, DAG->MF))
    return Verdict();

  // Bias PhysReg Defs and copies to their uses and defined respectively.
  if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
                 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
    return Verdict();

  const bool SameBoundary = Zone != nullptr;
  if (SameBoundary) {
    // Prioritize instructions that read unbuffered resources by stall cycles.
    if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
                Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
      return Verdict();

    // Avoid critical resource consumption and balance the schedule.
    TryCand.initResourceDelta(DAG, SchedModel);
    if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
                TryCand, Cand, ResourceReduce))
      return Verdict();
    if (tryGreater(TryCand.ResDelta.DemandedResources,
                   Cand.ResDelta.DemandedResources, TryCand, Cand,
                   ResourceDemand))
      return Verdict();

    // Unconditionally try to reduce latency.
    if (tryLatency(TryCand, Cand, *Zone))
      return Verdict();

    // Weak edges are for clustering and other constraints.
    if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
                getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand, Weak))
      return Verdict();
  }

  // Keep clustered nodes together to encourage downstream peephole
  // optimizations which may reduce resource requirements.
  //
  // This is a best effort to set things up for a post-RA pass. Optimizations
  // like generating loads of multiple registers should ideally be done within
  // the scheduler pass by combining the loads during DAG postprocessing.
  const unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID;
  const unsigned TryCandZoneCluster =
      TryCand.AtTop ? TopClusterID : BotClusterID;
  const bool CandInCluster =
      isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);
  const bool TryCandInCluster =
      isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);
  if (tryGreater(TryCandInCluster, CandInCluster, TryCand, Cand, Cluster))
    return Verdict();

  // Avoid increasing the max critical pressure in the scheduled region.
  if (TrackingPressure &&
      tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,
                  TryCand, Cand, RegCritical, TRI, DAG->MF))
    return Verdict();

  // Avoid increasing the max pressure of the entire region.
  if (TrackingPressure &&
      tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand,
                  Cand, RegMax, TRI, DAG->MF))
    return Verdict();

  if (!SameBoundary)
    return false;

  // Fall through to original instruction order: the lower node number wins
  // at the top, the higher one at the bottom.
  const unsigned TryNum = TryCand.SU->NodeNum;
  const unsigned CandNum = Cand.SU->NodeNum;
  const bool TryComesFirst =
      Zone->isTop() ? TryNum < CandNum : TryNum > CandNum;
  if (TryComesFirst) {
    TryCand.Reason = NodeOrder;
    return true;
  }
  return false;
}


/// Set up the max-memory-clause strategy. Its only scheduling stage is
/// MemoryClauseInitialSchedule.
GCNMaxMemoryClauseSchedStrategy::GCNMaxMemoryClauseSchedStrategy(

    const MachineSchedContext *C)

    : GCNSchedStrategy(C) {

  SchedStages.push_back(GCNSchedStageID::MemoryClauseInitialSchedule);

}


/// GCNMaxMemoryClauseSchedStrategy tries best to clause memory instructions as
/// much as possible. This is achieved by:
///  1. Prioritize clustered operations before stall latency heuristic.
///  2. Prioritize long-latency-load before stall latency heuristic.
///
/// The heuristics are tried in a fixed order and the first one that tells the
/// two candidates apart settles the comparison.
///
/// \param Cand provides the policy and current best candidate.
/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
/// \param Zone describes the scheduled zone that we are extending, or nullptr
///             if Cand is from a different zone than TryCand.
/// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
bool GCNMaxMemoryClauseSchedStrategy::tryCandidate(SchedCandidate &Cand,
                                                   SchedCandidate &TryCand,
                                                   SchedBoundary *Zone) const {
  // Once a heuristic has discriminated, the outcome is read off TryCand.
  const auto Verdict = [&TryCand] { return TryCand.Reason != NoCand; };

  // Initialize the candidate if needed.
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return true;
  }

  // Bias PhysReg Defs and copies to their uses and defined respectively.
  if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
                 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
    return Verdict();

  const bool TrackingPressure = DAG->isTrackingPressure();
  if (TrackingPressure) {
    // Avoid exceeding the target's limit.
    if (tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,
                    RegExcess, TRI, DAG->MF))
      return Verdict();

    // Avoid increasing the max critical pressure in the scheduled region.
    if (tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,
                    TryCand, Cand, RegCritical, TRI, DAG->MF))
      return Verdict();
  }

  // MaxMemoryClause-specific: clustered instructions come first, since
  // clausing these memory instructions is where the benefit lies.
  const unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID;
  const unsigned TryCandZoneCluster =
      TryCand.AtTop ? TopClusterID : BotClusterID;
  const bool CandInCluster =
      isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);
  const bool TryCandInCluster =
      isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);
  if (tryGreater(TryCandInCluster, CandInCluster, TryCand, Cand, Cluster))
    return Verdict();

  // Only a subset of features is compared between the Top and the Bottom
  // boundary. Some properties are simply incomparable; in many other cases
  // the other boundary should only be overridden when something is a clear
  // good pick on one boundary. Heuristics that are more "tie-breaking" in
  // nature are therefore skipped across boundaries.
  const bool SameBoundary = Zone != nullptr;
  if (SameBoundary) {
    // For loops that are acyclic path limited, aggressively schedule for
    // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
    // heuristics to take precedence.
    if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
        tryLatency(TryCand, Cand, *Zone))
      return Verdict();

    // MaxMemoryClause-specific: prioritize long latency memory loads in
    // top-bottom order to hide more latency. The mayLoad check excludes
    // store-like instructions, which should not be scheduled too early.
    const bool TryMayLoad =
        TryCand.SU->isInstr() && TryCand.SU->getInstr()->mayLoad();
    const bool CandMayLoad =
        Cand.SU->isInstr() && Cand.SU->getInstr()->mayLoad();

    if (TryMayLoad || CandMayLoad) {
      // "Long" means more than ten times the latency of the other candidate.
      const bool TryLongLatency =
          TryCand.SU->Latency > 10 * Cand.SU->Latency && TryMayLoad;
      const bool CandLongLatency =
          10 * TryCand.SU->Latency < Cand.SU->Latency && CandMayLoad;

      const bool ZoneIsTop = Zone->isTop();
      if (tryGreater(ZoneIsTop ? TryLongLatency : CandLongLatency,
                     ZoneIsTop ? CandLongLatency : TryLongLatency, TryCand,
                     Cand, Stall))
        return Verdict();
    }

    // Prioritize instructions that read unbuffered resources by stall cycles.
    if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
                Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
      return Verdict();

    // Weak edges are for clustering and other constraints.
    if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
                getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand, Weak))
      return Verdict();
  }

  // Avoid increasing the max pressure of the entire region.
  if (TrackingPressure &&
      tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand,
                  Cand, RegMax, TRI, DAG->MF))
    return Verdict();

  if (!SameBoundary)
    return false;

  // Avoid critical resource consumption and balance the schedule.
  TryCand.initResourceDelta(DAG, SchedModel);
  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
              TryCand, Cand, ResourceReduce))
    return Verdict();
  if (tryGreater(TryCand.ResDelta.DemandedResources,
                 Cand.ResDelta.DemandedResources, TryCand, Cand,
                 ResourceDemand))
    return Verdict();

  // Avoid serializing long latency dependence chains.
  // For acyclic path limited loops, latency was already checked above.
  if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
      !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
    return Verdict();

  // Fall through to original instruction order.
  const bool TryHasLowerNum = TryCand.SU->NodeNum < Cand.SU->NodeNum;
  if (Zone->isTop() == TryHasLowerNum) {
    assert(TryCand.SU->NodeNum != Cand.SU->NodeNum);
    TryCand.Reason = NodeOrder;
    return true;
  }

  return false;
}


/// Build the GCN scheduling DAG around strategy \p S.
///
/// Both the starting and the minimum occupancy are taken from the function
/// info. With the relaxed occupancy option, the minimum is lowered to the
/// minimum allowed occupancy, but never raised above the starting value.
GCNScheduleDAGMILive::GCNScheduleDAGMILive(
    MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S)
    : ScheduleDAGMILive(C, std::move(S)), ST(MF.getSubtarget<GCNSubtarget>()),
      MFI(*MF.getInfo<SIMachineFunctionInfo>()),
      StartingOccupancy(MFI.getOccupancy()), MinOccupancy(StartingOccupancy),
      RegionLiveOuts(this, /*IsLiveOut=*/true) {
  // Regions with a single MI must be scheduled as well, so that scheduling
  // stages which move MIs between regions (e.g., rematerialization) can
  // reason about them correctly.
  ScheduleSingleMIRegions = true;

  LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
  if (!RelaxedOcc)
    return;

  MinOccupancy = std::min(MFI.getMinAllowedOccupancy(), StartingOccupancy);
  LLVM_DEBUG(if (MinOccupancy != StartingOccupancy) dbgs()
                 << "Allowing Occupancy drops to " << MinOccupancy << ".\n");
}


/// Factory for scheduling stages: instantiates the stage class that
/// corresponds to \p SchedStageID, bound to this DAG. Reaching the end
/// without a match is treated as unreachable.
std::unique_ptr<GCNSchedStage>
GCNScheduleDAGMILive::createSchedStage(GCNSchedStageID SchedStageID) {
  std::unique_ptr<GCNSchedStage> Stage;

  switch (SchedStageID) {
  case GCNSchedStageID::OccInitialSchedule:
    Stage = std::make_unique<OccInitialScheduleStage>(SchedStageID, *this);
    break;
  case GCNSchedStageID::UnclusteredHighRPReschedule:
    Stage = std::make_unique<UnclusteredHighRPStage>(SchedStageID, *this);
    break;
  case GCNSchedStageID::ClusteredLowOccupancyReschedule:
    Stage = std::make_unique<ClusteredLowOccStage>(SchedStageID, *this);
    break;
  case GCNSchedStageID::PreRARematerialize:
    Stage = std::make_unique<PreRARematStage>(SchedStageID, *this);
    break;
  case GCNSchedStageID::ILPInitialSchedule:
    Stage = std::make_unique<ILPInitialScheduleStage>(SchedStageID, *this);
    break;
  case GCNSchedStageID::MemoryClauseInitialSchedule:
    Stage = std::make_unique<MemoryClauseInitialScheduleStage>(SchedStageID,
                                                               *this);
    break;
  }

  if (!Stage)
    llvm_unreachable("Unknown SchedStageID.");
  return Stage;
}


/// Records the current region's boundaries instead of scheduling it. The
/// recorded regions are scheduled later, once
/// GCNScheduleDAGMILive::finalizeSchedule is called.
void GCNScheduleDAGMILive::schedule() {
  Regions.emplace_back(RegionBegin, RegionEnd);
}


/// Recomputes the maximum register pressure of region \p RegionIdx by running
/// a fresh downward tracker across the region's current instruction order,
/// seeded with the region's recorded live-in set.
GCNRegPressure
GCNScheduleDAGMILive::getRealRegPressure(unsigned RegionIdx) const {
  const auto &Bounds = Regions[RegionIdx];
  GCNDownwardRPTracker Tracker(*LIS);
  Tracker.advance(Bounds.first, Bounds.second, &LiveIns[RegionIdx]);
  return Tracker.moveMaxPressure();
}


/// Returns the instruction used to identify the end of the region
/// [\p RegionBegin, \p RegionEnd).
///
/// The backward search for a non-debug instruction starts at \p RegionEnd
/// itself, unless \p RegionEnd is the end of the parent block, in which case
/// it starts at the instruction just before it. The search does not go past
/// \p RegionBegin.
static MachineInstr *getLastMIForRegion(MachineBasicBlock::iterator RegionBegin,
                                        MachineBasicBlock::iterator RegionEnd) {
  MachineBasicBlock::iterator SearchFrom = RegionEnd;
  // The block's end iterator cannot be dereferenced; step back by one.
  if (RegionEnd == RegionBegin->getParent()->end())
    SearchFrom = std::prev(RegionEnd);

  return &*skipDebugInstructionsBackward(SearchFrom, RegionBegin);
}


/// Fills LiveIns[] and Pressure[] for every recorded region of \p MBB, where
/// \p RegionIdx is the index of the first such region in Regions.
///
/// A single downward RP tracker is walked through the block. At each region's
/// begin the tracker's live set is stored as that region's live-ins and the
/// max pressure is cleared; at each region's end the accumulated max pressure
/// is stored as that region's pressure. If \p MBB has exactly one successor
/// that in turn has exactly one predecessor, the live set at the end of the
/// block is cached in MBBLiveIns for that successor.
void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,

                                                const MachineBasicBlock *MBB) {

  GCNDownwardRPTracker RPTracker(*LIS);


  // If the block has the only successor then live-ins of that successor are

  // live-outs of the current block. We can reuse calculated live set if the

  // successor will be sent to scheduling past current block.


  // However, due to the bug in LiveInterval analysis it may happen that two

  // predecessors of the same successor block have different lane bitmasks for

  // a live-out register. Workaround that by sticking to one-to-one relationship

  // i.e. one predecessor with one successor block.

  const MachineBasicBlock *OnlySucc = nullptr;

  if (MBB->succ_size() == 1) {

    auto *Candidate = *MBB->succ_begin();

    if (!Candidate->empty() && Candidate->pred_size() == 1) {

      SlotIndexes *Ind = LIS->getSlotIndexes();

      // Only accept a successor whose start slot index is after this block's.
      if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(Candidate))

        OnlySucc = Candidate;

    }

  }


  // Scheduler sends regions from the end of the block upwards.

  // Find the last region index that still belongs to MBB; given the ordering
  // above, that is the region closest to the top of the block.
  size_t CurRegion = RegionIdx;

  for (size_t E = Regions.size(); CurRegion != E; ++CurRegion)

    if (Regions[CurRegion].first->getParent() != MBB)

      break;

  --CurRegion;


  auto I = MBB->begin();

  auto LiveInIt = MBBLiveIns.find(MBB);

  auto &Rgn = Regions[CurRegion];

  auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);

  if (LiveInIt != MBBLiveIns.end()) {

    // A predecessor cached this block's live-ins: start tracking at the block
    // begin and consume the cache entry.
    auto LiveIn = std::move(LiveInIt->second);

    RPTracker.reset(*MBB->begin(), &LiveIn);

    MBBLiveIns.erase(LiveInIt);

  } else {

    // No cached live-ins: start at the topmost region using the live set
    // recorded for its first non-debug instruction.
    I = Rgn.first;

    auto LRS = BBLiveInMap.lookup(NonDbgMI);

#ifdef EXPENSIVE_CHECKS

    assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS));

#endif

    RPTracker.reset(*I, &LRS);

  }


  // Walk down the block; region indices decrease as the walk proceeds.
  for (;;) {

    I = RPTracker.getNext();


    // Entering the current region: snapshot live-ins and restart max-pressure
    // accumulation.
    if (Regions[CurRegion].first == I || NonDbgMI == I) {

      LiveIns[CurRegion] = RPTracker.getLiveRegs();

      RPTracker.clearMaxPressure();

    }


    // Leaving the current region: store its pressure and move on to the next
    // region down the block.
    if (Regions[CurRegion].second == I) {

      Pressure[CurRegion] = RPTracker.moveMaxPressure();

      // The post-decrement compares the old index, so the walk stops right
      // after region RegionIdx has been processed.
      if (CurRegion-- == RegionIdx)

        break;

      // Note: this Rgn shadows the one declared before the loop.
      auto &Rgn = Regions[CurRegion];

      NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);

    }

    RPTracker.advanceToNext();

    RPTracker.advanceBeforeNext();

  }


  // Carry the tracker to the end of the block and hand the resulting live set
  // to the single successor as its live-ins.
  if (OnlySucc) {

    if (I != MBB->end()) {

      RPTracker.advanceToNext();

      RPTracker.advance(MBB->end());

    }

    RPTracker.advanceBeforeNext();

    MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs();

  }

}


/// Computes the set of registers live before the first non-debug instruction
/// of every recorded region. The returned map is keyed by that instruction.
/// Must not be called when no regions have been recorded.
DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
GCNScheduleDAGMILive::getRegionLiveInMap() const {
  assert(!Regions.empty());

  std::vector<MachineInstr *> FirstMIs;
  FirstMIs.reserve(Regions.size());
  // Regions are visited in reverse recording order.
  for (const auto &Bounds : reverse(Regions)) {
    MachineInstr *FirstNonDbg =
        &*skipDebugInstructionsForward(Bounds.first, Bounds.second);
    FirstMIs.push_back(FirstNonDbg);
  }

  return getLiveRegMap(FirstMIs, /*After=*/false, *LIS);
}


/// Computes the set of registers live after the instruction that
/// getLastMIForRegion selects for every recorded region. The returned map is
/// keyed by that instruction. Must not be called when no regions have been
/// recorded.
DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
GCNScheduleDAGMILive::getRegionLiveOutMap() const {
  assert(!Regions.empty());

  std::vector<MachineInstr *> LastMIs;
  LastMIs.reserve(Regions.size());
  // Regions are visited in reverse recording order.
  for (const auto &Bounds : reverse(Regions))
    LastMIs.push_back(getLastMIForRegion(Bounds.first, Bounds.second));

  return getLiveRegMap(LastMIs, /*After=*/true, *LIS);
}


void RegionPressureMap::buildLiveRegMap() {

  IdxToInstruction.clear();


  RegionLiveRegMap =

      IsLiveOut ? DAG->getRegionLiveOutMap() : DAG->getRegionLiveInMap();

  for (unsigned I = 0; I < DAG->Regions.size(); I++) {

    MachineInstr *RegionKey =

        IsLiveOut

            ? getLastMIForRegion(DAG->Regions[I].first, DAG->Regions[I].second)

            : &*DAG->Regions[I].first;

    IdxToInstruction[I] = RegionKey;

  }

}


void GCNScheduleDAGMILive::finalizeSchedule() {

  // Start actual scheduling here. This function is called by the base

  // MachineScheduler after all regions have been recorded by

  // GCNScheduleDAGMILive::schedule().

  LiveIns.resize(Regions.size());

  Pressure.resize(Regions.size());

  RegionsWithHighRP.resize(Regions.size());

  RegionsWithExcessRP.resize(Regions.size());

  RegionsWithIGLPInstrs.resize(Regions.size());

  RegionsWithHighRP.reset();

  RegionsWithExcessRP.reset();

  RegionsWithIGLPInstrs.reset();


  runSchedStages();

}


void GCNScheduleDAGMILive::runSchedStages() {

  LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");


  if (!Regions.empty()) {

    BBLiveInMap = getRegionLiveInMap();

    if (GCNTrackers)

      RegionLiveOuts.buildLiveRegMap();

  }


#ifdef DUMP_MAX_REG_PRESSURE

  if (PrintMaxRPRegUsageBeforeScheduler) {

    dumpMaxRegPressure(MF, GCNRegPressure::VGPR, *LIS, MLI);

    dumpMaxRegPressure(MF, GCNRegPressure::SGPR, *LIS, MLI);

    LIS->dump();

  }

#endif


  GCNSchedStrategy &S = static_cast<GCNSchedStrategy &>(*SchedImpl);

  while (S.advanceStage()) {

    auto Stage = createSchedStage(S.getCurrentStage());

    if (!Stage->initGCNSchedStage())

      continue;


    for (auto Region : Regions) {

      RegionBegin = Region.first;

      RegionEnd = Region.second;

      // Setup for scheduling the region and check whether it should be skipped.

      if (!Stage->initGCNRegion()) {

        Stage->advanceRegion();

        exitRegion();

        continue;

      }


      if (GCNTrackers) {

        GCNDownwardRPTracker *DownwardTracker = S.getDownwardTracker();

        GCNUpwardRPTracker *UpwardTracker = S.getUpwardTracker();

        GCNRPTracker::LiveRegSet *RegionLiveIns =

            &LiveIns[Stage->getRegionIdx()];


        reinterpret_cast<GCNRPTracker *>(DownwardTracker)

            ->reset(MRI, *RegionLiveIns);

        reinterpret_cast<GCNRPTracker *>(UpwardTracker)

            ->reset(MRI, RegionLiveOuts.getLiveRegsForRegionIdx(

                             Stage->getRegionIdx()));

      }


      ScheduleDAGMILive::schedule();

      Stage->finalizeGCNRegion();

    }


    Stage->finalizeGCNSchedStage();

  }


#ifdef DUMP_MAX_REG_PRESSURE

  if (PrintMaxRPRegUsageAfterScheduler) {

    dumpMaxRegPressure(MF, GCNRegPressure::VGPR, *LIS, MLI);

    dumpMaxRegPressure(MF, GCNRegPressure::SGPR, *LIS, MLI);

    LIS->dump();

  }

#endif

}


#ifndef NDEBUG


/// Writes a human-readable name for \p StageID to \p OS and returns \p OS.
/// Nothing is written for a value that matches none of the enumerators below.
raw_ostream &llvm::operator<<(raw_ostream &OS, const GCNSchedStageID &StageID) {
  switch (StageID) {
  case GCNSchedStageID::OccInitialSchedule:
    return OS << "Max Occupancy Initial Schedule";
  case GCNSchedStageID::UnclusteredHighRPReschedule:
    return OS << "Unclustered High Register Pressure Reschedule";
  case GCNSchedStageID::ClusteredLowOccupancyReschedule:
    return OS << "Clustered Low Occupancy Reschedule";
  case GCNSchedStageID::PreRARematerialize:
    return OS << "Pre-RA Rematerialize";
  case GCNSchedStageID::ILPInitialSchedule:
    return OS << "Max ILP Initial Schedule";
  case GCNSchedStageID::MemoryClauseInitialSchedule:
    return OS << "Max memory clause Initial Schedule";
  }

  return OS;
}


#endif


/// Creates the stage identified by \p StageID operating on \p DAG. The
/// strategy, machine function, function info and subtarget references held by
/// the stage are all taken from \p DAG.
GCNSchedStage::GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)

    : DAG(DAG), S(static_cast<GCNSchedStrategy &>(*DAG.SchedImpl)), MF(DAG.MF),

      MFI(DAG.MFI), ST(DAG.ST), StageID(StageID) {}


/// Common stage initialization.
/// \returns false when the DAG has no LiveIntervals, in which case the stage
/// must not run; true otherwise.
bool GCNSchedStage::initGCNSchedStage() {
  const bool HasLiveIntervals = DAG.LIS != nullptr;
  if (HasLiveIntervals) {
    LLVM_DEBUG(dbgs() << "Starting scheduling stage: " << StageID << "\n");
  }
  return HasLiveIntervals;
}


/// Prepares the unclustered high-RP rescheduling stage.
///
/// The stage is skipped when it is disabled on the command line, when the
/// common initialization fails, or when no region was flagged with high or
/// excess register pressure. Otherwise the DAG mutations are replaced by a
/// single IGroupLP mutation, the strategy's register limit biases are set to
/// their high-RP values, and the occupancy target is raised by one if the
/// function's maximum waves per EU allows it.
///
/// \returns true if the stage should run.
bool UnclusteredHighRPStage::initGCNSchedStage() {
  if (DisableUnclusterHighRP || !GCNSchedStage::initGCNSchedStage())
    return false;

  if (DAG.RegionsWithHighRP.none() && DAG.RegionsWithExcessRP.none())
    return false;

  // Park the regular mutations; they are swapped back when the stage is
  // finalized.
  SavedMutations.swap(DAG.Mutations);
  DAG.addMutation(
      createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::PreRAReentry));

  // Remember the occupancy in effect before this stage touches it.
  InitialOccupancy = DAG.MinOccupancy;

  // Aggressively try to reduce register pressure in the unclustered high RP
  // stage. Temporarily increase occupancy target in the region.
  S.SGPRLimitBias = S.HighRPSGPRBias;
  S.VGPRLimitBias = S.HighRPVGPRBias;
  if (DAG.MinOccupancy < MFI.getMaxWavesPerEU()) {
    ++DAG.MinOccupancy;
    MFI.increaseOccupancy(MF, DAG.MinOccupancy);
  }

  LLVM_DEBUG(
      dbgs()
      << "Retrying function scheduling without clustering. "
         "Aggressivly try to reduce register pressure to achieve occupancy "
      << DAG.MinOccupancy << ".\n");

  return true;
}


/// Prepares the clustered low-occupancy rescheduling stage.
/// \returns true if the stage should run: it is not disabled on the command
/// line, common initialization succeeds, and the minimum occupancy is below
/// the starting occupancy.
bool ClusteredLowOccStage::initGCNSchedStage() {
  if (DisableClusteredLowOccupancy || !GCNSchedStage::initGCNSchedStage())
    return false;

  // Don't bother trying to improve ILP in lower RP regions if occupancy has not
  // been dropped. All regions will have already been scheduled with the ideal
  // occupancy targets.
  const bool OccupancyDropped = DAG.MinOccupancy < DAG.StartingOccupancy;
  if (!OccupancyDropped)
    return false;

  LLVM_DEBUG(
      dbgs() << "Retrying function scheduling with lowest recorded occupancy "
             << DAG.MinOccupancy << ".\n");
  return true;
}


/// Debug-output helpers for the pre-RA rematerialization stage. REMAT_PREFIX
/// is the tag written in front of each message so that this stage's debug
/// output can be filtered easily; REMAT_DEBUG(X) prints the tag and then runs
/// X, all inside LLVM_DEBUG.

#define REMAT_PREFIX "[PreRARemat] "

#define REMAT_DEBUG(X) LLVM_DEBUG(dbgs() << REMAT_PREFIX; X;)


/// Prepares the pre-RA rematerialization stage.
///
/// The stage does not run when common initialization fails, when exactly one
/// region was recorded, or when canIncreaseOccupancyOrReduceSpill() reports
/// nothing to gain. Otherwise the parent region of every MI and the parent
/// block of every region are recorded, the selected instructions are
/// rematerialized, and the function's occupancy is raised if a higher one was
/// achieved.
///
/// \returns true if the stage should go on to reschedule regions.
bool PreRARematStage::initGCNSchedStage() {
  // FIXME: This pass will invalidate cached BBLiveInMap and MBBLiveIns for
  // regions in between the defs and the region we sank the def to. Will need
  // to be fixed if there is another pass after this pass.
  assert(!S.hasNextStage());

  if (!GCNSchedStage::initGCNSchedStage())
    return false;
  if (DAG.Regions.size() == 1)
    return false;

  // Before performing any IR modification record the parent region of each MI
  // and the parent MBB of each region.
  const unsigned NumRegions = DAG.Regions.size();
  RegionBB.reserve(NumRegions);
  for (unsigned Idx = 0; Idx != NumRegions; ++Idx) {
    const auto Bounds = DAG.Regions[Idx];
    for (auto It = Bounds.first; It != Bounds.second; ++It)
      MIRegion.insert({&*It, Idx});
    RegionBB.push_back(Bounds.first->getParent());
  }

  if (!canIncreaseOccupancyOrReduceSpill())
    return false;

  // Rematerialize identified instructions and update scheduler's state.
  rematerialize();
  if (GCNTrackers)
    DAG.RegionLiveOuts.buildLiveRegMap();
  REMAT_DEBUG({
    dbgs() << "Retrying function scheduling with new min. occupancy of "
           << AchievedOcc << " from rematerializing (original was "
           << DAG.MinOccupancy;
    if (TargetOcc)
      dbgs() << ", target was " << *TargetOcc;
    dbgs() << ")\n";
  });

  // Publish a higher occupancy only if rematerialization actually reached one.
  if (DAG.MinOccupancy < AchievedOcc) {
    DAG.MinOccupancy = AchievedOcc;
    MFI.increaseOccupancy(MF, DAG.MinOccupancy);
  }
  return true;
}


/// Common end-of-stage work: finishes the block the DAG is currently in and
/// logs the end of the stage.
void GCNSchedStage::finalizeGCNSchedStage() {

  DAG.finishBlock();

  LLVM_DEBUG(dbgs() << "Ending scheduling stage: " << StageID << "\n");

}


void UnclusteredHighRPStage::finalizeGCNSchedStage() {

  SavedMutations.swap(DAG.Mutations);

  S.SGPRLimitBias = S.VGPRLimitBias = 0;

  if (DAG.MinOccupancy > InitialOccupancy) {

    LLVM_DEBUG(dbgs() << StageID

                      << " stage successfully increased occupancy to "

                      << DAG.MinOccupancy << '\n');

  }


  GCNSchedStage::finalizeGCNSchedStage();

}


/// Common per-region setup performed before a region is scheduled.
///
/// Starts a new block if the region lies in a different block than the
/// previous one, enters the region, saves the current instruction order for a
/// possible revert, records the pre-scheduling pressure and, for regions
/// flagged as containing IGLP instructions (outside the unclustered high-RP
/// stage), replaces the DAG mutations with a single IGroupLP mutation.
///
/// \returns false if the region holds fewer than two instructions and must be
/// skipped, true otherwise.
bool GCNSchedStage::initGCNRegion() {
  // Check whether this new region is also a new block.
  if (DAG.RegionBegin->getParent() != CurrentMBB)
    setupNewBlock();

  unsigned NumRegionInstrs = std::distance(DAG.begin(), DAG.end());
  DAG.enterRegion(CurrentMBB, DAG.begin(), DAG.end(), NumRegionInstrs);

  // Skip empty scheduling regions (0 or 1 schedulable instructions).
  if (DAG.begin() == DAG.end() || DAG.begin() == std::prev(DAG.end()))
    return false;

  LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
  LLVM_DEBUG(dbgs() << MF.getName() << ":" << printMBBReference(*CurrentMBB)
                    << " " << CurrentMBB->getName()
                    << "\n  From: " << *DAG.begin() << "    To: ";
             if (DAG.RegionEnd != CurrentMBB->end()) dbgs() << *DAG.RegionEnd;
             else dbgs() << "End";
             dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');

  const bool IsInitialStage = StageID == GCNSchedStageID::OccInitialSchedule ||
                              StageID == GCNSchedStageID::ILPInitialSchedule;

  // Save original instruction order before scheduling for possible revert.
  // During the two initial stages above, also flag the region if any of its
  // instructions is reported by isIGLPMutationOnly.
  Unsched.clear();
  Unsched.reserve(DAG.NumRegionInstrs);
  const SIInstrInfo *SII = static_cast<const SIInstrInfo *>(DAG.TII);
  for (auto &MI : DAG) {
    Unsched.push_back(&MI);
    if (IsInitialStage && SII->isIGLPMutationOnly(MI.getOpcode()))
      DAG.RegionsWithIGLPInstrs[RegionIdx] = true;
  }

  PressureBefore = DAG.Pressure[RegionIdx];

  LLVM_DEBUG(
      dbgs() << "Pressure before scheduling:\nRegion live-ins:"
             << print(DAG.LiveIns[RegionIdx], DAG.MRI)
             << "Region live-in pressure:  "
             << print(llvm::getRegPressure(DAG.MRI, DAG.LiveIns[RegionIdx]))
             << "Region register pressure: " << print(PressureBefore));

  S.HasHighPressure = false;
  S.KnownExcessRP = isRegionWithExcessRP();

  // Regions with IGLP instructions get a dedicated mutation set; the regular
  // mutations are parked in SavedMutations for the duration of the region.
  if (DAG.RegionsWithIGLPInstrs[RegionIdx] &&
      StageID != GCNSchedStageID::UnclusteredHighRPReschedule) {
    SavedMutations.clear();
    SavedMutations.swap(DAG.Mutations);
    const auto Phase = IsInitialStage ? AMDGPU::SchedulingPhase::Initial
                                      : AMDGPU::SchedulingPhase::PreRAReentry;
    DAG.addMutation(createIGroupLPDAGMutation(Phase));
  }

  return true;
}


/// Decides whether the current region is rescheduled in the unclustered
/// high-RP stage.
///
/// Only regions that have excess register pressure (i.e. spilling) or that had
/// minimum occupancy at the beginning of the stage are rescheduled, the latter
/// only as long as rescheduling of previous regions did not make occupancy
/// drop back down to the initial minimum.
bool UnclusteredHighRPStage::initGCNRegion() {
  unsigned DynamicVGPRBlockSize = DAG.MFI.getDynamicVGPRBlockSize();

  if (!DAG.RegionsWithExcessRP[RegionIdx]) {
    // The raised occupancy target was already lost again.
    if (DAG.MinOccupancy <= InitialOccupancy)
      return false;

    // This region was not the one sitting at the initial minimum occupancy.
    if (DAG.Pressure[RegionIdx].getOccupancy(ST, DynamicVGPRBlockSize) !=
        InitialOccupancy)
      return false;
  }

  return GCNSchedStage::initGCNRegion();
}


/// Decides whether the current region is rescheduled in the clustered
/// low-occupancy stage; only regions flagged with high register pressure are.
bool ClusteredLowOccStage::initGCNRegion() {
  // We may need to reschedule this region if it wasn't rescheduled in the last
  // stage, or if we found it was testing critical register pressure limits in
  // the unclustered reschedule stage. The latter is because we may not have
  // been able to raise the min occupancy in the previous stage so the region
  // may be overly constrained even if it was already rescheduled.
  return DAG.RegionsWithHighRP[RegionIdx] && GCNSchedStage::initGCNRegion();
}


/// Decides whether the current region is rescheduled in the rematerialization
/// stage; only regions marked in RescheduleRegions go through the common
/// region setup.
bool PreRARematStage::initGCNRegion() {
  if (!RescheduleRegions[RegionIdx])
    return false;
  return GCNSchedStage::initGCNRegion();
}


/// Switches the stage to the block containing the DAG's current region: the
/// previous block (if any) is finished and the new one is started. During the
/// initial scheduling stages the register pressure of the block's regions is
/// computed as well.
void GCNSchedStage::setupNewBlock() {
  if (CurrentMBB)
    DAG.finishBlock();

  CurrentMBB = DAG.RegionBegin->getParent();
  DAG.startBlock(CurrentMBB);

  // Get real RP for the region if it hasn't been calculated before. After the
  // initial schedule stage real RP will be collected after scheduling.
  const bool IsInitialStage =
      StageID == GCNSchedStageID::OccInitialSchedule ||
      StageID == GCNSchedStageID::ILPInitialSchedule ||
      StageID == GCNSchedStageID::MemoryClauseInitialSchedule;
  if (IsInitialStage)
    DAG.computeBlockPressure(RegionIdx, CurrentMBB);
}


/// Common per-region work performed after a region has been scheduled: the
/// region's recorded boundaries are refreshed, the schedule is checked (and
/// possibly reverted), any mutations parked for an IGLP region are restored,
/// and the stage moves on to the next region.
void GCNSchedStage::finalizeGCNRegion() {
  // Scheduling may have moved the region's boundary instructions.
  auto &Bounds = DAG.Regions[RegionIdx];
  Bounds.first = DAG.RegionBegin;
  Bounds.second = DAG.RegionEnd;

  if (S.HasHighPressure)
    DAG.RegionsWithHighRP[RegionIdx] = true;

  // Revert scheduling if we have dropped occupancy or there is some other
  // reason that the original schedule is better.
  checkScheduling();

  // Mirror of the mutation swap done when the region was initialized.
  const bool MutationsWereSwapped =
      DAG.RegionsWithIGLPInstrs[RegionIdx] &&
      StageID != GCNSchedStageID::UnclusteredHighRPReschedule;
  if (MutationsWereSwapped)
    SavedMutations.swap(DAG.Mutations);

  DAG.exitRegion();
  advanceRegion();
}


/// Evaluates the schedule just produced for the current region.
///
/// The region's real register pressure is recomputed. If it is within the
/// strategy's critical SGPR/VGPR limits it is committed and nothing else is
/// done. Otherwise the occupancy before and after scheduling is compared, the
/// function-wide minimum occupancy is lowered if needed, the region is flagged
/// when its pressure exceeds the subtarget's register limits, and the
/// stage-specific shouldRevertScheduling() decides whether the schedule is
/// reverted or the new pressure is committed.
void GCNSchedStage::checkScheduling() {

  // Check the results of scheduling.

  PressureAfter = DAG.getRealRegPressure(RegionIdx);


  LLVM_DEBUG(dbgs() << "Pressure after scheduling: " << print(PressureAfter));

  LLVM_DEBUG(dbgs() << "Region: " << RegionIdx << ".\n");


  unsigned DynamicVGPRBlockSize = DAG.MFI.getDynamicVGPRBlockSize();


  if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit &&

      PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) <= S.VGPRCriticalLimit) {

    DAG.Pressure[RegionIdx] = PressureAfter;


    // Early out if we have achieved the occupancy target.

    LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n");

    return;

  }


  // Occupancies are capped by the strategy's target occupancy and by the
  // occupancy bound the subtarget derives from work group sizes.
  unsigned TargetOccupancy = std::min(

      S.getTargetOccupancy(), ST.getOccupancyWithWorkGroupSizes(MF).second);

  unsigned WavesAfter = std::min(

      TargetOccupancy, PressureAfter.getOccupancy(ST, DynamicVGPRBlockSize));

  unsigned WavesBefore = std::min(

      TargetOccupancy, PressureBefore.getOccupancy(ST, DynamicVGPRBlockSize));

  LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore

                    << ", after " << WavesAfter << ".\n");


  // We may not be able to keep the current target occupancy because of the just

  // scheduled region. We might still be able to revert scheduling if the

  // occupancy before was higher, or if the current schedule has register

  // pressure higher than the excess limits which could lead to more spilling.

  unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);


  // Allow memory bound functions to drop to 4 waves if not limited by an

  // attribute.

  // The drop is accepted only while it stays at or above the function's
  // minimum allowed occupancy.
  if (WavesAfter < WavesBefore && WavesAfter < DAG.MinOccupancy &&

      WavesAfter >= MFI.getMinAllowedOccupancy()) {

    LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "

                      << MFI.getMinAllowedOccupancy() << " waves\n");

    NewOccupancy = WavesAfter;

  }


  // Propagate a lower occupancy to the whole function.
  if (NewOccupancy < DAG.MinOccupancy) {

    DAG.MinOccupancy = NewOccupancy;

    MFI.limitOccupancy(DAG.MinOccupancy);

    LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "

                      << DAG.MinOccupancy << ".\n");

  }

  // The maximum number of arch VGPR on non-unified register file, or the

  // maximum VGPR + AGPR in the unified register file case.

  unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);

  // The maximum number of arch VGPR for both unified and non-unified register

  // file.

  unsigned MaxArchVGPRs = std::min(MaxVGPRs, ST.getAddressableNumArchVGPRs());

  unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);


  // Flag the region as having both high and excess RP when any register kind
  // exceeds its limit.
  // NOTE(review): the AGPR count is compared against MaxArchVGPRs -- presumably
  // the AGPR limit equals the arch VGPR limit; confirm.
  if (PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) > MaxVGPRs ||

      PressureAfter.getArchVGPRNum() > MaxArchVGPRs ||

      PressureAfter.getAGPRNum() > MaxArchVGPRs ||

      PressureAfter.getSGPRNum() > MaxSGPRs) {

    DAG.RegionsWithHighRP[RegionIdx] = true;

    DAG.RegionsWithExcessRP[RegionIdx] = true;

  }


  // Revert if this region's schedule would cause a drop in occupancy or

  // spilling.

  if (shouldRevertScheduling(WavesAfter))

    revertScheduling();

  else

    DAG.Pressure[RegionIdx] = PressureAfter;

}


/// Computes the cycle at which \p SU can issue and records it in
/// \p ReadyCycles under the unit's node number.
///
/// The unit is ready no earlier than \p CurrCycle and, for every predecessor
/// edge that is an assigned-register dependence, no earlier than the
/// predecessor's recorded ready cycle plus the latency \p SM computes for the
/// defining instruction.
///
/// \returns the computed ready cycle.
unsigned
GCNSchedStage::computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle,
                                      DenseMap<unsigned, unsigned> &ReadyCycles,
                                      const TargetSchedModel &SM) {
  unsigned ReadyCycle = CurrCycle;

  for (const auto &Pred : SU.Preds) {
    if (!Pred.isAssignedRegDep())
      continue;

    MachineInstr *DefMI = Pred.getSUnit()->getInstr();
    const unsigned DefLatency = SM.computeInstrLatency(DefMI);
    const unsigned DefIssue = ReadyCycles[DAG.getSUnit(DefMI)->NodeNum];
    ReadyCycle = std::max(ReadyCycle, DefIssue + DefLatency);
  }

  ReadyCycles[SU.NodeNum] = ReadyCycle;
  return ReadyCycle;
}


#ifndef NDEBUG


struct EarlierIssuingCycle {


  bool operator()(std::pair<MachineInstr *, unsigned> A,

                  std::pair<MachineInstr *, unsigned> B) const {

    return A.second < B.second;

  }


};


/// Prints the instructions in \p ReadyCycles together with their ready
/// cycles to the debug stream, in the set's order. A "bubble" banner is
/// printed before an instruction whose cycle is more than one past the
/// previously printed cycle. Nothing is printed for an empty set.
static void printScheduleModel(std::set<std::pair<MachineInstr *, unsigned>,
                                        EarlierIssuingCycle> &ReadyCycles) {
  if (ReadyCycles.empty())
    return;

  // The header names the block of the first entry.
  unsigned BBNum = ReadyCycles.begin()->first->getParent()->getNumber();
  dbgs() << "\n################## Schedule time ReadyCycles for MBB : " << BBNum
         << " ##################\n# Cycle #\t\t\tInstruction          "
            "             "
            "                            \n";

  unsigned PrevCycle = 1;
  for (const auto &[MI, Cycle] : ReadyCycles) {
    if (Cycle > PrevCycle + 1)
      dbgs() << "****************************** BUBBLE OF " << Cycle - PrevCycle
             << " CYCLES DETECTED ******************************\n\n";
    dbgs() << "[ " << Cycle << " ]  :  " << *MI << "\n";
    PrevCycle = Cycle;
  }
}


#endif


/// Estimates schedule length and stall ("bubble") cycles for
/// \p InputSchedule, visiting the scheduling units in vector order.
///
/// Each unit issues at the cycle reported by computeSUnitReadyCycle; the gap
/// between that cycle and the current cycle is added to the bubble total, and
/// the current cycle then moves to one past the issue cycle.
///
/// \returns a ScheduleMetrics built from the final cycle count and the bubble
/// total.
ScheduleMetrics
GCNSchedStage::getScheduleMetrics(const std::vector<SUnit> &InputSchedule) {
#ifndef NDEBUG
  std::set<std::pair<MachineInstr *, unsigned>, EarlierIssuingCycle>
      ReadyCyclesSorted;
#endif
  const TargetSchedModel &SM = ST.getInstrInfo()->getSchedModel();
  DenseMap<unsigned, unsigned> ReadyCycles;
  unsigned CurrCycle = 0;
  unsigned SumBubbles = 0;

  for (const auto &SU : InputSchedule) {
    const unsigned IssueCycle =
        computeSUnitReadyCycle(SU, CurrCycle, ReadyCycles, SM);
    SumBubbles += IssueCycle - CurrCycle;
#ifndef NDEBUG
    ReadyCyclesSorted.insert(std::make_pair(SU.getInstr(), IssueCycle));
#endif
    // The next unit can issue one cycle after this one at the earliest.
    CurrCycle = IssueCycle + 1;
  }

#ifndef NDEBUG
  LLVM_DEBUG(
      printScheduleModel(ReadyCyclesSorted);
      dbgs() << "\n\t"
             << "Metric: "
             << (SumBubbles
                     ? (SumBubbles * ScheduleMetrics::ScaleFactor) / CurrCycle
                     : 1)
             << "\n\n");
#endif

  return ScheduleMetrics(CurrCycle, SumBubbles);
}


/// Estimates schedule length and stall ("bubble") cycles for the current
/// instruction order of \p DAG's region. Instructions without a scheduling
/// unit are ignored.
///
/// Each unit issues at the cycle reported by computeSUnitReadyCycle; the gap
/// between that cycle and the current cycle is added to the bubble total, and
/// the current cycle then moves to one past the issue cycle.
///
/// \returns a ScheduleMetrics built from the final cycle count and the bubble
/// total.
ScheduleMetrics
GCNSchedStage::getScheduleMetrics(const GCNScheduleDAGMILive &DAG) {
#ifndef NDEBUG
  std::set<std::pair<MachineInstr *, unsigned>, EarlierIssuingCycle>
      ReadyCyclesSorted;
#endif
  const TargetSchedModel &SM = ST.getInstrInfo()->getSchedModel();
  DenseMap<unsigned, unsigned> ReadyCycles;
  unsigned CurrCycle = 0;
  unsigned SumBubbles = 0;

  for (auto &MI : DAG) {
    SUnit *SU = DAG.getSUnit(&MI);
    if (SU == nullptr)
      continue;

    const unsigned IssueCycle =
        computeSUnitReadyCycle(*SU, CurrCycle, ReadyCycles, SM);
    SumBubbles += IssueCycle - CurrCycle;
#ifndef NDEBUG
    ReadyCyclesSorted.insert(std::make_pair(SU->getInstr(), IssueCycle));
#endif
    // The next unit can issue one cycle after this one at the earliest.
    CurrCycle = IssueCycle + 1;
  }

#ifndef NDEBUG
  LLVM_DEBUG(
      printScheduleModel(ReadyCyclesSorted);
      dbgs() << "\n\t"
             << "Metric: "
             << (SumBubbles
                     ? (SumBubbles * ScheduleMetrics::ScaleFactor) / CurrCycle
                     : 1)
             << "\n\n");
#endif

  return ScheduleMetrics(CurrCycle, SumBubbles);
}


bool GCNSchedStage::shouldRevertScheduling(unsigned WavesAfter) {

  if (WavesAfter < DAG.MinOccupancy)

    return true;


  // For dynamic VGPR mode, we don't want to waste any VGPR blocks.

  if (DAG.MFI.isDynamicVGPREnabled()) {

    unsigned BlocksBefore = AMDGPU::IsaInfo::getAllocatedNumVGPRBlocks(

        &ST, DAG.MFI.getDynamicVGPRBlockSize(),

        PressureBefore.getVGPRNum(false));

    unsigned BlocksAfter = AMDGPU::IsaInfo::getAllocatedNumVGPRBlocks(

        &ST, DAG.MFI.getDynamicVGPRBlockSize(),

        PressureAfter.getVGPRNum(false));

    if (BlocksAfter > BlocksBefore)

      return true;

  }


  return false;

}


/// Revert criteria for the initial max-occupancy stage: never revert when the
/// pressure is unchanged; otherwise revert if the common criteria say so or
/// the new schedule may cause more spilling.
bool OccInitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
  const bool PressureUnchanged = PressureAfter == PressureBefore;
  if (PressureUnchanged)
    return false;

  return GCNSchedStage::shouldRevertScheduling(WavesAfter) ||
         mayCauseSpilling(WavesAfter);
}


/// Revert criteria for the unclustered high-RP stage.
///
/// The schedule is reverted if register pressure was not reduced, or if the
/// common criteria reject it. A region with excess RP that passes those checks
/// is always kept. For all other regions a profit value combining the
/// occupancy change and the schedule metrics before/after is computed, and the
/// schedule is reverted when that profit is below ScheduleMetrics::ScaleFactor.
bool UnclusteredHighRPStage::shouldRevertScheduling(unsigned WavesAfter) {
  const unsigned DynamicVGPRBlockSize = DAG.MFI.getDynamicVGPRBlockSize();

  // If RP is not reduced in the unclustered reschedule stage, revert to the
  // old schedule.
  const bool NoOccupancyGain =
      WavesAfter <= PressureBefore.getOccupancy(ST, DynamicVGPRBlockSize);
  if ((NoOccupancyGain && mayCauseSpilling(WavesAfter)) ||
      GCNSchedStage::shouldRevertScheduling(WavesAfter)) {
    LLVM_DEBUG(dbgs() << "Unclustered reschedule did not help.\n");
    return true;
  }

  // Do not attempt to relax schedule even more if we are already spilling.
  if (isRegionWithExcessRP())
    return false;

  LLVM_DEBUG(
      dbgs()
      << "\n\t      *** In shouldRevertScheduling ***\n"
      << "      *********** BEFORE UnclusteredHighRPStage ***********\n");
  // DAG.SUnits is measured as the "before" order and the DAG's current
  // instruction order as the "after" order.
  ScheduleMetrics MBefore = getScheduleMetrics(DAG.SUnits);
  LLVM_DEBUG(
      dbgs()
      << "\n      *********** AFTER UnclusteredHighRPStage ***********\n");
  ScheduleMetrics MAfter = getScheduleMetrics(DAG);

  unsigned OldMetric = MBefore.getMetric();
  unsigned NewMetric = MAfter.getMetric();
  unsigned WavesBefore =
      std::min(S.getTargetOccupancy(),
               PressureBefore.getOccupancy(ST, DynamicVGPRBlockSize));

  // Integer arithmetic, evaluated strictly left to right: scaled occupancy
  // ratio, times the scaled (biased) old metric, divided by the new metric,
  // then scaled back down once.
  const auto ScaledOccupancyRatio =
      (WavesAfter * ScheduleMetrics::ScaleFactor) / WavesBefore;
  const auto ScaledOldMetric =
      (OldMetric + ScheduleMetricBias) * ScheduleMetrics::ScaleFactor;
  unsigned Profit = (ScaledOccupancyRatio * ScaledOldMetric / NewMetric) /
                    ScheduleMetrics::ScaleFactor;

  LLVM_DEBUG(dbgs() << "\tMetric before " << MBefore << "\tMetric after "
                    << MAfter << "Profit: " << Profit << "\n");
  return Profit < ScheduleMetrics::ScaleFactor;
}


/// Revert criteria for the clustered low-occupancy stage: never revert when
/// the pressure is unchanged; otherwise revert if the common criteria say so
/// or the new schedule may cause more spilling.
bool ClusteredLowOccStage::shouldRevertScheduling(unsigned WavesAfter) {
  const bool PressureUnchanged = PressureAfter == PressureBefore;
  if (PressureUnchanged)
    return false;

  return GCNSchedStage::shouldRevertScheduling(WavesAfter) ||
         mayCauseSpilling(WavesAfter);
}


/// Revert criteria for the rematerialization stage: revert if the common
/// criteria say so, if the new schedule may cause more spilling, or if a
/// target occupancy is set and \p WavesAfter falls short of it.
bool PreRARematStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
    return true;
  if (mayCauseSpilling(WavesAfter))
    return true;

  // Without a target occupancy there is nothing further to miss.
  return TargetOcc.has_value() && WavesAfter < *TargetOcc;
}


/// Revert criteria for the initial max-ILP stage: revert only if the new
/// schedule may cause more spilling.
bool ILPInitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
  return mayCauseSpilling(WavesAfter);
}


/// Revert criteria for the initial memory-clause stage: revert only if the new
/// schedule may cause more spilling.
bool MemoryClauseInitialScheduleStage::shouldRevertScheduling(

    unsigned WavesAfter) {

  return mayCauseSpilling(WavesAfter);

}


/// \returns true if the new schedule is expected to spill more than the old
/// one: \p WavesAfter is at or below the function's minimum waves per EU, the
/// region has excess register pressure, and the pressure after scheduling is
/// not less than the pressure before.
bool GCNSchedStage::mayCauseSpilling(unsigned WavesAfter) {
  if (WavesAfter > MFI.getMinWavesPerEU())
    return false;
  if (!isRegionWithExcessRP())
    return false;
  if (PressureAfter.less(MF, PressureBefore))
    return false;

  LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
  return true;
}


/// Restores the instruction order that was saved in Unsched before the region
/// was scheduled.
///
/// Each saved non-debug instruction is moved, in saved order, to the growing
/// end of the region; LiveIntervals is informed of every move and the def
/// flags of each instruction are recomputed. Afterwards the DAG's region
/// boundaries are fixed up, debug values are put back in place, and the
/// region's entry in DAG.Regions is updated.
void GCNSchedStage::revertScheduling() {

  LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");

  // The region is rebuilt from its begin; RegionEnd tracks the insertion point.
  DAG.RegionEnd = DAG.RegionBegin;

  int SkippedDebugInstr = 0;

  for (MachineInstr *MI : Unsched) {

    // Debug instructions are not moved here; they are only counted.
    if (MI->isDebugInstr()) {

      ++SkippedDebugInstr;

      continue;

    }


    // Move MI to the insertion point unless it is already there.
    if (MI->getIterator() != DAG.RegionEnd) {

      DAG.BB->splice(DAG.RegionEnd, DAG.BB, MI);

      // Always true here: debug instructions were skipped above.
      if (!MI->isDebugInstr())

        DAG.LIS->handleMove(*MI, true);

    }


    // Reset read-undef flags and update them later.

    for (auto &Op : MI->all_defs())

      Op.setIsUndef(false);

    RegisterOperands RegOpers;

    RegOpers.collect(*MI, *DAG.TRI, DAG.MRI, DAG.ShouldTrackLaneMasks, false);

    // Always true here as well, for the same reason as above.
    if (!MI->isDebugInstr()) {

      if (DAG.ShouldTrackLaneMasks) {

        // Adjust liveness and add missing dead+read-undef flags.

        SlotIndex SlotIdx = DAG.LIS->getInstructionIndex(*MI).getRegSlot();

        RegOpers.adjustLaneLiveness(*DAG.LIS, DAG.MRI, SlotIdx, MI);

      } else {

        // Adjust for missing dead-def flags.

        RegOpers.detectDeadDefs(*MI, *DAG.LIS);

      }

    }

    // The next instruction is placed right after this one.
    DAG.RegionEnd = MI->getIterator();

    ++DAG.RegionEnd;

    LLVM_DEBUG(dbgs() << "Scheduling " << *MI);

  }


  // After reverting schedule, debug instrs will now be at the end of the block

  // and RegionEnd will point to the first debug instr. Increment RegionEnd

  // past debug instrs to the actual end of the scheduling region.

  while (SkippedDebugInstr-- > 0)

    ++DAG.RegionEnd;


  // If Unsched.front() instruction is a debug instruction, this will actually

  // shrink the region since we moved all debug instructions to the end of the

  // block. Find the first instruction that is not a debug instruction.

  DAG.RegionBegin = Unsched.front()->getIterator();

  if (DAG.RegionBegin->isDebugInstr()) {

    for (MachineInstr *MI : Unsched) {

      if (MI->isDebugInstr())

        continue;

      DAG.RegionBegin = MI->getIterator();

      break;

    }

  }


  // Then move the debug instructions back into their correct place and set

  // RegionBegin and RegionEnd if needed.

  DAG.placeDebugValues();


  // Record the (possibly changed) boundaries for this region.
  DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);

}


/// Decides whether sinking rematerializable instructions next to their single
/// user could help the function, and records the selected candidates in
/// Rematerializations (including, for each one, the regions in which the
/// defined register is currently a live-in).
///
/// The objective is selected first:
///  - if some region's pressure does not satisfy the targets built from
///    ST.getMaxNumSGPRs(F) / ST.getMaxNumVGPRs(F), or DAG.MinOccupancy already
///    reaches MFI.getMaxWavesPerEU(), TargetOcc is cleared ("reduce spilling");
///  - otherwise TargetOcc is set to DAG.MinOccupancy + 1 and targets are
///    rebuilt from the register budgets for that occupancy.
///
/// \returns true as soon as the (optimistic) estimate says every optimizable
/// region reaches its target. Otherwise, when an occupancy increase was the
/// objective, all candidates are dropped and false is returned; when reducing
/// spilling was the objective, returns whether any candidate was retained.
bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
  const Function &F = MF.getFunction();

  // Maps optimizable regions (i.e., regions at minimum and register-limited
  // occupancy, or regions with spilling) to the target RP we would like to
  // reach.
  DenseMap<unsigned, GCNRPTarget> OptRegions;
  unsigned MaxSGPRs = ST.getMaxNumSGPRs(F);
  unsigned MaxVGPRs = ST.getMaxNumVGPRs(F);
  // Rebuilds OptRegions from the current MaxSGPRs/MaxVGPRs budgets (captured
  // by reference, so later updates to the budgets are picked up).
  auto ResetTargetRegions = [&]() {
    OptRegions.clear();
    for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
      const GCNRegPressure &RP = DAG.Pressure[I];
      GCNRPTarget Target(MaxSGPRs, MaxVGPRs, MF, RP);
      if (!Target.satisfied())
        OptRegions.insert({I, Target});
    }
  };

  ResetTargetRegions();
  if (!OptRegions.empty() || DAG.MinOccupancy >= MFI.getMaxWavesPerEU()) {
    // In addition to register usage being above addressable limits, occupancy
    // below the minimum is considered like "spilling" as well.
    TargetOcc = std::nullopt;
  } else {
    // There is no spilling and room to improve occupancy; set up "increased
    // occupancy targets" for all regions.
    TargetOcc = DAG.MinOccupancy + 1;
    unsigned VGPRBlockSize =
        MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
    MaxSGPRs = ST.getMaxNumSGPRs(*TargetOcc, false);
    MaxVGPRs = ST.getMaxNumVGPRs(*TargetOcc, VGPRBlockSize);
    ResetTargetRegions();
  }
  REMAT_DEBUG({
    dbgs() << "Analyzing ";
    MF.getFunction().printAsOperand(dbgs(), false);
    dbgs() << ": ";
    if (OptRegions.empty()) {
      dbgs() << "no objective to achieve, occupancy is maximal at "
             << MFI.getMaxWavesPerEU();
    } else if (!TargetOcc) {
      dbgs() << "reduce spilling (minimum target occupancy is "
             << MFI.getMinWavesPerEU() << ')';
    } else {
      dbgs() << "increase occupancy from " << DAG.MinOccupancy << " to "
             << TargetOcc;
    }
    dbgs() << '\n';
    for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
      if (auto OptIt = OptRegions.find(I); OptIt != OptRegions.end()) {
        dbgs() << REMAT_PREFIX << "  [" << I << "] " << OptIt->getSecond()
               << '\n';
      }
    }
  });
  if (OptRegions.empty())
    return false;

  // Accounts for a reduction in RP in an optimizable region. Returns whether we
  // estimate that we have identified enough rematerialization opportunities to
  // achieve our goal, and sets Progress to true when this particular reduction
  // in pressure was helpful toward that goal.
  auto ReduceRPInRegion = [&](auto OptIt, Register Reg, LaneBitmask Mask,
                              bool &Progress) -> bool {
    GCNRPTarget &Target = OptIt->getSecond();
    if (!Target.isSaveBeneficial(Reg))
      return false;
    Progress = true;
    Target.saveReg(Reg, Mask, DAG.MRI);
    if (Target.satisfied())
      OptRegions.erase(OptIt->getFirst());
    return OptRegions.empty();
  };

  // We need up-to-date live-out info. to query live-out register masks in
  // regions containing rematerializable instructions.
  DAG.RegionLiveOuts.buildLiveRegMap();

  // Cache set of registers that are going to be rematerialized.
  DenseSet<unsigned> RematRegs;

  // Identify rematerializable instructions in the function.
  for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
    auto Region = DAG.Regions[I];
    for (auto MI = Region.first; MI != Region.second; ++MI) {
      // The instruction must be rematerializable.
      MachineInstr &DefMI = *MI;
      if (!isReMaterializable(DefMI))
        continue;

      // We only support rematerializing virtual registers with one definition.
      Register Reg = DefMI.getOperand(0).getReg();
      if (!Reg.isVirtual() || !DAG.MRI.hasOneDef(Reg))
        continue;

      // We only care to rematerialize the instruction if it has a single
      // non-debug user in a different region. The using MI may not belong to a
      // region if it is a lone region terminator.
      MachineInstr *UseMI = DAG.MRI.getOneNonDBGUser(Reg);
      if (!UseMI)
        continue;
      auto UseRegion = MIRegion.find(UseMI);
      if (UseRegion != MIRegion.end() && UseRegion->second == I)
        continue;

      // Do not rematerialize an instruction if it uses or is used by an
      // instruction that we have designated for rematerialization.
      // FIXME: Allow for rematerialization chains: this requires 1. updating
      // remat points to account for uses that are rematerialized, and 2. either
      // rematerializing the candidates in careful ordering, or deferring the
      // MBB RP walk until the entire chain has been rematerialized.
      if (Rematerializations.contains(UseMI) ||
          llvm::any_of(DefMI.operands(), [&RematRegs](MachineOperand &MO) {
            return MO.isReg() && RematRegs.contains(MO.getReg());
          }))
        continue;

      // Do not rematerialize an instruction if it uses registers that aren't
      // available at its use. This ensures that we are not extending any live
      // range while rematerializing.
      SlotIndex UseIdx = DAG.LIS->getInstructionIndex(*UseMI).getRegSlot(true);
      if (!VirtRegAuxInfo::allUsesAvailableAt(&DefMI, UseIdx, *DAG.LIS, DAG.MRI,
                                              *DAG.TII))
        continue;

      REMAT_DEBUG(dbgs() << "Region " << I << ": remat instruction " << DefMI);
      RematInstruction &Remat =
          Rematerializations.try_emplace(&DefMI, UseMI).first->second;

      bool RematUseful = false;
      // Becomes true once every optimizable region is estimated to have met
      // its target. We must not return on the spot: the candidate stays in
      // Rematerializations, and its LiveInRegions set has to be complete for
      // the later live-in/RP updates (and a possible rollback) to be accurate.
      bool GoalReached = false;
      if (auto It = OptRegions.find(I); It != OptRegions.end()) {
        // Optimistically consider that moving the instruction out of its
        // defining region will reduce RP in the latter; this assumes that
        // maximum RP in the region is reached somewhere between the defining
        // instruction and the end of the region.
        REMAT_DEBUG(dbgs() << "  Defining region is optimizable\n");
        LaneBitmask Mask = DAG.RegionLiveOuts.getLiveRegsForRegionIdx(I)[Reg];
        GoalReached = ReduceRPInRegion(It, Reg, Mask, RematUseful);
      }

      for (unsigned LIRegion = 0; LIRegion != E; ++LIRegion) {
        // We are only collecting regions in which the register is a live-in
        // (and may be live-through).
        auto LiveInIt = DAG.LiveIns[LIRegion].find(Reg);
        if (LiveInIt == DAG.LiveIns[LIRegion].end() ||
            LiveInIt->second.none())
          continue;
        Remat.LiveInRegions.insert(LIRegion);

        // Once the goal is reached there is no target left to account
        // against; keep scanning only to record the remaining live-in regions.
        if (GoalReached)
          continue;

        // Account for the reduction in RP due to the rematerialization in an
        // optimizable region in which the defined register is a live-in. This
        // is exact for live-through region but optimistic in the using region,
        // where RP is actually reduced only if maximum RP is reached somewhere
        // between the beginning of the region and the rematerializable
        // instruction's use.
        if (auto It = OptRegions.find(LIRegion); It != OptRegions.end()) {
          REMAT_DEBUG(dbgs() << "  Live-in in region " << LIRegion << '\n');
          GoalReached =
              ReduceRPInRegion(It, Reg, LiveInIt->second, RematUseful);
        }
      }

      if (GoalReached)
        return true;

      // If the instruction is not a live-in or live-out in any optimizable
      // region then there is no point in rematerializing it.
      if (!RematUseful) {
        Rematerializations.pop_back();
        REMAT_DEBUG(dbgs() << "  No impact, not rematerializing instruction\n");
      } else {
        RematRegs.insert(Reg);
      }
    }
  }

  if (TargetOcc) {
    // We were trying to increase occupancy but failed, abort the stage.
    REMAT_DEBUG(dbgs() << "Cannot increase occupancy\n");
    Rematerializations.clear();
    return false;
  }
  REMAT_DEBUG(dbgs() << "Can reduce but not eliminate spilling\n");
  return !Rematerializations.empty();
}


void PreRARematStage::rematerialize() {

  const SIInstrInfo *TII = MF.getSubtarget<GCNSubtarget>().getInstrInfo();


  // Collect regions whose RP changes in unpredictable way; we will have to

  // fully recompute their RP after all rematerailizations.

  DenseSet<unsigned> RecomputeRP;


  // Rematerialize all instructions.

  for (auto &[DefMI, Remat] : Rematerializations) {

    MachineBasicBlock::iterator InsertPos(Remat.UseMI);

    Register Reg = DefMI->getOperand(0).getReg();

    unsigned DefRegion = MIRegion.at(DefMI);


    // Rematerialize DefMI to its use block.

    TII->reMaterialize(*InsertPos->getParent(), InsertPos, Reg,

                       AMDGPU::NoSubRegister, *DefMI, *DAG.TRI);

    Remat.RematMI = &*std::prev(InsertPos);

    DAG.LIS->InsertMachineInstrInMaps(*Remat.RematMI);


    // Update region boundaries in regions we sinked from (remove defining MI)

    // and to (insert MI rematerialized in use block). Only then we can erase

    // the original MI.

    DAG.updateRegionBoundaries(DAG.Regions[DefRegion], DefMI, nullptr);

    auto UseRegion = MIRegion.find(Remat.UseMI);

    if (UseRegion != MIRegion.end()) {

      DAG.updateRegionBoundaries(DAG.Regions[UseRegion->second], InsertPos,

                                 Remat.RematMI);

    }

    DAG.LIS->RemoveMachineInstrFromMaps(*DefMI);

    DefMI->eraseFromParent();


    // Collect all regions impacted by the rematerialization and update their

    // live-in/RP information.

    for (unsigned I : Remat.LiveInRegions) {

      ImpactedRegions.insert({I, DAG.Pressure[I]});

      GCNRPTracker::LiveRegSet &RegionLiveIns = DAG.LiveIns[I];


#ifdef EXPENSIVE_CHECKS

      // All uses are known to be available / live at the remat point. Thus, the

      // uses should already be live in to the region.

      for (MachineOperand &MO : DefMI->operands()) {

        if (!MO.isReg() || !MO.getReg() || !MO.readsReg())

          continue;


        Register UseReg = MO.getReg();

        if (!UseReg.isVirtual())

          continue;


        LiveInterval &LI = DAG.LIS->getInterval(UseReg);

        LaneBitmask LM = DAG.MRI.getMaxLaneMaskForVReg(MO.getReg());

        if (LI.hasSubRanges() && MO.getSubReg())

          LM = DAG.TRI->getSubRegIndexLaneMask(MO.getSubReg());


        LaneBitmask LiveInMask = RegionLiveIns.at(UseReg);

        LaneBitmask UncoveredLanes = LM & ~(LiveInMask & LM);

        // If this register has lanes not covered by the LiveIns, be sure they

        // do not map to any subrange. ref:

        // machine-scheduler-sink-trivial-remats.mir::omitted_subrange

        if (UncoveredLanes.any()) {

          assert(LI.hasSubRanges());

          for (LiveInterval::SubRange &SR : LI.subranges())

            assert((SR.LaneMask & UncoveredLanes).none());

        }

      }

#endif


      // The register is no longer a live-in in all regions but the one that

      // contains the single use. In live-through regions, maximum register

      // pressure decreases predictably so we can directly update it. In the

      // using region, maximum RP may or may not decrease, so we will mark it

      // for re-computation after all materializations have taken place.

      LaneBitmask PrevMask = RegionLiveIns[Reg];

      RegionLiveIns.erase(Reg);

      RegMasks.insert({{I, Remat.RematMI->getOperand(0).getReg()}, PrevMask});

      if (Remat.UseMI->getParent() != DAG.Regions[I].first->getParent())

        DAG.Pressure[I].inc(Reg, PrevMask, LaneBitmask::getNone(), DAG.MRI);

      else

        RecomputeRP.insert(I);

    }

    // RP in the region from which the instruction was rematerialized may or may

    // not decrease.

    ImpactedRegions.insert({DefRegion, DAG.Pressure[DefRegion]});

    RecomputeRP.insert(DefRegion);


    // Recompute live interval to reflect the register's rematerialization.

    Register RematReg = Remat.RematMI->getOperand(0).getReg();

    DAG.LIS->removeInterval(RematReg);

    DAG.LIS->createAndComputeVirtRegInterval(RematReg);

  }


  // All regions impacted by at least one rematerialization must be rescheduled.

  // Maximum pressure must also be recomputed for all regions where it changed

  // non-predictably and checked against the target occupancy.

  unsigned DynamicVGPRBlockSize =

      MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();

  AchievedOcc = MFI.getMaxWavesPerEU();

  for (auto &[I, OriginalRP] : ImpactedRegions) {

    bool IsEmptyRegion = DAG.Regions[I].first == DAG.Regions[I].second;

    RescheduleRegions[I] = !IsEmptyRegion;

    if (!RecomputeRP.contains(I))

      continue;


    GCNRegPressure RP;

    if (IsEmptyRegion) {

      RP = getRegPressure(DAG.MRI, DAG.LiveIns[I]);

    } else {

      GCNDownwardRPTracker RPT(*DAG.LIS);

      auto *NonDbgMI = &*skipDebugInstructionsForward(DAG.Regions[I].first,

                                                      DAG.Regions[I].second);

      if (NonDbgMI == DAG.Regions[I].second) {

        // Region is non-empty but contains only debug instructions.

        RP = getRegPressure(DAG.MRI, DAG.LiveIns[I]);

      } else {

        RPT.reset(*NonDbgMI, &DAG.LiveIns[I]);

        RPT.advance(DAG.Regions[I].second);

        RP = RPT.moveMaxPressure();

      }

    }

    DAG.Pressure[I] = RP;

    AchievedOcc =

        std::min(AchievedOcc, RP.getOccupancy(ST, DynamicVGPRBlockSize));

  }

  REMAT_DEBUG(dbgs() << "Achieved occupancy " << AchievedOcc << "\n");

}


// Copied from MachineLICM
/// Returns true when \p MI is accepted by the target's rematerialization
/// query and none of its register uses is a physical register, other than
/// constant physical registers and uses the target reports as ignorable.
bool PreRARematStage::isReMaterializable(const MachineInstr &MI) {
  if (!DAG.TII->isReMaterializable(MI))
    return false;

  for (const MachineOperand &Use : MI.all_uses()) {
    const auto UsedReg = Use.getReg();
    // Only physical register uses can block rematerialization here.
    if (!UsedReg.isPhysical())
      continue;

    // A physreg use is tolerated only if it is a constant or an ignorable use
    // (e.g. implicit exec use on VALU instructions).
    const bool Tolerated =
        DAG.MRI.isConstantPhysReg(UsedReg) || DAG.TII->isIgnorableUse(Use);
    if (!Tolerated)
      return false;
  }

  return true;
}


/// Ends the rematerialization stage, rolling back every rematerialization when
/// the stage was trying to increase occupancy (TargetOcc is set) and neither
/// AchievedOcc nor DAG.MinOccupancy reaches that target.
///
/// The rollback re-creates each instruction at the end of its original
/// region, erases the rematerialized copy, recomputes the register's live
/// interval, restores the live-in masks saved in RegMasks, and restores the
/// pressure saved in ImpactedRegions. The base-class finalization is only
/// invoked on the rollback path; the early return skips it.
void PreRARematStage::finalizeGCNSchedStage() {
  // We consider that reducing spilling is always beneficial so we never
  // rollback rematerializations in such cases. It's also possible that
  // rescheduling lowers occupancy over the one achieved just through remats, in
  // which case we do not want to rollback either (the rescheduling was already
  // reverted in PreRARematStage::shouldRevertScheduling in such cases).
  unsigned MaxOcc = std::max(AchievedOcc, DAG.MinOccupancy);
  if (!TargetOcc || MaxOcc >= *TargetOcc)
    return;

  REMAT_DEBUG(dbgs() << "Rolling back all rematerializations\n");
  const SIInstrInfo *TII = MF.getSubtarget<GCNSubtarget>().getInstrInfo();

  // Rollback the rematerializations.
  for (const auto &[DefMI, Remat] : Rematerializations) {
    MachineInstr &RematMI = *Remat.RematMI;
    // DefMI is used purely as a map key here; it is not dereferenced.
    unsigned DefRegion = MIRegion.at(DefMI);
    MachineBasicBlock::iterator InsertPos(DAG.Regions[DefRegion].second);
    MachineBasicBlock *MBB = RegionBB[DefRegion];
    Register Reg = RematMI.getOperand(0).getReg();

    // Re-rematerialize MI at the end of its original region. Note that it may
    // not be rematerialized exactly in the same position as originally within
    // the region, but it should not matter much.
    TII->reMaterialize(*MBB, InsertPos, Reg, AMDGPU::NoSubRegister, RematMI,
                       *DAG.TRI);
    MachineInstr *NewMI = &*std::prev(InsertPos);
    DAG.LIS->InsertMachineInstrInMaps(*NewMI);

    // Fix up region boundaries before erasing: the copy leaves the using
    // region (if the user belongs to one) and NewMI enters the defining one.
    auto UseRegion = MIRegion.find(Remat.UseMI);
    if (UseRegion != MIRegion.end()) {
      DAG.updateRegionBoundaries(DAG.Regions[UseRegion->second], RematMI,
                                 nullptr);
    }
    DAG.updateRegionBoundaries(DAG.Regions[DefRegion], InsertPos, NewMI);

    // Erase rematerialized MI.
    DAG.LIS->RemoveMachineInstrFromMaps(RematMI);
    RematMI.eraseFromParent();

    // Recompute live interval for the re-rematerialized register
    DAG.LIS->removeInterval(Reg);
    DAG.LIS->createAndComputeVirtRegInterval(Reg);

    // Re-add the register as a live-in in all regions it used to be one in.
    for (unsigned LIRegion : Remat.LiveInRegions)
      DAG.LiveIns[LIRegion].insert({Reg, RegMasks.at({LIRegion, Reg})});
  }

  // Reset RP in all impacted regions.
  for (auto &[I, OriginalRP] : ImpactedRegions)
    DAG.Pressure[I] = OriginalRP;

  GCNSchedStage::finalizeGCNSchedStage();
}


/// Keeps the lower bound of \p RegionBounds consistent after one instruction
/// is removed from, or inserted into, the region.
///
/// \param RegionBounds Half-open [first, second) region to adjust; only
///        \c first is ever modified.
/// \param MI For a removal, the instruction being removed; for an insertion,
///        the position in front of which \p NewMI was inserted.
/// \param NewMI The inserted instruction, or nullptr to signal a removal.
void GCNScheduleDAGMILive::updateRegionBoundaries(
    RegionBoundaries &RegionBounds, MachineBasicBlock::iterator MI,
    MachineInstr *NewMI) {
  assert((!NewMI || NewMI != RegionBounds.second) &&
         "cannot remove at region end");

  const bool IsInsertion = NewMI != nullptr;

  // An empty region can only grow; the inserted instruction becomes its start.
  if (RegionBounds.first == RegionBounds.second) {
    assert(IsInsertion && "cannot remove from an empty region");
    RegionBounds.first = NewMI;
    return;
  }

  // The upper bound is exclusive, so a non-empty region only needs fixing up
  // when the change happens at its very first instruction.
  if (MI == RegionBounds.first) {
    if (IsInsertion)
      RegionBounds.first = NewMI; // New instruction now leads the region.
    else
      RegionBounds.first = std::next(MI); // Region now starts one later.
  }
}


/// Returns true if any instruction in the range exposed by \p DAG has an
/// opcode for which SIInstrInfo::isIGLPMutationOnly() holds.
static bool hasIGLPInstrs(ScheduleDAGInstrs *DAG) {
  const auto *InstrInfo = static_cast<const SIInstrInfo *>(DAG->TII);
  for (const auto &Instr : *DAG) {
    if (InstrInfo->isIGLPMutationOnly(Instr.getOpcode()))
      return true;
  }
  return false;
}


/// Constructs the post-RA GCN scheduling DAG by forwarding the context \p C,
/// the strategy \p S (ownership is transferred), and \p RemoveKillFlags
/// unchanged to the ScheduleDAGMI base class. No additional state is set up
/// here.
GCNPostScheduleDAGMILive::GCNPostScheduleDAGMILive(
    MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S,
    bool RemoveKillFlags)
    : ScheduleDAGMI(C, std::move(S), RemoveKillFlags) {}


/// Schedules the current region. When the region contains IGLP-only
/// instructions, the active mutation list is stashed in SavedMutations and
/// replaced by a single post-RA IGroupLP mutation before delegating to
/// ScheduleDAGMI::schedule().
void GCNPostScheduleDAGMILive::schedule() {
  const bool RegionHasIGLP = hasIGLPInstrs(this);
  HasIGLPInstrs = RegionHasIGLP;

  if (RegionHasIGLP) {
    // Drop whatever was stashed before, then move the active mutations aside
    // so that only the IGroupLP mutation runs for this region.
    SavedMutations.clear();
    SavedMutations.swap(Mutations);
    addMutation(createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::PostRA));
  }

  ScheduleDAGMI::schedule();
}


/// Finishes scheduling: if the most recent schedule() call found IGLP
/// instructions, swaps the stashed mutation list back into Mutations, then
/// delegates to ScheduleDAGMI::finalizeSchedule().
void GCNPostScheduleDAGMILive::finalizeSchedule() {
  // HasIGLPInstrs reflects only the last region passed to schedule().
  if (HasIGLPInstrs)
    SavedMutations.swap(Mutations);

  ScheduleDAGMI::finalizeSchedule();
}


UseMI
MachineInstrBuilder & UseMI
Definition AArch64ExpandPseudoInsts.cpp:120

DefMI
MachineInstrBuilder MachineInstrBuilder & DefMI
Definition AArch64ExpandPseudoInsts.cpp:121

assert
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

AMDGPUBaseInfo.h

AMDGPUIGroupLP.h

MBB
MachineBasicBlock & MBB
Definition ARMSLSHardening.cpp:71

A
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")

E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")

B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

CalcSpillWeights.h

GCNRegPressure.h
This file defines the GCNRegPressure class, which tracks register pressure by bookkeeping number of S...

GCNTrackers
static cl::opt< bool > GCNTrackers("amdgpu-use-amdgpu-trackers", cl::Hidden, cl::desc("Use the AMDGPU specific RPTrackers during scheduling"), cl::init(false))

DisableClusteredLowOccupancy
static cl::opt< bool > DisableClusteredLowOccupancy("amdgpu-disable-clustered-low-occupancy-reschedule", cl::Hidden, cl::desc("Disable clustered low occupancy " "rescheduling for ILP scheduling stage."), cl::init(false))

REMAT_PREFIX
#define REMAT_PREFIX
Allows to easily filter for this stage's debug output.
Definition GCNSchedStrategy.cpp:1121

getLastMIForRegion
static MachineInstr * getLastMIForRegion(MachineBasicBlock::iterator RegionBegin, MachineBasicBlock::iterator RegionEnd)
Definition GCNSchedStrategy.cpp:833

RelaxedOcc
static cl::opt< bool > RelaxedOcc("amdgpu-schedule-relaxed-occupancy", cl::Hidden, cl::desc("Relax occupancy targets for kernels which are memory " "bound (amdgpu-membound-threshold), or " "Wave Limited (amdgpu-limit-wave-threshold)."), cl::init(false))

REMAT_DEBUG
#define REMAT_DEBUG(X)
Definition GCNSchedStrategy.cpp:1122

DisableUnclusterHighRP
static cl::opt< bool > DisableUnclusterHighRP("amdgpu-disable-unclustered-high-rp-reschedule", cl::Hidden, cl::desc("Disable unclustered high register pressure " "reduction scheduling stage."), cl::init(false))

printScheduleModel
static void printScheduleModel(std::set< std::pair< MachineInstr *, unsigned >, EarlierIssuingCycle > &ReadyCycles)
Definition GCNSchedStrategy.cpp:1406

PrintMaxRPRegUsageAfterScheduler
static cl::opt< bool > PrintMaxRPRegUsageAfterScheduler("amdgpu-print-max-reg-pressure-regusage-after-scheduler", cl::Hidden, cl::desc("Print a list of live registers along with their def/uses at the " "point of maximum register pressure after scheduling."), cl::init(false))

hasIGLPInstrs
static bool hasIGLPInstrs(ScheduleDAGInstrs *DAG)
Definition GCNSchedStrategy.cpp:2074

canUsePressureDiffs
static bool canUsePressureDiffs(const SUnit &SU)
Checks whether SU can use the cached DAG pressure diffs to compute the current register pressure.
Definition GCNSchedStrategy.cpp:165

getRegisterPressures
static void getRegisterPressures(bool AtTop, const RegPressureTracker &RPTracker, SUnit *SU, std::vector< unsigned > &Pressure, std::vector< unsigned > &MaxPressure, GCNDownwardRPTracker &DownwardTracker, GCNUpwardRPTracker &UpwardTracker, ScheduleDAGMI *DAG, const SIRegisterInfo *SRI)
Definition GCNSchedStrategy.cpp:181

PrintMaxRPRegUsageBeforeScheduler
static cl::opt< bool > PrintMaxRPRegUsageBeforeScheduler("amdgpu-print-max-reg-pressure-regusage-before-scheduler", cl::Hidden, cl::desc("Print a list of live registers along with their def/uses at the " "point of maximum register pressure before scheduling."), cl::init(false))

ScheduleMetricBias
static cl::opt< unsigned > ScheduleMetricBias("amdgpu-schedule-metric-bias", cl::Hidden, cl::desc("Sets the bias which adds weight to occupancy vs latency. Set it to " "100 to chase the occupancy only."), cl::init(10))

GCNSchedStrategy.h

UseReg
static Register UseReg(const MachineOperand &MO)
Definition HexagonCopyToCombine.cpp:245

TII
const HexagonInstrInfo * TII
Definition HexagonCopyToCombine.cpp:118

MI
IRTranslator LLVM IR MI
Definition IRTranslator.cpp:110

LaneBitmask.h
A common definition of LaneBitmask for use in TableGen and CodeGen.

F
#define F(x, y, z)
Definition MD5.cpp:55

I
#define I(x, y, z)
Definition MD5.cpp:58

Reg
Register Reg
Definition MachineSink.cpp:2117

Register
Promote Memory to Register
Definition Mem2Reg.cpp:110

T
#define T
Definition Mips16ISelLowering.cpp:353

if
if(PassOpts->AAPipeline)
Definition PassBuilderBindings.cpp:64

RegisterClassInfo.h

SIMachineFunctionInfo.h

STLExtras.h
This file contains some templates that are useful if you are working with the STL at all.

LLVM_DEBUG
#define LLVM_DEBUG(...)
Definition Debug.h:114

llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41

llvm::ClusteredLowOccStage::initGCNSchedStage
bool initGCNSchedStage() override
Definition GCNSchedStrategy.cpp:1101

llvm::ClusteredLowOccStage::shouldRevertScheduling
bool shouldRevertScheduling(unsigned WavesAfter) override
Definition GCNSchedStrategy.cpp:1566

llvm::ClusteredLowOccStage::initGCNRegion
bool initGCNRegion() override
Definition GCNSchedStrategy.cpp:1262

llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:167

llvm::DenseMapBase::erase
bool erase(const KeyT &Val)
Definition DenseMap.h:311

llvm::DenseMapBase::empty
bool empty() const
Definition DenseMap.h:109

llvm::DenseMapBase::end
iterator end()
Definition DenseMap.h:81

llvm::DenseMapBase::at
const ValueT & at(const_arg_type_t< KeyT > Val) const
at - Return the entry for the specified key, or abort if no such entry exists.
Definition DenseMap.h:213

llvm::DenseMapBase::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:222

llvm::DenseMapBase::clear
void clear()
Definition DenseMap.h:121

llvm::DenseMap
Definition DenseMap.h:701

llvm::Function
Definition Function.h:64

llvm::GCNDownwardRPTracker
Definition GCNRegPressure.h:367

llvm::GCNDownwardRPTracker::bumpDownwardPressure
GCNRegPressure bumpDownwardPressure(const MachineInstr *MI, const SIRegisterInfo *TRI) const
Mostly copy/paste from CodeGen/RegisterPressure.cpp Calculate the impact MI will have on CurPressure ...
Definition GCNRegPressure.cpp:737

llvm::GCNMaxILPSchedStrategy::GCNMaxILPSchedStrategy
GCNMaxILPSchedStrategy(const MachineSchedContext *C)
Definition GCNSchedStrategy.cpp:553

llvm::GCNMaxILPSchedStrategy::tryCandidate
bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const override
Apply a set of heuristics to a new candidate.
Definition GCNSchedStrategy.cpp:558

llvm::GCNMaxMemoryClauseSchedStrategy::tryCandidate
bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const override
GCNMaxMemoryClauseSchedStrategy tries best to clause memory instructions as much as possible.
Definition GCNSchedStrategy.cpp:660

llvm::GCNMaxMemoryClauseSchedStrategy::GCNMaxMemoryClauseSchedStrategy
GCNMaxMemoryClauseSchedStrategy(const MachineSchedContext *C)
Definition GCNSchedStrategy.cpp:644

llvm::GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy
GCNMaxOccupancySchedStrategy(const MachineSchedContext *C, bool IsLegacyScheduler=false)
Definition GCNSchedStrategy.cpp:543

llvm::GCNPostScheduleDAGMILive::finalizeSchedule
void finalizeSchedule() override
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
Definition GCNSchedStrategy.cpp:2097

llvm::GCNPostScheduleDAGMILive::schedule
void schedule() override
Orders nodes according to selected style.
Definition GCNSchedStrategy.cpp:2086

llvm::GCNPostScheduleDAGMILive::GCNPostScheduleDAGMILive
GCNPostScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S, bool RemoveKillFlags)
Definition GCNSchedStrategy.cpp:2081

llvm::GCNRPTarget
Models a register pressure target, allowing to evaluate and track register savings against that targe...
Definition GCNRegPressure.h:201

llvm::GCNRPTracker
Definition GCNRegPressure.h:277

llvm::GCNRPTracker::getPressure
GCNRegPressure getPressure() const
Definition GCNRegPressure.h:307

llvm::GCNRPTracker::LiveRegSet
DenseMap< unsigned, LaneBitmask > LiveRegSet
Definition GCNRegPressure.h:279

llvm::GCNSchedStage::initGCNRegion
virtual bool initGCNRegion()
Definition GCNSchedStrategy.cpp:1185

llvm::GCNSchedStage::S
GCNSchedStrategy & S
Definition GCNSchedStrategy.h:314

llvm::GCNSchedStage::PressureBefore
GCNRegPressure PressureBefore
Definition GCNSchedStrategy.h:334

llvm::GCNSchedStage::revertScheduling
void revertScheduling()
Definition GCNSchedStrategy.cpp:1606

llvm::GCNSchedStage::isRegionWithExcessRP
bool isRegionWithExcessRP() const
Definition GCNSchedStrategy.h:375

llvm::GCNSchedStage::mayCauseSpilling
bool mayCauseSpilling(unsigned WavesAfter)
Definition GCNSchedStrategy.cpp:1596

llvm::GCNSchedStage::getScheduleMetrics
ScheduleMetrics getScheduleMetrics(const std::vector< SUnit > &InputSchedule)
Definition GCNSchedStrategy.cpp:1427

llvm::GCNSchedStage::DAG
GCNScheduleDAGMILive & DAG
Definition GCNSchedStrategy.h:312

llvm::GCNSchedStage::StageID
const GCNSchedStageID StageID
Definition GCNSchedStrategy.h:322

llvm::GCNSchedStage::Unsched
std::vector< MachineInstr * > Unsched
Definition GCNSchedStrategy.h:331

llvm::GCNSchedStage::PressureAfter
GCNRegPressure PressureAfter
Definition GCNSchedStrategy.h:337

llvm::GCNSchedStage::MF
MachineFunction & MF
Definition GCNSchedStrategy.h:316

llvm::GCNSchedStage::finalizeGCNRegion
void finalizeGCNRegion()
Definition GCNSchedStrategy.cpp:1292

llvm::GCNSchedStage::MFI
SIMachineFunctionInfo & MFI
Definition GCNSchedStrategy.h:318

llvm::GCNSchedStage::checkScheduling
void checkScheduling()
Definition GCNSchedStrategy.cpp:1309

llvm::GCNSchedStage::RegionIdx
unsigned RegionIdx
Definition GCNSchedStrategy.h:328

llvm::GCNSchedStage::advanceRegion
void advanceRegion()
Definition GCNSchedStrategy.h:388

llvm::GCNSchedStage::computeSUnitReadyCycle
unsigned computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle, DenseMap< unsigned, unsigned > &ReadyCycles, const TargetSchedModel &SM)
Definition GCNSchedStrategy.cpp:1382

llvm::GCNSchedStage::finalizeGCNSchedStage
virtual void finalizeGCNSchedStage()
Definition GCNSchedStrategy.cpp:1168

llvm::GCNSchedStage::initGCNSchedStage
virtual bool initGCNSchedStage()
Definition GCNSchedStrategy.cpp:1062

llvm::GCNSchedStage::shouldRevertScheduling
virtual bool shouldRevertScheduling(unsigned WavesAfter)
Definition GCNSchedStrategy.cpp:1495

llvm::GCNSchedStage::SavedMutations
std::vector< std::unique_ptr< ScheduleDAGMutation > > SavedMutations
Definition GCNSchedStrategy.h:339

llvm::GCNSchedStage::GCNSchedStage
GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
Definition GCNSchedStrategy.cpp:1058

llvm::GCNSchedStage::setupNewBlock
void setupNewBlock()
Definition GCNSchedStrategy.cpp:1278

llvm::GCNSchedStage::CurrentMBB
MachineBasicBlock * CurrentMBB
Definition GCNSchedStrategy.h:325

llvm::GCNSchedStage::ST
const GCNSubtarget & ST
Definition GCNSchedStrategy.h:320

llvm::GCNSchedStrategy
This is a minimal scheduler strategy.
Definition GCNSchedStrategy.h:45

llvm::GCNSchedStrategy::DownwardTracker
GCNDownwardRPTracker DownwardTracker
Definition GCNSchedStrategy.h:77

llvm::GCNSchedStrategy::GCNSchedStrategy
GCNSchedStrategy(const MachineSchedContext *C)
Definition GCNSchedStrategy.cpp:89

llvm::GCNSchedStrategy::SchedStages
SmallVector< GCNSchedStageID, 4 > SchedStages
Definition GCNSchedStrategy.h:71

llvm::GCNSchedStrategy::HasHighPressure
bool HasHighPressure
Definition GCNSchedStrategy.h:85

llvm::GCNSchedStrategy::pickNodeBidirectional
SUnit * pickNodeBidirectional(bool &IsTopNode)
Definition GCNSchedStrategy.cpp:382

llvm::GCNSchedStrategy::pickNodeFromQueue
void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, SchedCandidate &Cand, bool IsBottomUp)
Definition GCNSchedStrategy.cpp:340

llvm::GCNSchedStrategy::SGPRCriticalLimit
unsigned SGPRCriticalLimit
Definition GCNSchedStrategy.h:102

llvm::GCNSchedStrategy::MaxPressure
std::vector< unsigned > MaxPressure
Definition GCNSchedStrategy.h:60

llvm::GCNSchedStrategy::hasNextStage
bool hasNextStage() const
Definition GCNSchedStrategy.cpp:533

llvm::GCNSchedStrategy::TargetOccupancy
unsigned TargetOccupancy
Definition GCNSchedStrategy.h:66

llvm::GCNSchedStrategy::KnownExcessRP
bool KnownExcessRP
Definition GCNSchedStrategy.h:89

llvm::GCNSchedStrategy::getCurrentStage
GCNSchedStageID getCurrentStage()
Definition GCNSchedStrategy.cpp:518

llvm::GCNSchedStrategy::VGPRExcessLimit
unsigned VGPRExcessLimit
Definition GCNSchedStrategy.h:64

llvm::GCNSchedStrategy::MF
MachineFunction * MF
Definition GCNSchedStrategy.h:68

llvm::GCNSchedStrategy::advanceStage
bool advanceStage()
Definition GCNSchedStrategy.cpp:523

llvm::GCNSchedStrategy::CurrentStage
SmallVectorImpl< GCNSchedStageID >::iterator CurrentStage
Definition GCNSchedStrategy.h:74

llvm::GCNSchedStrategy::VGPRCriticalLimit
unsigned VGPRCriticalLimit
Definition GCNSchedStrategy.h:104

llvm::GCNSchedStrategy::schedNode
void schedNode(SUnit *SU, bool IsTopNode) override
Notify MachineSchedStrategy that ScheduleDAGMI has scheduled an instruction and updated scheduled/rem...
Definition GCNSchedStrategy.cpp:508

llvm::GCNSchedStrategy::getDownwardTracker
GCNDownwardRPTracker * getDownwardTracker()
Definition GCNSchedStrategy.h:131

llvm::GCNSchedStrategy::SGPRLimitBias
unsigned SGPRLimitBias
Definition GCNSchedStrategy.h:106

llvm::GCNSchedStrategy::SGPRExcessLimit
unsigned SGPRExcessLimit
Definition GCNSchedStrategy.h:62

llvm::GCNSchedStrategy::Pressure
std::vector< unsigned > Pressure
Definition GCNSchedStrategy.h:58

llvm::GCNSchedStrategy::initialize
void initialize(ScheduleDAGMI *DAG) override
Initialize the strategy after building the DAG for a new region.
Definition GCNSchedStrategy.cpp:94

llvm::GCNSchedStrategy::UpwardTracker
GCNUpwardRPTracker UpwardTracker
Definition GCNSchedStrategy.h:80

llvm::GCNSchedStrategy::ErrorMargin
unsigned ErrorMargin
Definition GCNSchedStrategy.h:94

llvm::GCNSchedStrategy::initCandidate
void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, const SIRegisterInfo *SRI, unsigned SGPRPressure, unsigned VGPRPressure, bool IsBottomUp)
Definition GCNSchedStrategy.cpp:215

llvm::GCNSchedStrategy::VGPRLimitBias
unsigned VGPRLimitBias
Definition GCNSchedStrategy.h:108

llvm::GCNSchedStrategy::pickNode
SUnit * pickNode(bool &IsTopNode) override
Pick the next node to schedule, or return NULL.
Definition GCNSchedStrategy.cpp:463

llvm::GCNSchedStrategy::getUpwardTracker
GCNUpwardRPTracker * getUpwardTracker()
Definition GCNSchedStrategy.h:133

llvm::GCNSchedStrategy::getNextStage
GCNSchedStageID getNextStage() const
Definition GCNSchedStrategy.cpp:538

llvm::GCNScheduleDAGMILive
Definition GCNSchedStrategy.h:224

llvm::GCNScheduleDAGMILive::finalizeSchedule
void finalizeSchedule() override
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
Definition GCNSchedStrategy.cpp:953

llvm::GCNScheduleDAGMILive::schedule
void schedule() override
Orders nodes according to selected style.
Definition GCNSchedStrategy.cpp:819

llvm::GCNScheduleDAGMILive::GCNScheduleDAGMILive
GCNScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S)
Definition GCNSchedStrategy.cpp:778

llvm::GCNSubtarget
Definition GCNSubtarget.h:34

llvm::GCNSubtarget::getMaxNumVGPRs
unsigned getMaxNumVGPRs(unsigned WavesPerEU, unsigned DynamicVGPRBlockSize) const
Definition GCNSubtarget.h:1705

llvm::GCNSubtarget::getMaxNumSGPRs
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
Definition GCNSubtarget.h:1624

llvm::GCNUpwardRPTracker
Definition GCNRegPressure.h:322

llvm::GCNUpwardRPTracker::recede
void recede(const MachineInstr &MI)
Move to the state of RP just before the MI.
Definition GCNRegPressure.cpp:518

llvm::GenericSchedulerBase::traceCandidate
void traceCandidate(const SchedCandidate &Cand)
Definition MachineScheduler.cpp:3383

llvm::GenericSchedulerBase::Rem
SchedRemainder Rem
Definition MachineScheduler.h:1213

llvm::GenericSchedulerBase::setPolicy
LLVM_ABI void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, SchedBoundary *OtherZone)
Set the CandPolicy given a scheduling zone given the current resources and latencies inside and outsi...
Definition MachineScheduler.cpp:3302

llvm::GenericSchedulerBase::RegionPolicy
MachineSchedPolicy RegionPolicy
Definition MachineScheduler.h:1211

llvm::GenericSchedulerBase::SchedModel
const TargetSchedModel * SchedModel
Definition MachineScheduler.h:1205

llvm::GenericSchedulerBase::Context
const MachineSchedContext * Context
Definition MachineScheduler.h:1204

llvm::GenericSchedulerBase::RegExcess
@ RegExcess
Definition MachineScheduler.h:1098

llvm::GenericSchedulerBase::RegMax
@ RegMax
Definition MachineScheduler.h:1103

llvm::GenericSchedulerBase::ResourceDemand
@ ResourceDemand
Definition MachineScheduler.h:1105

llvm::GenericSchedulerBase::ResourceReduce
@ ResourceReduce
Definition MachineScheduler.h:1104

llvm::GenericSchedulerBase::Cluster
@ Cluster
Definition MachineScheduler.h:1101

llvm::GenericSchedulerBase::NoCand
@ NoCand
Definition MachineScheduler.h:1095

llvm::GenericSchedulerBase::RegCritical
@ RegCritical
Definition MachineScheduler.h:1099

llvm::GenericSchedulerBase::PhysReg
@ PhysReg
Definition MachineScheduler.h:1097

llvm::GenericSchedulerBase::Stall
@ Stall
Definition MachineScheduler.h:1100

llvm::GenericSchedulerBase::Weak
@ Weak
Definition MachineScheduler.h:1102

llvm::GenericSchedulerBase::TRI
const TargetRegisterInfo * TRI
Definition MachineScheduler.h:1206

llvm::GenericScheduler::BotCand
SchedCandidate BotCand
Candidate last picked from Bot boundary.
Definition MachineScheduler.h:1312

llvm::GenericScheduler::Top
SchedBoundary Top
Definition MachineScheduler.h:1303

llvm::GenericScheduler::TopCand
SchedCandidate TopCand
Candidate last picked from Top boundary.
Definition MachineScheduler.h:1310

llvm::GenericScheduler::TopClusterID
unsigned TopClusterID
Definition MachineScheduler.h:1306

llvm::GenericScheduler::Bot
SchedBoundary Bot
Definition MachineScheduler.h:1304

llvm::GenericScheduler::tryCandidate
virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const
Apply a set of heuristics to a new candidate.
Definition MachineScheduler.cpp:3920

llvm::GenericScheduler::DAG
ScheduleDAGMILive * DAG
Definition MachineScheduler.h:1300

llvm::GenericScheduler::initialize
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
Definition MachineScheduler.cpp:3637

llvm::GenericScheduler::schedNode
void schedNode(SUnit *SU, bool IsTopNode) override
Update the scheduler's state after scheduling a node.
Definition MachineScheduler.cpp:4237

llvm::GenericScheduler::GenericScheduler
GenericScheduler(const MachineSchedContext *C)
Definition MachineScheduler.h:1257

llvm::GenericScheduler::BotClusterID
unsigned BotClusterID
Definition MachineScheduler.h:1307

llvm::ILPInitialScheduleStage::shouldRevertScheduling
bool shouldRevertScheduling(unsigned WavesAfter) override
Definition GCNSchedStrategy.cpp:1584

llvm::LiveInterval::hasSubRanges
bool hasSubRanges() const
Returns true if subregister liveness information is available.
Definition LiveInterval.h:821

llvm::LiveInterval::subranges
iterator_range< subrange_iterator > subranges()
Definition LiveInterval.h:793

llvm::LiveIntervals::dump
LLVM_ABI void dump() const
Definition LiveIntervals.cpp:216

llvm::MachineBasicBlock
Definition MachineBasicBlock.h:122

llvm::MachineBasicBlock::succ_begin
succ_iterator succ_begin()
Definition MachineBasicBlock.h:443

llvm::MachineBasicBlock::succ_size
unsigned succ_size() const
Definition MachineBasicBlock.h:455

llvm::MachineBasicBlock::begin
iterator begin()
Definition MachineBasicBlock.h:377

llvm::MachineBasicBlock::end
iterator end()
Definition MachineBasicBlock.h:379

llvm::MachineBasicBlock::iterator
MachineInstrBundleIterator< MachineInstr > iterator
Definition MachineBasicBlock.h:341

llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition MachineFunction.h:733

llvm::MachineInstrBuilder::getReg
Register getReg(unsigned Idx) const
Get the register for the operand index.
Definition MachineInstrBuilder.h:123

llvm::MachineInstr
Representation of each machine instruction.
Definition MachineInstr.h:72

llvm::MachineInstr::mayLoad
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
Definition MachineInstr.h:1136

llvm::MachineInstr::operands
mop_range operands()
Definition MachineInstr.h:693

llvm::MachineInstr::eraseFromParent
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
Definition MachineInstr.cpp:770

llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition MachineInstr.h:595

llvm::MachineOperand::getSubReg
unsigned getSubReg() const
Definition MachineOperand.h:373

llvm::MachineOperand::readsReg
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register.
Definition MachineOperand.h:466

llvm::MachineOperand::isReg
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Definition MachineOperand.h:328

llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition MachineOperand.h:368

llvm::MemoryClauseInitialScheduleStage::shouldRevertScheduling
bool shouldRevertScheduling(unsigned WavesAfter) override
Definition GCNSchedStrategy.cpp:1591

llvm::OccInitialScheduleStage::shouldRevertScheduling
bool shouldRevertScheduling(unsigned WavesAfter) override
Definition GCNSchedStrategy.cpp:1514

llvm::PreRARematStage::shouldRevertScheduling
bool shouldRevertScheduling(unsigned WavesAfter) override
Definition GCNSchedStrategy.cpp:1579

llvm::PreRARematStage::initGCNRegion
bool initGCNRegion() override
Definition GCNSchedStrategy.cpp:1274

llvm::PreRARematStage::initGCNSchedStage
bool initGCNSchedStage() override
Definition GCNSchedStrategy.cpp:1124

llvm::PressureChange
Capture a change in pressure for a single pressure set.
Definition RegisterPressure.h:103

llvm::PressureChange::setUnitInc
void setUnitInc(int Inc)
Definition RegisterPressure.h:127

llvm::ReadyQueue
Helpers for implementing custom MachineSchedStrategy classes.
Definition MachineScheduler.h:564

llvm::RegPressureTracker
Track the current register pressure at some position in the instruction stream, and remember the high...
Definition RegisterPressure.h:361

llvm::RegPressureTracker::advance
LLVM_ABI void advance()
Advance across the current instruction.
Definition RegisterPressure.cpp:933

llvm::RegPressureTracker::getDownwardPressure
LLVM_ABI void getDownwardPressure(const MachineInstr *MI, std::vector< unsigned > &PressureResult, std::vector< unsigned > &MaxPressureResult)
Get the pressure of each PSet after traversing this instruction top-down.
Definition RegisterPressure.cpp:1371

llvm::RegPressureTracker::getRegSetPressureAtPos
const std::vector< unsigned > & getRegSetPressureAtPos() const
Get the register set pressure at the current position, which may be less than the pressure across the...
Definition RegisterPressure.h:467

llvm::RegPressureTracker::getUpwardPressure
LLVM_ABI void getUpwardPressure(const MachineInstr *MI, std::vector< unsigned > &PressureResult, std::vector< unsigned > &MaxPressureResult)
Get the pressure of each PSet after traversing this instruction bottom-up.
Definition RegisterPressure.cpp:1355

llvm::RegionBase::getParent
RegionT * getParent() const
Get the parent of the Region.
Definition RegionInfo.h:362

llvm::RegionPressureMap::buildLiveRegMap
void buildLiveRegMap()
Definition GCNSchedStrategy.cpp:939

llvm::Region
Definition RegionInfo.h:887

llvm::RegisterOperands
List of registers defined and used by a machine instruction.
Definition RegisterPressure.h:167

llvm::RegisterOperands::collect
LLVM_ABI void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOpe...
Definition RegisterPressure.cpp:562

llvm::RegisterOperands::adjustLaneLiveness
LLVM_ABI void adjustLaneLiveness(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, SlotIndex Pos, MachineInstr *AddFlagsMI=nullptr)
Use liveness information to find out which uses/defs are partially undefined/dead and adjust the VReg...
Definition RegisterPressure.cpp:593

llvm::RegisterOperands::detectDeadDefs
LLVM_ABI void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS)
Use liveness information to find dead defs not marked with a dead flag and move them to the DeadDefs ...
Definition RegisterPressure.cpp:573

llvm::Register::isVirtual
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:74

llvm::Register::isPhysical
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:78

llvm::SIInstrInfo
Definition SIInstrInfo.h:90

llvm::SIInstrInfo::isIGLPMutationOnly
bool isIGLPMutationOnly(unsigned Opcode) const
Definition SIInstrInfo.h:1062

llvm::SIMachineFunctionInfo
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Definition SIMachineFunctionInfo.h:412

llvm::SIMachineFunctionInfo::getOccupancy
unsigned getOccupancy() const
Definition SIMachineFunctionInfo.h:1178

llvm::SIMachineFunctionInfo::getDynamicVGPRBlockSize
unsigned getDynamicVGPRBlockSize() const
Definition SIMachineFunctionInfo.h:848

llvm::SIMachineFunctionInfo::getMinAllowedOccupancy
unsigned getMinAllowedOccupancy() const
Definition SIMachineFunctionInfo.h:1182

llvm::SIRegisterInfo
Definition SIRegisterInfo.h:40

llvm::SUnit
Scheduling unit. This is a node in the scheduling DAG.
Definition ScheduleDAG.h:249

llvm::SUnit::isInstr
bool isInstr() const
Returns true if this SUnit refers to a machine instruction as opposed to an SDNode.
Definition ScheduleDAG.h:387

llvm::SUnit::NodeNum
unsigned NodeNum
Entry # of node in the node vector.
Definition ScheduleDAG.h:277

llvm::SUnit::Latency
unsigned short Latency
Node latency.
Definition ScheduleDAG.h:312

llvm::SUnit::isScheduled
bool isScheduled
True once scheduled.
Definition ScheduleDAG.h:305

llvm::SUnit::ParentClusterIdx
unsigned ParentClusterIdx
The parent cluster id.
Definition ScheduleDAG.h:288

llvm::SUnit::isBottomReady
bool isBottomReady() const
Definition ScheduleDAG.h:476

llvm::SUnit::isTopReady
bool isTopReady() const
Definition ScheduleDAG.h:473

llvm::SUnit::Preds
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition ScheduleDAG.h:269

llvm::SUnit::getInstr
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition ScheduleDAG.h:399

llvm::SchedBoundary
Each Scheduling boundary is associated with ready queues.
Definition MachineScheduler.h:856

llvm::SchedBoundary::getLatencyStallCycles
LLVM_ABI unsigned getLatencyStallCycles(SUnit *SU)
Get the difference between the given SUnit's ready time and the current cycle.
Definition MachineScheduler.cpp:2581

llvm::SchedBoundary::DAG
ScheduleDAGMI * DAG
Definition MachineScheduler.h:865

llvm::SchedBoundary::isTop
bool isTop() const
Definition MachineScheduler.h:980

llvm::SchedBoundary::Available
ReadyQueue Available
Definition MachineScheduler.h:869

llvm::SchedBoundary::getCurrMOps
unsigned getCurrMOps() const
Micro-ops issued in the current cycle.
Definition MachineScheduler.h:988

llvm::ScheduleDAGInstrs
A ScheduleDAG for scheduling lists of MachineInstr.
Definition ScheduleDAGInstrs.h:116

llvm::ScheduleDAGInstrs::ScheduleSingleMIRegions
bool ScheduleSingleMIRegions
True if regions with a single MI should be scheduled.
Definition ScheduleDAGInstrs.h:129

llvm::ScheduleDAGInstrs::RegionEnd
MachineBasicBlock::iterator RegionEnd
The end of the range to be scheduled.
Definition ScheduleDAGInstrs.h:151

llvm::ScheduleDAGInstrs::finalizeSchedule
virtual void finalizeSchedule()
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
Definition ScheduleDAGInstrs.h:364

llvm::ScheduleDAGInstrs::exitRegion
virtual void exitRegion()
Called when the scheduler has finished scheduling the current region.
Definition ScheduleDAGInstrs.cpp:208

llvm::ScheduleDAGInstrs::MLI
const MachineLoopInfo * MLI
Definition ScheduleDAGInstrs.h:118

llvm::ScheduleDAGInstrs::RemoveKillFlags
bool RemoveKillFlags
True if the DAG builder should remove kill flags (in preparation for rescheduling).
Definition ScheduleDAGInstrs.h:126

llvm::ScheduleDAGInstrs::RegionBegin
MachineBasicBlock::iterator RegionBegin
The beginning of the range to be scheduled.
Definition ScheduleDAGInstrs.h:148

llvm::ScheduleDAGMILive::schedule
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
Definition MachineScheduler.cpp:1689

llvm::ScheduleDAGMILive::ScheduleDAGMILive
ScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S)
Definition MachineScheduler.h:461

llvm::ScheduleDAGMILive::RPTracker
RegPressureTracker RPTracker
Definition MachineScheduler.h:445

llvm::ScheduleDAGMI
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
Definition MachineScheduler.h:308

llvm::ScheduleDAGMI::addMutation
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
Add a postprocessing step to the DAG builder.
Definition MachineScheduler.h:356

llvm::ScheduleDAGMI::schedule
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
Definition MachineScheduler.cpp:1055

llvm::ScheduleDAGMI::ScheduleDAGMI
ScheduleDAGMI(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S, bool RemoveKillFlags)
Definition MachineScheduler.h:330

llvm::ScheduleDAGMI::LIS
LiveIntervals * LIS
Definition MachineScheduler.h:311

llvm::ScheduleDAGMI::Mutations
std::vector< std::unique_ptr< ScheduleDAGMutation > > Mutations
Ordered list of DAG postprocessing steps.
Definition MachineScheduler.h:315

llvm::ScheduleDAG::MRI
MachineRegisterInfo & MRI
Virtual/real register map.
Definition ScheduleDAG.h:587

llvm::ScheduleDAG::TII
const TargetInstrInfo * TII
Target instruction information.
Definition ScheduleDAG.h:584

llvm::ScheduleDAG::MF
MachineFunction & MF
Machine function.
Definition ScheduleDAG.h:586

llvm::ScheduleMetrics
Definition GCNSchedStrategy.h:166

llvm::ScheduleMetrics::ScaleFactor
static const unsigned ScaleFactor
Definition GCNSchedStrategy.h:182

llvm::ScheduleMetrics::getMetric
unsigned getMetric() const
Definition GCNSchedStrategy.h:176

llvm::SlotIndex
SlotIndex - An opaque wrapper around machine indexes.
Definition SlotIndexes.h:66

llvm::SlotIndexes::getMBBStartIdx
SlotIndex getMBBStartIdx(unsigned Num) const
Returns the first index in the given basic block number.
Definition SlotIndexes.h:461

llvm::SmallVectorTemplateCommon::size
size_t size() const
Definition SmallVector.h:79

llvm::SmallVectorTemplateCommon::empty
bool empty() const
Definition SmallVector.h:82

llvm::TargetSchedModel
Provide an instruction scheduling machine model to CodeGen passes.
Definition TargetSchedule.h:31

llvm::Target
Target - Wrapper for Target specific information.
Definition TargetRegistry.h:146

llvm::UnclusteredHighRPStage::initGCNSchedStage
bool initGCNSchedStage() override
Definition GCNSchedStrategy.cpp:1070

llvm::UnclusteredHighRPStage::initGCNRegion
bool initGCNRegion() override
Definition GCNSchedStrategy.cpp:1247

llvm::UnclusteredHighRPStage::finalizeGCNSchedStage
void finalizeGCNSchedStage() override
Definition GCNSchedStrategy.cpp:1173

llvm::UnclusteredHighRPStage::shouldRevertScheduling
bool shouldRevertScheduling(unsigned WavesAfter) override
Definition GCNSchedStrategy.cpp:1527

llvm::VirtRegAuxInfo::allUsesAvailableAt
static bool allUsesAvailableAt(const MachineInstr *MI, SlotIndex UseIdx, const LiveIntervals &LIS, const MachineRegisterInfo &MRI, const TargetInstrInfo &TII)
Definition CalcSpillWeights.cpp:149

llvm::cl::opt
Definition CommandLine.h:1455

llvm::detail::DenseSetImpl::insert
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202

llvm::detail::DenseSetImpl::contains
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
Definition DenseSet.h:175

llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53

ErrorHandling.h

llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition ErrorHandling.h:164

false
Definition MachinePipeliner.cpp:244

llvm::AArch64::RP
@ RP
Definition AArch64ISelLowering.h:33

llvm::AMDGPU::Exp::Target
Target
Definition SIDefines.h:1008

llvm::AMDGPU::HSAMD::AddressSpaceQualifier::Region
@ Region
Definition AMDGPUMetadata.h:75

llvm::AMDGPU::IsaInfo::getVGPRAllocGranule
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
Definition AMDGPUBaseInfo.cpp:1347

llvm::AMDGPU::IsaInfo::getAllocatedNumVGPRBlocks
unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
Definition AMDGPUBaseInfo.cpp:1506

llvm::AMDGPU::IsaInfo::getAddressableNumVGPRs
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize)
Definition AMDGPUBaseInfo.cpp:1404

llvm::AMDGPU::SchedulingPhase::PostRA
@ PostRA
Definition AMDGPUIGroupLP.h:20

llvm::AMDGPU::SchedulingPhase::PreRAReentry
@ PreRAReentry
Definition AMDGPUIGroupLP.h:20

llvm::AMDGPU::SchedulingPhase::Initial
@ Initial
Definition AMDGPUIGroupLP.h:20

llvm::AMDGPU::getDynamicVGPRBlockSize
unsigned getDynamicVGPRBlockSize(const Function &F)
Definition AMDGPUBaseInfo.cpp:2401

llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition BitmaskEnum.h:127

llvm::CallingConv::C
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34

llvm::cl::Hidden
@ Hidden
Definition CommandLine.h:139

llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition CommandLine.h:445

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition AddressRanges.h:18

llvm::isEqual
bool isEqual(const GCNRPTracker::LiveRegSet &S1, const GCNRPTracker::LiveRegSet &S2)
Definition GCNRegPressure.cpp:24

llvm::print
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
Definition GCNRegPressure.cpp:238

llvm::getWeakLeft
LLVM_ABI unsigned getWeakLeft(const SUnit *SU, bool isTop)
Definition MachineScheduler.cpp:3825

llvm::Latency
@ Latency
Definition SIMachineScheduler.h:34

llvm::NodeOrder
@ NodeOrder
Definition SIMachineScheduler.h:37

llvm::NoCand
@ NoCand
Definition SIMachineScheduler.h:32

llvm::getRegPressure
GCNRegPressure getRegPressure(const MachineRegisterInfo &MRI, Range &&LiveRegs)
Definition GCNRegPressure.h:507

llvm::createIGroupLPDAGMutation
std::unique_ptr< ScheduleDAGMutation > createIGroupLPDAGMutation(AMDGPU::SchedulingPhase Phase)
Phase specifies whether or not this is a reentry into the IGroupLPDAGMutation.
Definition AMDGPUIGroupLP.cpp:2690

llvm::alignDown
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer that is less than or equal to Value and is congruent to Skew modulo Align.
Definition MathExtras.h:557

llvm::RegionBoundaries
std::pair< MachineBasicBlock::iterator, MachineBasicBlock::iterator > RegionBoundaries
A region's boundaries i.e.
Definition GCNSchedStrategy.h:221

llvm::skipDebugInstructionsForward
IterT skipDebugInstructionsForward(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It until it points to a non-debug instruction or to End and return the resulting iterator.
Definition MachineBasicBlock.h:1477

llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732

llvm::tryPressure
LLVM_ABI bool tryPressure(const PressureChange &TryP, const PressureChange &CandP, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason, const TargetRegisterInfo *TRI, const MachineFunction &MF)
Definition MachineScheduler.cpp:3787

llvm::GCNSchedStageID
GCNSchedStageID
Definition GCNSchedStrategy.h:29

llvm::GCNSchedStageID::UnclusteredHighRPReschedule
@ UnclusteredHighRPReschedule
Definition GCNSchedStrategy.h:31

llvm::GCNSchedStageID::MemoryClauseInitialSchedule
@ MemoryClauseInitialSchedule
Definition GCNSchedStrategy.h:35

llvm::GCNSchedStageID::ILPInitialSchedule
@ ILPInitialSchedule
Definition GCNSchedStrategy.h:34

llvm::GCNSchedStageID::PreRARematerialize
@ PreRARematerialize
Definition GCNSchedStrategy.h:33

llvm::GCNSchedStageID::OccInitialSchedule
@ OccInitialSchedule
Definition GCNSchedStrategy.h:30

llvm::GCNSchedStageID::ClusteredLowOccupancyReschedule
@ ClusteredLowOccupancyReschedule
Definition GCNSchedStrategy.h:32

llvm::reverse
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406

llvm::dbgs
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207

llvm::report_fatal_error
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167

llvm::VerifyScheduling
LLVM_ABI cl::opt< bool > VerifyScheduling

llvm::tryLatency
LLVM_ABI bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, SchedBoundary &Zone)
Definition MachineScheduler.cpp:3471

llvm::skipDebugInstructionsBackward
IterT skipDebugInstructionsBackward(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It until it points to a non-debug instruction or to Begin and return the resulting iterator...
Definition MachineBasicBlock.h:1490

llvm::errs
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Definition raw_ostream.cpp:908

llvm::isTheSameCluster
bool isTheSameCluster(unsigned A, unsigned B)
Return whether the input cluster ID's are the same and valid.
Definition ScheduleDAG.h:244

llvm::Op
DWARFExpression::Operation Op
Definition DWARFExpressionPrinter.cpp:22

llvm::tryGreater
LLVM_ABI bool tryGreater(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
Definition MachineScheduler.cpp:3455

llvm::operator<<
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition APFixedPoint.h:312

llvm::move
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1867

llvm::getLiveRegMap
DenseMap< MachineInstr *, GCNRPTracker::LiveRegSet > getLiveRegMap(Range &&R, bool After, LiveIntervals &LIS)
creates a map MachineInstr -> LiveRegSet R - range of iterators on instructions After - upon entry or...
Definition GCNRegPressure.h:457

llvm::getLiveRegsBefore
GCNRPTracker::LiveRegSet getLiveRegsBefore(const MachineInstr &MI, const LiveIntervals &LIS)
Definition GCNRegPressure.h:500

llvm::tryLess
LLVM_ABI bool tryLess(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
Return true if this heuristic determines order.
Definition MachineScheduler.cpp:3439

llvm::dumpMaxRegPressure
LLVM_ABI void dumpMaxRegPressure(MachineFunction &MF, GCNRegPressure::RegKind Kind, LiveIntervals &LIS, const MachineLoopInfo *MLI)
Definition GCNRegPressure.cpp:996

llvm::printMBBReference
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
Definition MachineBasicBlock.cpp:120

llvm::biasPhysReg
LLVM_ABI int biasPhysReg(const SUnit *SU, bool isTop)
Minimize physical register live ranges.
Definition MachineScheduler.cpp:3836

std
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:867

true
Definition SPIRVConvergenceRegionAnalysis.cpp:40

EarlierIssuingCycle
Definition GCNSchedStrategy.cpp:1399

EarlierIssuingCycle::operator()
bool operator()(std::pair< MachineInstr *, unsigned > A, std::pair< MachineInstr *, unsigned > B) const
Definition GCNSchedStrategy.cpp:1400

llvm::GCNRegPressure
Definition GCNRegPressure.h:31

llvm::GCNRegPressure::SGPR
@ SGPR
Definition GCNRegPressure.h:32

llvm::GCNRegPressure::VGPR
@ VGPR
Definition GCNRegPressure.h:32

llvm::GCNRegPressure::getArchVGPRNum
unsigned getArchVGPRNum() const
Definition GCNRegPressure.h:85

llvm::GCNRegPressure::getAGPRNum
unsigned getAGPRNum() const
Definition GCNRegPressure.h:87

llvm::GCNRegPressure::getSGPRNum
unsigned getSGPRNum() const
Definition GCNRegPressure.h:56

llvm::GenericSchedulerBase::CandPolicy
Policy for scheduling the next instruction in the candidate's zone.
Definition MachineScheduler.h:1119

llvm::GenericSchedulerBase::CandPolicy::ReduceLatency
bool ReduceLatency
Definition MachineScheduler.h:1120

llvm::GenericSchedulerBase::SchedCandidate
Store the state used by GenericScheduler heuristics, required for the lifetime of one invocation of p...
Definition MachineScheduler.h:1157

llvm::GenericSchedulerBase::SchedCandidate::setBest
void setBest(SchedCandidate &Best)
Definition MachineScheduler.h:1190

llvm::GenericSchedulerBase::SchedCandidate::SU
SUnit * SU
Definition MachineScheduler.h:1161

llvm::GenericSchedulerBase::SchedCandidate::reset
void reset(const CandPolicy &NewPolicy)
Definition MachineScheduler.h:1178

llvm::GenericSchedulerBase::SchedCandidate::initResourceDelta
LLVM_ABI void initResourceDelta(const ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
Definition MachineScheduler.cpp:3238

llvm::GenericSchedulerBase::SchedCandidate::RPDelta
RegPressureDelta RPDelta
Definition MachineScheduler.h:1170

llvm::GenericSchedulerBase::SchedCandidate::AtTop
bool AtTop
Definition MachineScheduler.h:1167

llvm::GenericSchedulerBase::SchedCandidate::ResDelta
SchedResourceDelta ResDelta
Definition MachineScheduler.h:1173

llvm::GenericSchedulerBase::SchedCandidate::isValid
bool isValid() const
Definition MachineScheduler.h:1187

llvm::GenericSchedulerBase::SchedCandidate::Reason
CandReason Reason
Definition MachineScheduler.h:1164

llvm::GenericSchedulerBase::SchedCandidate::Policy
CandPolicy Policy
Definition MachineScheduler.h:1158

llvm::GenericSchedulerBase::SchedResourceDelta
Status of an instruction's critical resource consumption.
Definition MachineScheduler.h:1137

llvm::GenericSchedulerBase::SchedResourceDelta::CritResources
unsigned CritResources
Definition MachineScheduler.h:1139

llvm::GenericSchedulerBase::SchedResourceDelta::DemandedResources
unsigned DemandedResources
Definition MachineScheduler.h:1142

llvm::LaneBitmask::any
constexpr bool any() const
Definition LaneBitmask.h:53

llvm::LaneBitmask::getNone
static constexpr LaneBitmask getNone()
Definition LaneBitmask.h:81

llvm::MachineSchedContext
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
Definition MachineScheduler.h:143

llvm::RegPressureDelta::CriticalMax
PressureChange CriticalMax
Definition RegisterPressure.h:245

llvm::RegPressureDelta::CurrentMax
PressureChange CurrentMax
Definition RegisterPressure.h:246

llvm::RegPressureDelta::Excess
PressureChange Excess
Definition RegisterPressure.h:244

llvm::cl::desc
Definition CommandLine.h:411