/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp

Bug Summary

File:	llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
Warning:	line 1649, column 3 Called C++ object pointer is null
Annotated Source Code

Press '?' to see keyboard shortcuts
Show analyzer invocation
clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name SIMachineScheduler.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/build-llvm/lib/Target/AMDGPU -resource-dir /usr/lib/llvm-14/lib/clang/14.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/build-llvm/lib/Target/AMDGPU -I /build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU -I /build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/build-llvm/include -I /build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-14/lib/clang/14.0.0/include 
-internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/build-llvm/lib/Target/AMDGPU -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0=. -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-08-28-193554-24367-1 -x c++ /build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
1//===-- SIMachineScheduler.cpp - SI Scheduler Interface -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// SI Machine Scheduler interface
11//
12//===----------------------------------------------------------------------===//

14#include "SIMachineScheduler.h"
15#include "SIInstrInfo.h"
16#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
17#include "llvm/CodeGen/LiveIntervals.h"
18#include "llvm/CodeGen/MachineRegisterInfo.h"

20using namespace llvm;

22#define DEBUG_TYPE"machine-scheduler" "machine-scheduler"

24// This scheduler implements a different scheduling algorithm than
25// GenericScheduler.
26//
27// There are several specific architecture behaviours that can't be modelled
28// for GenericScheduler:
29// . When accessing the result of an SGPR load instruction, you have to wait
30// for all the SGPR load instructions before your current instruction to
31// have finished.
32// . When accessing the result of an VGPR load instruction, you have to wait
33// for all the VGPR load instructions previous to the VGPR load instruction
34// you are interested in to finish.
35// . The lower the register pressure, the better load latencies are hidden
36//
37// Moreover some specificities (like the fact a lot of instructions in the shader
38// have few dependencies) makes the generic scheduler have some unpredictable
39// behaviours. For example when register pressure becomes high, it can either
40// manage to prevent register pressure from going too high, or it can
41// increase register pressure even more than if it hadn't taken register
42// pressure into account.
43//
44// Also some other bad behaviours are generated, like loading at the beginning
45// of the shader a constant in VGPR you won't need until the end of the shader.
46//
47// The scheduling problem for SI can distinguish three main parts:
48// . Hiding high latencies (texture sampling, etc)
49// . Hiding low latencies (SGPR constant loading, etc)
50// . Keeping register usage low for better latency hiding and general
51//   performance
52//
53// Some other things can also affect performance, but are hard to predict
54// (cache usage, the fact the HW can issue several instructions from different
55// wavefronts if different types, etc)
56//
57// This scheduler tries to solve the scheduling problem by dividing it into
58// simpler sub-problems. It divides the instructions into blocks, schedules
59// locally inside the blocks where it takes care of low latencies, and then
60// chooses the order of the blocks by taking care of high latencies.
61// Dividing the instructions into blocks helps control keeping register
62// usage low.
63//
64// First the instructions are put into blocks.
65//   We want the blocks to help control register usage and hide high latencies
66//   later. To help control register usage, we typically want all local
67//   computations, when for example you create a result that can be consumed
68//   right away, to be contained in a block. Block inputs and outputs would
69//   typically be important results that are needed in several locations of
70//   the shader. Since we do want blocks to help hide high latencies, we want
71//   the instructions inside the block to have a minimal set of dependencies
72//   on high latencies. It will make it easy to pick blocks to hide specific
73//   high latencies.
74//   The block creation algorithm is divided into several steps, and several
75//   variants can be tried during the scheduling process.
76//
77// Second the order of the instructions inside the blocks is chosen.
78//   At that step we do take into account only register usage and hiding
79//   low latency instructions
80//
81// Third the block order is chosen, there we try to hide high latencies
82// and keep register usage low.
83//
84// After the third step, a pass is done to improve the hiding of low
85// latencies.
86//
87// Actually when talking about 'low latency' or 'high latency' it includes
88// both the latency to get the cache (or global mem) data go to the register,
89// and the bandwidth limitations.
90// Increasing the number of active wavefronts helps hide the former, but it
91// doesn't solve the latter, thus why even if wavefront count is high, we have
92// to try to have as many instructions hiding high latencies as possible.
93// The OpenCL doc says for example latency of 400 cycles for a global mem access,
94// which is hidden by 10 instructions if the wavefront count is 10.

96// Some figures taken from AMD docs:
97// Both texture and constant L1 caches are 4-way associative with 64 bytes
98// lines.
99// Constant cache is shared with 4 CUs.
100// For texture sampling, the address generation unit receives 4 texture
101// addresses per cycle, thus we could expect texture sampling latency to be
102// equivalent to 4 instructions in the very best case (a VGPR is 64 work items,
103// instructions in a wavefront group are executed every 4 cycles),
104// or 16 instructions if the other wavefronts associated to the 3 other VALUs
105// of the CU do texture sampling too. (Don't take these figures too seriously,
106// as I'm not 100% sure of the computation)
107// Data exports should get similar latency.
108// For constant loading, the cache is shared with 4 CUs.
109// The doc says "a throughput of 16B/cycle for each of the 4 Compute Unit"
110// I guess if the other CU don't read the cache, it can go up to 64B/cycle.
111// It means a simple s_buffer_load should take one instruction to hide, as
112// well as a s_buffer_loadx2 and potentially a s_buffer_loadx8 if on the same
113// cache line.
114//
115// As of today the driver doesn't preload the constants in cache, thus the
116// first loads get extra latency. The doc says global memory access can be
117// 300-600 cycles. We do not specially take that into account when scheduling
118// As we expect the driver to be able to preload the constants soon.

120// common code //

122#ifndef NDEBUG

124static const char *getReasonStr(SIScheduleCandReason Reason) {
switch (Reason) {
case NoCand:         return "NOCAND";
case RegUsage:       return "REGUSAGE";
case Latency:        return "LATENCY";
case Successor:      return "SUCCESSOR";
case Depth:          return "DEPTH";
case NodeOrder:      return "ORDER";
}
llvm_unreachable("Unknown reason!")::llvm::llvm_unreachable_internal("Unknown reason!", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 133);
134}

136#endif

138namespace llvm {
139namespace SISched {
140static bool tryLess(int TryVal, int CandVal,
                  SISchedulerCandidate &TryCand,
                  SISchedulerCandidate &Cand,
                  SIScheduleCandReason Reason) {
if (TryVal < CandVal) {
  TryCand.Reason = Reason;
  return true;
}
if (TryVal > CandVal) {
  if (Cand.Reason > Reason)
    Cand.Reason = Reason;
  return true;
}
Cand.setRepeat(Reason);
return false;
155}

157static bool tryGreater(int TryVal, int CandVal,
                     SISchedulerCandidate &TryCand,
                     SISchedulerCandidate &Cand,
                     SIScheduleCandReason Reason) {
if (TryVal > CandVal) {
  TryCand.Reason = Reason;
  return true;
}
if (TryVal < CandVal) {
  if (Cand.Reason > Reason)
    Cand.Reason = Reason;
  return true;
}
Cand.setRepeat(Reason);
return false;
172}
173} // end namespace SISched
174} // end namespace llvm

176// SIScheduleBlock //

178void SIScheduleBlock::addUnit(SUnit *SU) {
NodeNum2Index[SU->NodeNum] = SUnits.size();
SUnits.push_back(SU);
181}

183#ifndef NDEBUG
184void SIScheduleBlock::traceCandidate(const SISchedCandidate &Cand) {

dbgs() << "  SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
dbgs() << '\n';
188}
189#endif

191void SIScheduleBlock::tryCandidateTopDown(SISchedCandidate &Cand,
                                        SISchedCandidate &TryCand) {
// Initialize the candidate if needed.
if (!Cand.isValid()) {
  TryCand.Reason = NodeOrder;
  return;
}

if (Cand.SGPRUsage > 60 &&
    SISched::tryLess(TryCand.SGPRUsage, Cand.SGPRUsage,
                     TryCand, Cand, RegUsage))
  return;

// Schedule low latency instructions as top as possible.
// Order of priority is:
// . Low latency instructions which do not depend on other low latency
//   instructions we haven't waited for
// . Other instructions which do not depend on low latency instructions
//   we haven't waited for
// . Low latencies
// . All other instructions
// Goal is to get: low latency instructions - independent instructions
//     - (eventually some more low latency instructions)
//     - instructions that depend on the first low latency instructions.
// If in the block there is a lot of constant loads, the SGPR usage
// could go quite high, thus above the arbitrary limit of 60 will encourage
// use the already loaded constants (in order to release some SGPRs) before
// loading more.
if (SISched::tryLess(TryCand.HasLowLatencyNonWaitedParent,
                     Cand.HasLowLatencyNonWaitedParent,
                     TryCand, Cand, SIScheduleCandReason::Depth))
  return;

if (SISched::tryGreater(TryCand.IsLowLatency, Cand.IsLowLatency,
                        TryCand, Cand, SIScheduleCandReason::Depth))
  return;

if (TryCand.IsLowLatency &&
    SISched::tryLess(TryCand.LowLatencyOffset, Cand.LowLatencyOffset,
                     TryCand, Cand, SIScheduleCandReason::Depth))
  return;

if (SISched::tryLess(TryCand.VGPRUsage, Cand.VGPRUsage,
                     TryCand, Cand, RegUsage))
  return;

// Fall through to original instruction order.
if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
  TryCand.Reason = NodeOrder;
}
241}

243SUnit* SIScheduleBlock::pickNode() {
SISchedCandidate TopCand;

for (SUnit* SU : TopReadySUs) {
  SISchedCandidate TryCand;
  std::vector<unsigned> pressure;
  std::vector<unsigned> MaxPressure;
  // Predict register usage after this instruction.
  TryCand.SU = SU;
  TopRPTracker.getDownwardPressure(SU->getInstr(), pressure, MaxPressure);
  TryCand.SGPRUsage = pressure[AMDGPU::RegisterPressureSets::SReg_32];
  TryCand.VGPRUsage = pressure[AMDGPU::RegisterPressureSets::VGPR_32];
  TryCand.IsLowLatency = DAG->IsLowLatencySU[SU->NodeNum];
  TryCand.LowLatencyOffset = DAG->LowLatencyOffset[SU->NodeNum];
  TryCand.HasLowLatencyNonWaitedParent =
    HasLowLatencyNonWaitedParent[NodeNum2Index[SU->NodeNum]];
  tryCandidateTopDown(TopCand, TryCand);
  if (TryCand.Reason != NoCand)
    TopCand.setBest(TryCand);
}

return TopCand.SU;
265}


268// Schedule something valid.
269void SIScheduleBlock::fastSchedule() {
TopReadySUs.clear();
if (Scheduled)
  undoSchedule();

for (SUnit* SU : SUnits) {
  if (!SU->NumPredsLeft)
    TopReadySUs.push_back(SU);
}

while (!TopReadySUs.empty()) {
  SUnit *SU = TopReadySUs[0];
  ScheduledSUnits.push_back(SU);
  nodeScheduled(SU);
}

Scheduled = true;
286}

288// Returns if the register was set between first and last.
289static bool isDefBetween(unsigned Reg,
                         SlotIndex First, SlotIndex Last,
                         const MachineRegisterInfo *MRI,
                         const LiveIntervals *LIS) {
for (MachineRegisterInfo::def_instr_iterator
     UI = MRI->def_instr_begin(Reg),
     UE = MRI->def_instr_end(); UI != UE; ++UI) {
  const MachineInstr* MI = &*UI;
  if (MI->isDebugValue())
    continue;
  SlotIndex InstSlot = LIS->getInstructionIndex(*MI).getRegSlot();
  if (InstSlot >= First && InstSlot <= Last)
    return true;
}
return false;
304}

306void SIScheduleBlock::initRegPressure(MachineBasicBlock::iterator BeginBlock,
                                    MachineBasicBlock::iterator EndBlock) {
IntervalPressure Pressure, BotPressure;
RegPressureTracker RPTracker(Pressure), BotRPTracker(BotPressure);
LiveIntervals *LIS = DAG->getLIS();
MachineRegisterInfo *MRI = DAG->getMRI();
DAG->initRPTracker(TopRPTracker);
DAG->initRPTracker(BotRPTracker);
DAG->initRPTracker(RPTracker);

// Goes though all SU. RPTracker captures what had to be alive for the SUs
// to execute, and what is still alive at the end.
for (SUnit* SU : ScheduledSUnits) {
  RPTracker.setPos(SU->getInstr());
  RPTracker.advance();
}

// Close the RPTracker to finalize live ins/outs.
RPTracker.closeRegion();

// Initialize the live ins and live outs.
TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);

// Do not Track Physical Registers, because it messes up.
for (const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) {
  if (Register::isVirtualRegister(RegMaskPair.RegUnit))
    LiveInRegs.insert(RegMaskPair.RegUnit);
}
LiveOutRegs.clear();
// There is several possibilities to distinguish:
// 1) Reg is not input to any instruction in the block, but is output of one
// 2) 1) + read in the block and not needed after it
// 3) 1) + read in the block but needed in another block
// 4) Reg is input of an instruction but another block will read it too
// 5) Reg is input of an instruction and then rewritten in the block.
//    result is not read in the block (implies used in another block)
// 6) Reg is input of an instruction and then rewritten in the block.
//    result is read in the block and not needed in another block
// 7) Reg is input of an instruction and then rewritten in the block.
//    result is read in the block but also needed in another block
// LiveInRegs will contains all the regs in situation 4, 5, 6, 7
// We want LiveOutRegs to contain only Regs whose content will be read after
// in another block, and whose content was written in the current block,
// that is we want it to get 1, 3, 5, 7
// Since we made the MIs of a block to be packed all together before
// scheduling, then the LiveIntervals were correct, and the RPTracker was
// able to correctly handle 5 vs 6, 2 vs 3.
// (Note: This is not sufficient for RPTracker to not do mistakes for case 4)
// The RPTracker's LiveOutRegs has 1, 3, (some correct or incorrect)4, 5, 7
// Comparing to LiveInRegs is not sufficient to differenciate 4 vs 5, 7
// The use of findDefBetween removes the case 4.
for (const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) {
  Register Reg = RegMaskPair.RegUnit;
  if (Reg.isVirtual() &&
      isDefBetween(Reg, LIS->getInstructionIndex(*BeginBlock).getRegSlot(),
                   LIS->getInstructionIndex(*EndBlock).getRegSlot(), MRI,
                   LIS)) {
    LiveOutRegs.insert(Reg);
  }
}

// Pressure = sum_alive_registers register size
// Internally llvm will represent some registers as big 128 bits registers
// for example, but they actually correspond to 4 actual 32 bits registers.
// Thus Pressure is not equal to num_alive_registers * constant.
LiveInPressure = TopPressure.MaxSetPressure;
LiveOutPressure = BotPressure.MaxSetPressure;

// Prepares TopRPTracker for top down scheduling.
TopRPTracker.closeTop();
377}

379void SIScheduleBlock::schedule(MachineBasicBlock::iterator BeginBlock,
                             MachineBasicBlock::iterator EndBlock) {
if (!Scheduled)
  fastSchedule();

// PreScheduling phase to set LiveIn and LiveOut.
initRegPressure(BeginBlock, EndBlock);
undoSchedule();

// Schedule for real now.

TopReadySUs.clear();

for (SUnit* SU : SUnits) {
  if (!SU->NumPredsLeft)
    TopReadySUs.push_back(SU);
}

while (!TopReadySUs.empty()) {
  SUnit *SU = pickNode();
  ScheduledSUnits.push_back(SU);
  TopRPTracker.setPos(SU->getInstr());
  TopRPTracker.advance();
  nodeScheduled(SU);
}

// TODO: compute InternalAdditionnalPressure.
InternalAdditionnalPressure.resize(TopPressure.MaxSetPressure.size());

// Check everything is right.
409#ifndef NDEBUG
assert(SUnits.size() == ScheduledSUnits.size() &&(static_cast <bool> (SUnits.size() == ScheduledSUnits.size
() && TopReadySUs.empty()) ? void (0) : __assert_fail
 ("SUnits.size() == ScheduledSUnits.size() && TopReadySUs.empty()"
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 411, __extension__ __PRETTY_FUNCTION__))
          TopReadySUs.empty())(static_cast <bool> (SUnits.size() == ScheduledSUnits.size
() && TopReadySUs.empty()) ? void (0) : __assert_fail
 ("SUnits.size() == ScheduledSUnits.size() && TopReadySUs.empty()"
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 411, __extension__ __PRETTY_FUNCTION__));
for (SUnit* SU : SUnits) {
  assert(SU->isScheduled &&(static_cast <bool> (SU->isScheduled && SU->
NumPredsLeft == 0) ? void (0) : __assert_fail ("SU->isScheduled && SU->NumPredsLeft == 0"
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 414, __extension__ __PRETTY_FUNCTION__))
            SU->NumPredsLeft == 0)(static_cast <bool> (SU->isScheduled && SU->
NumPredsLeft == 0) ? void (0) : __assert_fail ("SU->isScheduled && SU->NumPredsLeft == 0"
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 414, __extension__ __PRETTY_FUNCTION__));
}
416#endif

Scheduled = true;
419}

421void SIScheduleBlock::undoSchedule() {
for (SUnit* SU : SUnits) {
  SU->isScheduled = false;
  for (SDep& Succ : SU->Succs) {
    if (BC->isSUInBlock(Succ.getSUnit(), ID))
      undoReleaseSucc(SU, &Succ);
  }
}
HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);
ScheduledSUnits.clear();
Scheduled = false;
432}

434void SIScheduleBlock::undoReleaseSucc(SUnit *SU, SDep *SuccEdge) {
SUnit *SuccSU = SuccEdge->getSUnit();

if (SuccEdge->isWeak()) {
  ++SuccSU->WeakPredsLeft;
  return;
}
++SuccSU->NumPredsLeft;
442}

444void SIScheduleBlock::releaseSucc(SUnit *SU, SDep *SuccEdge) {
SUnit *SuccSU = SuccEdge->getSUnit();

if (SuccEdge->isWeak()) {
  --SuccSU->WeakPredsLeft;
  return;
}
451#ifndef NDEBUG
if (SuccSU->NumPredsLeft == 0) {
  dbgs() << "*** Scheduling failed! ***\n";
  DAG->dumpNode(*SuccSU);
  dbgs() << " has been released too many times!\n";
  llvm_unreachable(nullptr)::llvm::llvm_unreachable_internal(nullptr, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 456);
}
458#endif

--SuccSU->NumPredsLeft;
461}

463/// Release Successors of the SU that are in the block or not.
464void SIScheduleBlock::releaseSuccessors(SUnit *SU, bool InOrOutBlock) {
for (SDep& Succ : SU->Succs) {
  SUnit *SuccSU = Succ.getSUnit();

  if (SuccSU->NodeNum >= DAG->SUnits.size())
      continue;

  if (BC->isSUInBlock(SuccSU, ID) != InOrOutBlock)
    continue;

  releaseSucc(SU, &Succ);
  if (SuccSU->NumPredsLeft == 0 && InOrOutBlock)
    TopReadySUs.push_back(SuccSU);
}
478}

480void SIScheduleBlock::nodeScheduled(SUnit *SU) {
// Is in TopReadySUs
assert (!SU->NumPredsLeft)(static_cast <bool> (!SU->NumPredsLeft) ? void (0) :
 __assert_fail ("!SU->NumPredsLeft", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 482, __extension__ __PRETTY_FUNCTION__));
std::vector<SUnit *>::iterator I = llvm::find(TopReadySUs, SU);
if (I == TopReadySUs.end()) {
  dbgs() << "Data Structure Bug in SI Scheduler\n";
  llvm_unreachable(nullptr)::llvm::llvm_unreachable_internal(nullptr, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 486);
}
TopReadySUs.erase(I);

releaseSuccessors(SU, true);
// Scheduling this node will trigger a wait,
// thus propagate to other instructions that they do not need to wait either.
if (HasLowLatencyNonWaitedParent[NodeNum2Index[SU->NodeNum]])
  HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);

if (DAG->IsLowLatencySU[SU->NodeNum]) {
   for (SDep& Succ : SU->Succs) {
    std::map<unsigned, unsigned>::iterator I =
      NodeNum2Index.find(Succ.getSUnit()->NodeNum);
    if (I != NodeNum2Index.end())
      HasLowLatencyNonWaitedParent[I->second] = 1;
  }
}
SU->isScheduled = true;
505}

507void SIScheduleBlock::finalizeUnits() {
// We remove links from outside blocks to enable scheduling inside the block.
for (SUnit* SU : SUnits) {
  releaseSuccessors(SU, false);
  if (DAG->IsHighLatencySU[SU->NodeNum])
    HighLatencyBlock = true;
}
HasLowLatencyNonWaitedParent.resize(SUnits.size(), 0);
515}

517// we maintain ascending order of IDs
518void SIScheduleBlock::addPred(SIScheduleBlock *Pred) {
unsigned PredID = Pred->getID();

// Check if not already predecessor.
for (SIScheduleBlock* P : Preds) {
  if (PredID == P->getID())
    return;
}
Preds.push_back(Pred);

assert(none_of(Succs,(static_cast <bool> (none_of(Succs, [=](std::pair<SIScheduleBlock
*, SIScheduleBlockLinkKind> S) { return PredID == S.first->
getID(); }) && "Loop in the Block Graph!") ? void (0)
 : __assert_fail ("none_of(Succs, [=](std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S) { return PredID == S.first->getID(); }) && \"Loop in the Block Graph!\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 533, __extension__ __PRETTY_FUNCTION__))
               [=](std::pair<SIScheduleBlock*,(static_cast <bool> (none_of(Succs, [=](std::pair<SIScheduleBlock
*, SIScheduleBlockLinkKind> S) { return PredID == S.first->
getID(); }) && "Loop in the Block Graph!") ? void (0)
 : __assert_fail ("none_of(Succs, [=](std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S) { return PredID == S.first->getID(); }) && \"Loop in the Block Graph!\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 533, __extension__ __PRETTY_FUNCTION__))
                   SIScheduleBlockLinkKind> S) {(static_cast <bool> (none_of(Succs, [=](std::pair<SIScheduleBlock
*, SIScheduleBlockLinkKind> S) { return PredID == S.first->
getID(); }) && "Loop in the Block Graph!") ? void (0)
 : __assert_fail ("none_of(Succs, [=](std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S) { return PredID == S.first->getID(); }) && \"Loop in the Block Graph!\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 533, __extension__ __PRETTY_FUNCTION__))
                 return PredID == S.first->getID();(static_cast <bool> (none_of(Succs, [=](std::pair<SIScheduleBlock
*, SIScheduleBlockLinkKind> S) { return PredID == S.first->
getID(); }) && "Loop in the Block Graph!") ? void (0)
 : __assert_fail ("none_of(Succs, [=](std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S) { return PredID == S.first->getID(); }) && \"Loop in the Block Graph!\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 533, __extension__ __PRETTY_FUNCTION__))
                  }) &&(static_cast <bool> (none_of(Succs, [=](std::pair<SIScheduleBlock
*, SIScheduleBlockLinkKind> S) { return PredID == S.first->
getID(); }) && "Loop in the Block Graph!") ? void (0)
 : __assert_fail ("none_of(Succs, [=](std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S) { return PredID == S.first->getID(); }) && \"Loop in the Block Graph!\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 533, __extension__ __PRETTY_FUNCTION__))
       "Loop in the Block Graph!")(static_cast <bool> (none_of(Succs, [=](std::pair<SIScheduleBlock
*, SIScheduleBlockLinkKind> S) { return PredID == S.first->
getID(); }) && "Loop in the Block Graph!") ? void (0)
 : __assert_fail ("none_of(Succs, [=](std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S) { return PredID == S.first->getID(); }) && \"Loop in the Block Graph!\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 533, __extension__ __PRETTY_FUNCTION__));
534}

536void SIScheduleBlock::addSucc(SIScheduleBlock *Succ,
                            SIScheduleBlockLinkKind Kind) {
unsigned SuccID = Succ->getID();

// Check if not already predecessor.
for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> &S : Succs) {
  if (SuccID == S.first->getID()) {
    if (S.second == SIScheduleBlockLinkKind::NoData &&
        Kind == SIScheduleBlockLinkKind::Data)
      S.second = Kind;
    return;
  }
}
if (Succ->isHighLatencyBlock())
  ++NumHighLatencySuccessors;
Succs.push_back(std::make_pair(Succ, Kind));

assert(none_of(Preds,(static_cast <bool> (none_of(Preds, [=](SIScheduleBlock
 *P) { return SuccID == P->getID(); }) && "Loop in the Block Graph!"
) ? void (0) : __assert_fail ("none_of(Preds, [=](SIScheduleBlock *P) { return SuccID == P->getID(); }) && \"Loop in the Block Graph!\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 555, __extension__ __PRETTY_FUNCTION__))
               [=](SIScheduleBlock *P) { return SuccID == P->getID(); }) &&(static_cast <bool> (none_of(Preds, [=](SIScheduleBlock
 *P) { return SuccID == P->getID(); }) && "Loop in the Block Graph!"
) ? void (0) : __assert_fail ("none_of(Preds, [=](SIScheduleBlock *P) { return SuccID == P->getID(); }) && \"Loop in the Block Graph!\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 555, __extension__ __PRETTY_FUNCTION__))
       "Loop in the Block Graph!")(static_cast <bool> (none_of(Preds, [=](SIScheduleBlock
 *P) { return SuccID == P->getID(); }) && "Loop in the Block Graph!"
) ? void (0) : __assert_fail ("none_of(Preds, [=](SIScheduleBlock *P) { return SuccID == P->getID(); }) && \"Loop in the Block Graph!\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 555, __extension__ __PRETTY_FUNCTION__));
556}

558#ifndef NDEBUG
559void SIScheduleBlock::printDebug(bool full) {
dbgs() << "Block (" << ID << ")\n";
if (!full)
  return;

dbgs() << "\nContains High Latency Instruction: "
       << HighLatencyBlock << '\n';
dbgs() << "\nDepends On:\n";
for (SIScheduleBlock* P : Preds) {
  P->printDebug(false);
}

dbgs() << "\nSuccessors:\n";
for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S : Succs) {
  if (S.second == SIScheduleBlockLinkKind::Data)
    dbgs() << "(Data Dep) ";
  S.first->printDebug(false);
}

if (Scheduled) {
  dbgs() << "LiveInPressure "
         << LiveInPressure[AMDGPU::RegisterPressureSets::SReg_32] << ' '
         << LiveInPressure[AMDGPU::RegisterPressureSets::VGPR_32] << '\n';
  dbgs() << "LiveOutPressure "
         << LiveOutPressure[AMDGPU::RegisterPressureSets::SReg_32] << ' '
         << LiveOutPressure[AMDGPU::RegisterPressureSets::VGPR_32] << "\n\n";
  dbgs() << "LiveIns:\n";
  for (unsigned Reg : LiveInRegs)
    dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' ';

  dbgs() << "\nLiveOuts:\n";
  for (unsigned Reg : LiveOutRegs)
    dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' ';
}

dbgs() << "\nInstructions:\n";
for (const SUnit* SU : SUnits)
    DAG->dumpNode(*SU);

dbgs() << "///////////////////////\n";
599}
600#endif

602// SIScheduleBlockCreator //

604SIScheduleBlockCreator::SIScheduleBlockCreator(SIScheduleDAGMI *DAG)
  : DAG(DAG) {}

607SIScheduleBlocks
608SIScheduleBlockCreator::getBlocks(SISchedulerBlockCreatorVariant BlockVariant) {
std::map<SISchedulerBlockCreatorVariant, SIScheduleBlocks>::iterator B =
  Blocks.find(BlockVariant);
if (B == Blocks.end()) {
  SIScheduleBlocks Res;
  createBlocksForVariant(BlockVariant);
  topologicalSort();
  scheduleInsideBlocks();
  fillStats();
  Res.Blocks = CurrentBlocks;
  Res.TopDownIndex2Block = TopDownIndex2Block;
  Res.TopDownBlock2Index = TopDownBlock2Index;
  Blocks[BlockVariant] = Res;
  return Res;
} else {
  return B->second;
}
625}

627bool SIScheduleBlockCreator::isSUInBlock(SUnit *SU, unsigned ID) {
if (SU->NodeNum >= DAG->SUnits.size())
  return false;
return CurrentBlocks[Node2CurrentBlock[SU->NodeNum]]->getID() == ID;
631}

633void SIScheduleBlockCreator::colorHighLatenciesAlone() {
unsigned DAGSize = DAG->SUnits.size();

for (unsigned i = 0, e = DAGSize; i != e; ++i) {
  SUnit *SU = &DAG->SUnits[i];
  if (DAG->IsHighLatencySU[SU->NodeNum]) {
    CurrentColoring[SU->NodeNum] = NextReservedID++;
  }
}
642}

644static bool
645hasDataDependencyPred(const SUnit &SU, const SUnit &FromSU) {
for (const auto &PredDep : SU.Preds) {
  if (PredDep.getSUnit() == &FromSU &&
      PredDep.getKind() == llvm::SDep::Data)
    return true;
}
return false;
652}

654void SIScheduleBlockCreator::colorHighLatenciesGroups() {
unsigned DAGSize = DAG->SUnits.size();
unsigned NumHighLatencies = 0;
unsigned GroupSize;
int Color = NextReservedID;
unsigned Count = 0;
std::set<unsigned> FormingGroup;

for (unsigned i = 0, e = DAGSize; i != e; ++i) {
  SUnit *SU = &DAG->SUnits[i];
  if (DAG->IsHighLatencySU[SU->NodeNum])
    ++NumHighLatencies;
}

if (NumHighLatencies == 0)
  return;

if (NumHighLatencies <= 6)
  GroupSize = 2;
else if (NumHighLatencies <= 12)
  GroupSize = 3;
else
  GroupSize = 4;

for (unsigned SUNum : DAG->TopDownIndex2SU) {
  const SUnit &SU = DAG->SUnits[SUNum];
  if (DAG->IsHighLatencySU[SU.NodeNum]) {
    unsigned CompatibleGroup = true;
    int ProposedColor = Color;
    std::vector<int> AdditionalElements;

    // We don't want to put in the same block
    // two high latency instructions that depend
    // on each other.
    // One way would be to check canAddEdge
    // in both directions, but that currently is not
    // enough because there the high latency order is
    // enforced (via links).
    // Instead, look at the dependencies between the
    // high latency instructions and deduce if it is
    // a data dependency or not.
    for (unsigned j : FormingGroup) {
      bool HasSubGraph;
      std::vector<int> SubGraph;
      // By construction (topological order), if SU and
      // DAG->SUnits[j] are linked, DAG->SUnits[j] is neccessary
      // in the parent graph of SU.
701#ifndef NDEBUG
      SubGraph = DAG->GetTopo()->GetSubGraph(SU, DAG->SUnits[j],
                                             HasSubGraph);
      assert(!HasSubGraph)(static_cast <bool> (!HasSubGraph) ? void (0) : __assert_fail
 ("!HasSubGraph", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 704, __extension__ __PRETTY_FUNCTION__));
705#endif
      SubGraph = DAG->GetTopo()->GetSubGraph(DAG->SUnits[j], SU,
                                             HasSubGraph);
      if (!HasSubGraph)
        continue; // No dependencies between each other
      else if (SubGraph.size() > 5) {
        // Too many elements would be required to be added to the block.
        CompatibleGroup = false;
        break;
      }
      else {
        // Check the type of dependency
        for (unsigned k : SubGraph) {
          // If in the path to join the two instructions,
          // there is another high latency instruction,
          // or instructions colored for another block
          // abort the merge.
          if (DAG->IsHighLatencySU[k] ||
              (CurrentColoring[k] != ProposedColor &&
               CurrentColoring[k] != 0)) {
            CompatibleGroup = false;
            break;
          }
          // If one of the SU in the subgraph depends on the result of SU j,
          // there'll be a data dependency.
          if (hasDataDependencyPred(DAG->SUnits[k], DAG->SUnits[j])) {
            CompatibleGroup = false;
            break;
          }
        }
        if (!CompatibleGroup)
          break;
        // Same check for the SU
        if (hasDataDependencyPred(SU, DAG->SUnits[j])) {
          CompatibleGroup = false;
          break;
        }
        // Add all the required instructions to the block
        // These cannot live in another block (because they
        // depend (order dependency) on one of the
        // instruction in the block, and are required for the
        // high latency instruction we add.
        llvm::append_range(AdditionalElements, SubGraph);
      }
    }
    if (CompatibleGroup) {
      FormingGroup.insert(SU.NodeNum);
      for (unsigned j : AdditionalElements)
        CurrentColoring[j] = ProposedColor;
      CurrentColoring[SU.NodeNum] = ProposedColor;
      ++Count;
    }
    // Found one incompatible instruction,
    // or has filled a big enough group.
    // -> start a new one.
    if (!CompatibleGroup) {
      FormingGroup.clear();
      Color = ++NextReservedID;
      ProposedColor = Color;
      FormingGroup.insert(SU.NodeNum);
      CurrentColoring[SU.NodeNum] = ProposedColor;
      Count = 0;
    } else if (Count == GroupSize) {
      FormingGroup.clear();
      Color = ++NextReservedID;
      ProposedColor = Color;
      Count = 0;
    }
  }
}
775}

777void SIScheduleBlockCreator::colorComputeReservedDependencies() {
unsigned DAGSize = DAG->SUnits.size();
std::map<std::set<unsigned>, unsigned> ColorCombinations;

CurrentTopDownReservedDependencyColoring.clear();
CurrentBottomUpReservedDependencyColoring.clear();

CurrentTopDownReservedDependencyColoring.resize(DAGSize, 0);
CurrentBottomUpReservedDependencyColoring.resize(DAGSize, 0);

// Traverse TopDown, and give different colors to SUs depending
// on which combination of High Latencies they depend on.

for (unsigned SUNum : DAG->TopDownIndex2SU) {
  SUnit *SU = &DAG->SUnits[SUNum];
  std::set<unsigned> SUColors;

  // Already given.
  if (CurrentColoring[SU->NodeNum]) {
    CurrentTopDownReservedDependencyColoring[SU->NodeNum] =
      CurrentColoring[SU->NodeNum];
    continue;
  }

 for (SDep& PredDep : SU->Preds) {
    SUnit *Pred = PredDep.getSUnit();
    if (PredDep.isWeak() || Pred->NodeNum >= DAGSize)
      continue;
    if (CurrentTopDownReservedDependencyColoring[Pred->NodeNum] > 0)
      SUColors.insert(CurrentTopDownReservedDependencyColoring[Pred->NodeNum]);
  }
  // Color 0 by default.
  if (SUColors.empty())
    continue;
  // Same color than parents.
  if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
    CurrentTopDownReservedDependencyColoring[SU->NodeNum] =
      *SUColors.begin();
  else {
    std::map<std::set<unsigned>, unsigned>::iterator Pos =
      ColorCombinations.find(SUColors);
    if (Pos != ColorCombinations.end()) {
        CurrentTopDownReservedDependencyColoring[SU->NodeNum] = Pos->second;
    } else {
      CurrentTopDownReservedDependencyColoring[SU->NodeNum] =
        NextNonReservedID;
      ColorCombinations[SUColors] = NextNonReservedID++;
    }
  }
}

ColorCombinations.clear();

// Same as before, but BottomUp.

for (unsigned SUNum : DAG->BottomUpIndex2SU) {
  SUnit *SU = &DAG->SUnits[SUNum];
  std::set<unsigned> SUColors;

  // Already given.
  if (CurrentColoring[SU->NodeNum]) {
    CurrentBottomUpReservedDependencyColoring[SU->NodeNum] =
      CurrentColoring[SU->NodeNum];
    continue;
  }

  for (SDep& SuccDep : SU->Succs) {
    SUnit *Succ = SuccDep.getSUnit();
    if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
      continue;
    if (CurrentBottomUpReservedDependencyColoring[Succ->NodeNum] > 0)
      SUColors.insert(CurrentBottomUpReservedDependencyColoring[Succ->NodeNum]);
  }
  // Keep color 0.
  if (SUColors.empty())
    continue;
  // Same color than parents.
  if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
    CurrentBottomUpReservedDependencyColoring[SU->NodeNum] =
      *SUColors.begin();
  else {
    std::map<std::set<unsigned>, unsigned>::iterator Pos =
      ColorCombinations.find(SUColors);
    if (Pos != ColorCombinations.end()) {
      CurrentBottomUpReservedDependencyColoring[SU->NodeNum] = Pos->second;
    } else {
      CurrentBottomUpReservedDependencyColoring[SU->NodeNum] =
        NextNonReservedID;
      ColorCombinations[SUColors] = NextNonReservedID++;
    }
  }
}
869}

871void SIScheduleBlockCreator::colorAccordingToReservedDependencies() {
unsigned DAGSize = DAG->SUnits.size();
std::map<std::pair<unsigned, unsigned>, unsigned> ColorCombinations;

// Every combination of colors given by the top down
// and bottom up Reserved node dependency

for (unsigned i = 0, e = DAGSize; i != e; ++i) {
  SUnit *SU = &DAG->SUnits[i];
  std::pair<unsigned, unsigned> SUColors;

  // High latency instructions: already given.
  if (CurrentColoring[SU->NodeNum])
    continue;

  SUColors.first = CurrentTopDownReservedDependencyColoring[SU->NodeNum];
  SUColors.second = CurrentBottomUpReservedDependencyColoring[SU->NodeNum];

  std::map<std::pair<unsigned, unsigned>, unsigned>::iterator Pos =
    ColorCombinations.find(SUColors);
  if (Pos != ColorCombinations.end()) {
    CurrentColoring[SU->NodeNum] = Pos->second;
  } else {
    CurrentColoring[SU->NodeNum] = NextNonReservedID;
    ColorCombinations[SUColors] = NextNonReservedID++;
  }
}
898}

900void SIScheduleBlockCreator::colorEndsAccordingToDependencies() {
unsigned DAGSize = DAG->SUnits.size();
std::vector<int> PendingColoring = CurrentColoring;

assert(DAGSize >= 1 &&(static_cast <bool> (DAGSize >= 1 && CurrentBottomUpReservedDependencyColoring
.size() == DAGSize && CurrentTopDownReservedDependencyColoring
.size() == DAGSize) ? void (0) : __assert_fail ("DAGSize >= 1 && CurrentBottomUpReservedDependencyColoring.size() == DAGSize && CurrentTopDownReservedDependencyColoring.size() == DAGSize"
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 906, __extension__ __PRETTY_FUNCTION__))
       CurrentBottomUpReservedDependencyColoring.size() == DAGSize &&(static_cast <bool> (DAGSize >= 1 && CurrentBottomUpReservedDependencyColoring
.size() == DAGSize && CurrentTopDownReservedDependencyColoring
.size() == DAGSize) ? void (0) : __assert_fail ("DAGSize >= 1 && CurrentBottomUpReservedDependencyColoring.size() == DAGSize && CurrentTopDownReservedDependencyColoring.size() == DAGSize"
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 906, __extension__ __PRETTY_FUNCTION__))
       CurrentTopDownReservedDependencyColoring.size() == DAGSize)(static_cast <bool> (DAGSize >= 1 && CurrentBottomUpReservedDependencyColoring
.size() == DAGSize && CurrentTopDownReservedDependencyColoring
.size() == DAGSize) ? void (0) : __assert_fail ("DAGSize >= 1 && CurrentBottomUpReservedDependencyColoring.size() == DAGSize && CurrentTopDownReservedDependencyColoring.size() == DAGSize"
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 906, __extension__ __PRETTY_FUNCTION__));
// If there is no reserved block at all, do nothing. We don't want
// everything in one block.
if (*std::max_element(CurrentBottomUpReservedDependencyColoring.begin(),
                      CurrentBottomUpReservedDependencyColoring.end()) == 0 &&
    *std::max_element(CurrentTopDownReservedDependencyColoring.begin(),
                      CurrentTopDownReservedDependencyColoring.end()) == 0)
  return;

for (unsigned SUNum : DAG->BottomUpIndex2SU) {
  SUnit *SU = &DAG->SUnits[SUNum];
  std::set<unsigned> SUColors;
  std::set<unsigned> SUColorsPending;

  if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
    continue;

  if (CurrentBottomUpReservedDependencyColoring[SU->NodeNum] > 0 ||
      CurrentTopDownReservedDependencyColoring[SU->NodeNum] > 0)
    continue;

  for (SDep& SuccDep : SU->Succs) {
    SUnit *Succ = SuccDep.getSUnit();
    if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
      continue;
    if (CurrentBottomUpReservedDependencyColoring[Succ->NodeNum] > 0 ||
        CurrentTopDownReservedDependencyColoring[Succ->NodeNum] > 0)
      SUColors.insert(CurrentColoring[Succ->NodeNum]);
    SUColorsPending.insert(PendingColoring[Succ->NodeNum]);
  }
  // If there is only one child/parent block, and that block
  // is not among the ones we are removing in this path, then
  // merge the instruction to that block
  if (SUColors.size() == 1 && SUColorsPending.size() == 1)
    PendingColoring[SU->NodeNum] = *SUColors.begin();
  else // TODO: Attribute new colors depending on color
       // combination of children.
    PendingColoring[SU->NodeNum] = NextNonReservedID++;
}
CurrentColoring = PendingColoring;
946}


949void SIScheduleBlockCreator::colorForceConsecutiveOrderInGroup() {
unsigned DAGSize = DAG->SUnits.size();
unsigned PreviousColor;
std::set<unsigned> SeenColors;

if (DAGSize <= 1)
  return;

PreviousColor = CurrentColoring[0];

for (unsigned i = 1, e = DAGSize; i != e; ++i) {
  SUnit *SU = &DAG->SUnits[i];
  unsigned CurrentColor = CurrentColoring[i];
  unsigned PreviousColorSave = PreviousColor;
  assert(i == SU->NodeNum)(static_cast <bool> (i == SU->NodeNum) ? void (0) : __assert_fail
 ("i == SU->NodeNum", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 963, __extension__ __PRETTY_FUNCTION__));

  if (CurrentColor != PreviousColor)
    SeenColors.insert(PreviousColor);
  PreviousColor = CurrentColor;

  if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
    continue;

  if (SeenColors.find(CurrentColor) == SeenColors.end())
    continue;

  if (PreviousColorSave != CurrentColor)
    CurrentColoring[i] = NextNonReservedID++;
  else
    CurrentColoring[i] = CurrentColoring[i-1];
}
980}

982void SIScheduleBlockCreator::colorMergeConstantLoadsNextGroup() {
unsigned DAGSize = DAG->SUnits.size();

for (unsigned SUNum : DAG->BottomUpIndex2SU) {
  SUnit *SU = &DAG->SUnits[SUNum];
  std::set<unsigned> SUColors;

  if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
    continue;

  // No predecessor: Vgpr constant loading.
  // Low latency instructions usually have a predecessor (the address)
  if (SU->Preds.size() > 0 && !DAG->IsLowLatencySU[SU->NodeNum])
    continue;

  for (SDep& SuccDep : SU->Succs) {
    SUnit *Succ = SuccDep.getSUnit();
    if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
      continue;
    SUColors.insert(CurrentColoring[Succ->NodeNum]);
  }
  if (SUColors.size() == 1)
    CurrentColoring[SU->NodeNum] = *SUColors.begin();
}
1006}

1008void SIScheduleBlockCreator::colorMergeIfPossibleNextGroup() {
unsigned DAGSize = DAG->SUnits.size();

for (unsigned SUNum : DAG->BottomUpIndex2SU) {
  SUnit *SU = &DAG->SUnits[SUNum];
  std::set<unsigned> SUColors;

  if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
    continue;

  for (SDep& SuccDep : SU->Succs) {
     SUnit *Succ = SuccDep.getSUnit();
    if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
      continue;
    SUColors.insert(CurrentColoring[Succ->NodeNum]);
  }
  if (SUColors.size() == 1)
    CurrentColoring[SU->NodeNum] = *SUColors.begin();
}
1027}

1029void SIScheduleBlockCreator::colorMergeIfPossibleNextGroupOnlyForReserved() {
unsigned DAGSize = DAG->SUnits.size();

for (unsigned SUNum : DAG->BottomUpIndex2SU) {
  SUnit *SU = &DAG->SUnits[SUNum];
  std::set<unsigned> SUColors;

  if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
    continue;

  for (SDep& SuccDep : SU->Succs) {
     SUnit *Succ = SuccDep.getSUnit();
    if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
      continue;
    SUColors.insert(CurrentColoring[Succ->NodeNum]);
  }
  if (SUColors.size() == 1 && *SUColors.begin() <= DAGSize)
    CurrentColoring[SU->NodeNum] = *SUColors.begin();
}
1048}

1050void SIScheduleBlockCreator::colorMergeIfPossibleSmallGroupsToNextGroup() {
unsigned DAGSize = DAG->SUnits.size();
std::map<unsigned, unsigned> ColorCount;

for (unsigned SUNum : DAG->BottomUpIndex2SU) {
  SUnit *SU = &DAG->SUnits[SUNum];
  unsigned color = CurrentColoring[SU->NodeNum];
   ++ColorCount[color];
}

for (unsigned SUNum : DAG->BottomUpIndex2SU) {
  SUnit *SU = &DAG->SUnits[SUNum];
  unsigned color = CurrentColoring[SU->NodeNum];
  std::set<unsigned> SUColors;

  if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
    continue;

  if (ColorCount[color] > 1)
    continue;

  for (SDep& SuccDep : SU->Succs) {
     SUnit *Succ = SuccDep.getSUnit();
    if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
      continue;
    SUColors.insert(CurrentColoring[Succ->NodeNum]);
  }
  if (SUColors.size() == 1 && *SUColors.begin() != color) {
    --ColorCount[color];
    CurrentColoring[SU->NodeNum] = *SUColors.begin();
    ++ColorCount[*SUColors.begin()];
  }
}
1083}

1085void SIScheduleBlockCreator::cutHugeBlocks() {
// TODO
1087}

1089void SIScheduleBlockCreator::regroupNoUserInstructions() {
unsigned DAGSize = DAG->SUnits.size();
int GroupID = NextNonReservedID++;

for (unsigned SUNum : DAG->BottomUpIndex2SU) {
  SUnit *SU = &DAG->SUnits[SUNum];
  bool hasSuccessor = false;

  if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
    continue;

  for (SDep& SuccDep : SU->Succs) {
     SUnit *Succ = SuccDep.getSUnit();
    if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
      continue;
    hasSuccessor = true;
  }
  if (!hasSuccessor)
    CurrentColoring[SU->NodeNum] = GroupID;
}
1109}

1111void SIScheduleBlockCreator::colorExports() {
unsigned ExportColor = NextNonReservedID++;
SmallVector<unsigned, 8> ExpGroup;

// Put all exports together in a block.
// The block will naturally end up being scheduled last,
// thus putting exports at the end of the schedule, which
// is better for performance.
// However we must ensure, for safety, the exports can be put
// together in the same block without any other instruction.
// This could happen, for example, when scheduling after regalloc
// if reloading a spilled register from memory using the same
// register than used in a previous export.
// If that happens, do not regroup the exports.
for (unsigned SUNum : DAG->TopDownIndex2SU) {
  const SUnit &SU = DAG->SUnits[SUNum];
  if (SIInstrInfo::isEXP(*SU.getInstr())) {
    // Check the EXP can be added to the group safely,
    // ie without needing any other instruction.
    // The EXP is allowed to depend on other EXP
    // (they will be in the same group).
    for (unsigned j : ExpGroup) {
      bool HasSubGraph;
      std::vector<int> SubGraph;
      // By construction (topological order), if SU and
      // DAG->SUnits[j] are linked, DAG->SUnits[j] is neccessary
      // in the parent graph of SU.
1138#ifndef NDEBUG
      SubGraph = DAG->GetTopo()->GetSubGraph(SU, DAG->SUnits[j],
                                             HasSubGraph);
      assert(!HasSubGraph)(static_cast <bool> (!HasSubGraph) ? void (0) : __assert_fail
 ("!HasSubGraph", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 1141, __extension__ __PRETTY_FUNCTION__));
1142#endif
      SubGraph = DAG->GetTopo()->GetSubGraph(DAG->SUnits[j], SU,
                                             HasSubGraph);
      if (!HasSubGraph)
        continue; // No dependencies between each other

      // SubGraph contains all the instructions required
      // between EXP SUnits[j] and EXP SU.
      for (unsigned k : SubGraph) {
        if (!SIInstrInfo::isEXP(*DAG->SUnits[k].getInstr()))
          // Other instructions than EXP would be required in the group.
          // Abort the groupping.
          return;
      }
    }

    ExpGroup.push_back(SUNum);
  }
}

// The group can be formed. Give the color.
for (unsigned j : ExpGroup)
  CurrentColoring[j] = ExportColor;
1165}

1167void SIScheduleBlockCreator::createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant) {
unsigned DAGSize = DAG->SUnits.size();
std::map<unsigned,unsigned> RealID;

CurrentBlocks.clear();
CurrentColoring.clear();
CurrentColoring.resize(DAGSize, 0);
Node2CurrentBlock.clear();

// Restore links previous scheduling variant has overridden.
DAG->restoreSULinksLeft();

NextReservedID = 1;
NextNonReservedID = DAGSize + 1;

LLVM_DEBUG(dbgs() << "Coloring the graph\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Coloring the graph\n"
; } } while (false);

if (BlockVariant == SISchedulerBlockCreatorVariant::LatenciesGrouped)
  colorHighLatenciesGroups();
else
  colorHighLatenciesAlone();
colorComputeReservedDependencies();
colorAccordingToReservedDependencies();
colorEndsAccordingToDependencies();
if (BlockVariant == SISchedulerBlockCreatorVariant::LatenciesAlonePlusConsecutive)
  colorForceConsecutiveOrderInGroup();
regroupNoUserInstructions();
colorMergeConstantLoadsNextGroup();
colorMergeIfPossibleNextGroupOnlyForReserved();
colorExports();

// Put SUs of same color into same block
Node2CurrentBlock.resize(DAGSize, -1);
for (unsigned i = 0, e = DAGSize; i != e; ++i) {
  SUnit *SU = &DAG->SUnits[i];
  unsigned Color = CurrentColoring[SU->NodeNum];
  if (RealID.find(Color) == RealID.end()) {
    int ID = CurrentBlocks.size();
    BlockPtrs.push_back(std::make_unique<SIScheduleBlock>(DAG, this, ID));
    CurrentBlocks.push_back(BlockPtrs.rbegin()->get());
    RealID[Color] = ID;
  }
  CurrentBlocks[RealID[Color]]->addUnit(SU);
  Node2CurrentBlock[SU->NodeNum] = RealID[Color];
}

// Build dependencies between blocks.
for (unsigned i = 0, e = DAGSize; i != e; ++i) {
  SUnit *SU = &DAG->SUnits[i];
  int SUID = Node2CurrentBlock[i];
   for (SDep& SuccDep : SU->Succs) {
     SUnit *Succ = SuccDep.getSUnit();
    if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
      continue;
    if (Node2CurrentBlock[Succ->NodeNum] != SUID)
      CurrentBlocks[SUID]->addSucc(CurrentBlocks[Node2CurrentBlock[Succ->NodeNum]],
                                   SuccDep.isCtrl() ? NoData : Data);
  }
  for (SDep& PredDep : SU->Preds) {
    SUnit *Pred = PredDep.getSUnit();
    if (PredDep.isWeak() || Pred->NodeNum >= DAGSize)
      continue;
    if (Node2CurrentBlock[Pred->NodeNum] != SUID)
      CurrentBlocks[SUID]->addPred(CurrentBlocks[Node2CurrentBlock[Pred->NodeNum]]);
  }
}

// Free root and leafs of all blocks to enable scheduling inside them.
for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) {
  SIScheduleBlock *Block = CurrentBlocks[i];
  Block->finalizeUnits();
}
LLVM_DEBUG(dbgs() << "Blocks created:\n\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Blocks created:\n\n"
; for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i)
 { SIScheduleBlock *Block = CurrentBlocks[i]; Block->printDebug
(true); }; } } while (false)
           for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) {do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Blocks created:\n\n"
; for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i)
 { SIScheduleBlock *Block = CurrentBlocks[i]; Block->printDebug
(true); }; } } while (false)
             SIScheduleBlock *Block = CurrentBlocks[i];do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Blocks created:\n\n"
; for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i)
 { SIScheduleBlock *Block = CurrentBlocks[i]; Block->printDebug
(true); }; } } while (false)
             Block->printDebug(true);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Blocks created:\n\n"
; for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i)
 { SIScheduleBlock *Block = CurrentBlocks[i]; Block->printDebug
(true); }; } } while (false)
           })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Blocks created:\n\n"
; for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i)
 { SIScheduleBlock *Block = CurrentBlocks[i]; Block->printDebug
(true); }; } } while (false);
1244}

// Two functions taken from Codegen/MachineScheduler.cpp

1248/// Non-const version.
1249static MachineBasicBlock::iterator
1250nextIfDebug(MachineBasicBlock::iterator I,
          MachineBasicBlock::const_iterator End) {
for (; I != End; ++I) {
  if (!I->isDebugInstr())
    break;
}
return I;
1257}

1259void SIScheduleBlockCreator::topologicalSort() {
unsigned DAGSize = CurrentBlocks.size();
std::vector<int> WorkList;

LLVM_DEBUG(dbgs() << "Topological Sort\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Topological Sort\n"
; } } while (false);

WorkList.reserve(DAGSize);
TopDownIndex2Block.resize(DAGSize);
TopDownBlock2Index.resize(DAGSize);
BottomUpIndex2Block.resize(DAGSize);

for (unsigned i = 0, e = DAGSize; i != e; ++i) {
  SIScheduleBlock *Block = CurrentBlocks[i];
  unsigned Degree = Block->getSuccs().size();
  TopDownBlock2Index[i] = Degree;
  if (Degree == 0) {
    WorkList.push_back(i);
  }
}

int Id = DAGSize;
while (!WorkList.empty()) {
  int i = WorkList.back();
  SIScheduleBlock *Block = CurrentBlocks[i];
  WorkList.pop_back();
  TopDownBlock2Index[i] = --Id;
  TopDownIndex2Block[Id] = i;
  for (SIScheduleBlock* Pred : Block->getPreds()) {
    if (!--TopDownBlock2Index[Pred->getID()])
      WorkList.push_back(Pred->getID());
  }
}

1292#ifndef NDEBUG
// Check correctness of the ordering.
for (unsigned i = 0, e = DAGSize; i != e; ++i) {
  SIScheduleBlock *Block = CurrentBlocks[i];
  for (SIScheduleBlock* Pred : Block->getPreds()) {
    assert(TopDownBlock2Index[i] > TopDownBlock2Index[Pred->getID()] &&(static_cast <bool> (TopDownBlock2Index[i] > TopDownBlock2Index
[Pred->getID()] && "Wrong Top Down topological sorting"
) ? void (0) : __assert_fail ("TopDownBlock2Index[i] > TopDownBlock2Index[Pred->getID()] && \"Wrong Top Down topological sorting\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 1298, __extension__ __PRETTY_FUNCTION__))
    "Wrong Top Down topological sorting")(static_cast <bool> (TopDownBlock2Index[i] > TopDownBlock2Index
[Pred->getID()] && "Wrong Top Down topological sorting"
) ? void (0) : __assert_fail ("TopDownBlock2Index[i] > TopDownBlock2Index[Pred->getID()] && \"Wrong Top Down topological sorting\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 1298, __extension__ __PRETTY_FUNCTION__));
  }
}
1301#endif

BottomUpIndex2Block = std::vector<int>(TopDownIndex2Block.rbegin(),
                                       TopDownIndex2Block.rend());
1305}

1307void SIScheduleBlockCreator::scheduleInsideBlocks() {
unsigned DAGSize = CurrentBlocks.size();

LLVM_DEBUG(dbgs() << "\nScheduling Blocks\n\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "\nScheduling Blocks\n\n"
; } } while (false);

// We do schedule a valid scheduling such that a Block corresponds
// to a range of instructions.
LLVM_DEBUG(dbgs() << "First phase: Fast scheduling for Reg Liveness\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "First phase: Fast scheduling for Reg Liveness\n"
; } } while (false);
for (unsigned i = 0, e = DAGSize; i != e; ++i) {
  SIScheduleBlock *Block = CurrentBlocks[i];
  Block->fastSchedule();
}

// Note: the following code, and the part restoring previous position
// is by far the most expensive operation of the Scheduler.

// Do not update CurrentTop.
MachineBasicBlock::iterator CurrentTopFastSched = DAG->getCurrentTop();
std::vector<MachineBasicBlock::iterator> PosOld;
std::vector<MachineBasicBlock::iterator> PosNew;
PosOld.reserve(DAG->SUnits.size());
PosNew.reserve(DAG->SUnits.size());

for (unsigned i = 0, e = DAGSize; i != e; ++i) {
  int BlockIndice = TopDownIndex2Block[i];
  SIScheduleBlock *Block = CurrentBlocks[BlockIndice];
  std::vector<SUnit*> SUs = Block->getScheduledUnits();

  for (SUnit* SU : SUs) {
    MachineInstr *MI = SU->getInstr();
    MachineBasicBlock::iterator Pos = MI;
    PosOld.push_back(Pos);
    if (&*CurrentTopFastSched == MI) {
      PosNew.push_back(Pos);
      CurrentTopFastSched = nextIfDebug(++CurrentTopFastSched,
                                        DAG->getCurrentBottom());
    } else {
      // Update the instruction stream.
      DAG->getBB()->splice(CurrentTopFastSched, DAG->getBB(), MI);

      // Update LiveIntervals.
      // Note: Moving all instructions and calling handleMove every time
      // is the most cpu intensive operation of the scheduler.
      // It would gain a lot if there was a way to recompute the
      // LiveIntervals for the entire scheduling region.
      DAG->getLIS()->handleMove(*MI, /*UpdateFlags=*/true);
      PosNew.push_back(CurrentTopFastSched);
    }
  }
}

// Now we have Block of SUs == Block of MI.
// We do the final schedule for the instructions inside the block.
// The property that all the SUs of the Block are grouped together as MI
// is used for correct reg usage tracking.
for (unsigned i = 0, e = DAGSize; i != e; ++i) {
  SIScheduleBlock *Block = CurrentBlocks[i];
  std::vector<SUnit*> SUs = Block->getScheduledUnits();
  Block->schedule((*SUs.begin())->getInstr(), (*SUs.rbegin())->getInstr());
}

LLVM_DEBUG(dbgs() << "Restoring MI Pos\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Restoring MI Pos\n"
; } } while (false);
// Restore old ordering (which prevents a LIS->handleMove bug).
for (unsigned i = PosOld.size(), e = 0; i != e; --i) {
  MachineBasicBlock::iterator POld = PosOld[i-1];
  MachineBasicBlock::iterator PNew = PosNew[i-1];
  if (PNew != POld) {
    // Update the instruction stream.
    DAG->getBB()->splice(POld, DAG->getBB(), PNew);

    // Update LiveIntervals.
    DAG->getLIS()->handleMove(*POld, /*UpdateFlags=*/true);
  }
}

LLVM_DEBUG(for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) {do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { for (unsigned i = 0, e = CurrentBlocks
.size(); i != e; ++i) { SIScheduleBlock *Block = CurrentBlocks
[i]; Block->printDebug(true); }; } } while (false)
  SIScheduleBlock *Block = CurrentBlocks[i];do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { for (unsigned i = 0, e = CurrentBlocks
.size(); i != e; ++i) { SIScheduleBlock *Block = CurrentBlocks
[i]; Block->printDebug(true); }; } } while (false)
  Block->printDebug(true);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { for (unsigned i = 0, e = CurrentBlocks
.size(); i != e; ++i) { SIScheduleBlock *Block = CurrentBlocks
[i]; Block->printDebug(true); }; } } while (false)
})do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { for (unsigned i = 0, e = CurrentBlocks
.size(); i != e; ++i) { SIScheduleBlock *Block = CurrentBlocks
[i]; Block->printDebug(true); }; } } while (false);
1386}

1388void SIScheduleBlockCreator::fillStats() {
unsigned DAGSize = CurrentBlocks.size();

for (unsigned i = 0, e = DAGSize; i != e; ++i) {
  int BlockIndice = TopDownIndex2Block[i];
  SIScheduleBlock *Block = CurrentBlocks[BlockIndice];
  if (Block->getPreds().empty())
    Block->Depth = 0;
  else {
    unsigned Depth = 0;
    for (SIScheduleBlock *Pred : Block->getPreds()) {
      if (Depth < Pred->Depth + Pred->getCost())
        Depth = Pred->Depth + Pred->getCost();
    }
    Block->Depth = Depth;
  }
}

for (unsigned i = 0, e = DAGSize; i != e; ++i) {
  int BlockIndice = BottomUpIndex2Block[i];
  SIScheduleBlock *Block = CurrentBlocks[BlockIndice];
  if (Block->getSuccs().empty())
    Block->Height = 0;
  else {
    unsigned Height = 0;
    for (const auto &Succ : Block->getSuccs())
      Height = std::max(Height, Succ.first->Height + Succ.first->getCost());
    Block->Height = Height;
  }
}
1418}

// SIScheduleBlockScheduler //

1422SIScheduleBlockScheduler::SIScheduleBlockScheduler(SIScheduleDAGMI *DAG,
                                                 SISchedulerBlockSchedulerVariant Variant,
                                                 SIScheduleBlocks  BlocksStruct) :
DAG(DAG), Variant(Variant), Blocks(BlocksStruct.Blocks),
LastPosWaitedHighLatency(0), NumBlockScheduled(0), VregCurrentUsage(0),
SregCurrentUsage(0), maxVregUsage(0), maxSregUsage(0) {

// Fill the usage of every output
// Warning: while by construction we always have a link between two blocks
// when one needs a result from the other, the number of users of an output
// is not the sum of child blocks having as input the same virtual register.
// Here is an example. A produces x and y. B eats x and produces x'.
// C eats x' and y. The register coalescer may have attributed the same
// virtual register to x and x'.
// To count accurately, we do a topological sort. In case the register is
// found for several parents, we increment the usage of the one with the
// highest topological index.
LiveOutRegsNumUsages.resize(Blocks.size());
for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
9
←
Assuming 'i' is equal to 'e'→
10
←
Loop condition is false. Execution continues on line 1465→
  SIScheduleBlock *Block = Blocks[i];
  for (unsigned Reg : Block->getInRegs()) {
    bool Found = false;
    int topoInd = -1;
    for (SIScheduleBlock* Pred: Block->getPreds()) {
      std::set<unsigned> PredOutRegs = Pred->getOutRegs();
      std::set<unsigned>::iterator RegPos = PredOutRegs.find(Reg);

      if (RegPos != PredOutRegs.end()) {
        Found = true;
        if (topoInd < BlocksStruct.TopDownBlock2Index[Pred->getID()]) {
          topoInd = BlocksStruct.TopDownBlock2Index[Pred->getID()];
        }
      }
    }

    if (!Found)
      continue;

    int PredID = BlocksStruct.TopDownIndex2Block[topoInd];
    ++LiveOutRegsNumUsages[PredID][Reg];
  }
}

LastPosHighLatencyParentScheduled.resize(Blocks.size(), 0);
BlockNumPredsLeft.resize(Blocks.size());
BlockNumSuccsLeft.resize(Blocks.size());

for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
11
←
Assuming 'i' is equal to 'e'→
12
←
Loop condition is false. Execution continues on line 1476→
  SIScheduleBlock *Block = Blocks[i];
  BlockNumPredsLeft[i] = Block->getPreds().size();
  BlockNumSuccsLeft[i] = Block->getSuccs().size();
}

1475#ifndef NDEBUG
for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
13
←
Assuming 'i' is equal to 'e'→
14
←
Loop condition is false. Execution continues on line 1482→
  SIScheduleBlock *Block = Blocks[i];
  assert(Block->getID() == i)(static_cast <bool> (Block->getID() == i) ? void (0)
 : __assert_fail ("Block->getID() == i", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 1478, __extension__ __PRETTY_FUNCTION__));
}
1480#endif

std::set<unsigned> InRegs = DAG->getInRegs();
addLiveRegs(InRegs);

// Increase LiveOutRegsNumUsages for blocks
// producing registers consumed in another
// scheduling region.
for (unsigned Reg : DAG->getOutRegs()) {
  for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
    // Do reverse traversal
    int ID = BlocksStruct.TopDownIndex2Block[Blocks.size()-1-i];
    SIScheduleBlock *Block = Blocks[ID];
    const std::set<unsigned> &OutRegs = Block->getOutRegs();

    if (OutRegs.find(Reg) == OutRegs.end())
      continue;

    ++LiveOutRegsNumUsages[ID][Reg];
    break;
  }
}

// Fill LiveRegsConsumers for regs that were already
// defined before scheduling.
for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
15
←
Assuming 'i' is equal to 'e'→
16
←
Loop condition is false. Execution continues on line 1524→
  SIScheduleBlock *Block = Blocks[i];
  for (unsigned Reg : Block->getInRegs()) {
    bool Found = false;
    for (SIScheduleBlock* Pred: Block->getPreds()) {
      std::set<unsigned> PredOutRegs = Pred->getOutRegs();
      std::set<unsigned>::iterator RegPos = PredOutRegs.find(Reg);

      if (RegPos != PredOutRegs.end()) {
        Found = true;
        break;
      }
    }

    if (!Found)
      ++LiveRegsConsumers[Reg];
  }
}

for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
17
←
Assuming 'i' is equal to 'e'→
18
←
Loop condition is false. Execution continues on line 1531→
  SIScheduleBlock *Block = Blocks[i];
  if (BlockNumPredsLeft[i] == 0) {
    ReadyBlocks.push_back(Block);
  }
}

while (SIScheduleBlock *Block = pickBlock()) {
19
←
Calling 'SIScheduleBlockScheduler::pickBlock'→
  BlocksScheduled.push_back(Block);
  blockScheduled(Block);
}

LLVM_DEBUG(dbgs() << "Block Order:"; for (SIScheduleBlock *Blockdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Block Order:"; for (
SIScheduleBlock *Block : BlocksScheduled) { dbgs() << ' '
 << Block->getID(); } dbgs() << '\n';; } } while
 (false)
                                          : BlocksScheduled) {do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Block Order:"; for (
SIScheduleBlock *Block : BlocksScheduled) { dbgs() << ' '
 << Block->getID(); } dbgs() << '\n';; } } while
 (false)
  dbgs() << ' ' << Block->getID();do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Block Order:"; for (
SIScheduleBlock *Block : BlocksScheduled) { dbgs() << ' '
 << Block->getID(); } dbgs() << '\n';; } } while
 (false)
} dbgs() << '\n';)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Block Order:"; for (
SIScheduleBlock *Block : BlocksScheduled) { dbgs() << ' '
 << Block->getID(); } dbgs() << '\n';; } } while
 (false);
1540}

1542bool SIScheduleBlockScheduler::tryCandidateLatency(SIBlockSchedCandidate &Cand,
                                                 SIBlockSchedCandidate &TryCand) {
if (!Cand.isValid()) {
  TryCand.Reason = NodeOrder;
  return true;
}

// Try to hide high latencies.
if (SISched::tryLess(TryCand.LastPosHighLatParentScheduled,
               Cand.LastPosHighLatParentScheduled, TryCand, Cand, Latency))
  return true;
// Schedule high latencies early so you can hide them better.
if (SISched::tryGreater(TryCand.IsHighLatency, Cand.IsHighLatency,
                        TryCand, Cand, Latency))
  return true;
if (TryCand.IsHighLatency && SISched::tryGreater(TryCand.Height, Cand.Height,
                                                 TryCand, Cand, Depth))
  return true;
if (SISched::tryGreater(TryCand.NumHighLatencySuccessors,
                        Cand.NumHighLatencySuccessors,
                        TryCand, Cand, Successor))
  return true;
return false;
1565}

1567bool SIScheduleBlockScheduler::tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
                                                  SIBlockSchedCandidate &TryCand) {
if (!Cand.isValid()) {
  TryCand.Reason = NodeOrder;
  return true;
}

if (SISched::tryLess(TryCand.VGPRUsageDiff > 0, Cand.VGPRUsageDiff > 0,
                     TryCand, Cand, RegUsage))
  return true;
if (SISched::tryGreater(TryCand.NumSuccessors > 0,
                        Cand.NumSuccessors > 0,
                        TryCand, Cand, Successor))
  return true;
if (SISched::tryGreater(TryCand.Height, Cand.Height, TryCand, Cand, Depth))
  return true;
if (SISched::tryLess(TryCand.VGPRUsageDiff, Cand.VGPRUsageDiff,
                     TryCand, Cand, RegUsage))
  return true;
return false;
1587}

1589SIScheduleBlock *SIScheduleBlockScheduler::pickBlock() {
SIBlockSchedCandidate Cand;
std::vector<SIScheduleBlock*>::iterator Best;
SIScheduleBlock *Block;
if (ReadyBlocks.empty())
20
←
Assuming the condition is false→
21
←
Taking false branch→
  return nullptr;

DAG->fillVgprSgprCost(LiveRegs.begin(), LiveRegs.end(),
                      VregCurrentUsage, SregCurrentUsage);
if (VregCurrentUsage > maxVregUsage)
22
←
Assuming field 'VregCurrentUsage' is <= field 'maxVregUsage'→
23
←
Taking false branch→
  maxVregUsage = VregCurrentUsage;
if (SregCurrentUsage > maxSregUsage)
24
←
Assuming field 'SregCurrentUsage' is <= field 'maxSregUsage'→
  maxSregUsage = SregCurrentUsage;
LLVM_DEBUG(dbgs() << "Picking New Blocks\n"; dbgs() << "Available: ";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking New Blocks\n"
; dbgs() << "Available: "; for (SIScheduleBlock *Block :
 ReadyBlocks) dbgs() << Block->getID() << ' ';
 dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs
) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) <<
 ' '; dbgs() << '\n'; dbgs() << "Current VGPRs: "
 << VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: "
 << SregCurrentUsage << '\n';; } } while (false)
25
←
Taking false branch→
26
←
Assuming 'DebugFlag' is true→
27
←
Assuming the condition is false→
28
←
Taking false branch→
29
←
Loop condition is false.  Exiting loop→
           for (SIScheduleBlock *Blockdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking New Blocks\n"
; dbgs() << "Available: "; for (SIScheduleBlock *Block :
 ReadyBlocks) dbgs() << Block->getID() << ' ';
 dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs
) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) <<
 ' '; dbgs() << '\n'; dbgs() << "Current VGPRs: "
 << VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: "
 << SregCurrentUsage << '\n';; } } while (false)
                : ReadyBlocks) dbgs()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking New Blocks\n"
; dbgs() << "Available: "; for (SIScheduleBlock *Block :
 ReadyBlocks) dbgs() << Block->getID() << ' ';
 dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs
) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) <<
 ' '; dbgs() << '\n'; dbgs() << "Current VGPRs: "
 << VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: "
 << SregCurrentUsage << '\n';; } } while (false)
           << Block->getID() << ' ';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking New Blocks\n"
; dbgs() << "Available: "; for (SIScheduleBlock *Block :
 ReadyBlocks) dbgs() << Block->getID() << ' ';
 dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs
) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) <<
 ' '; dbgs() << '\n'; dbgs() << "Current VGPRs: "
 << VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: "
 << SregCurrentUsage << '\n';; } } while (false)
           dbgs() << "\nCurrent Live:\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking New Blocks\n"
; dbgs() << "Available: "; for (SIScheduleBlock *Block :
 ReadyBlocks) dbgs() << Block->getID() << ' ';
 dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs
) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) <<
 ' '; dbgs() << '\n'; dbgs() << "Current VGPRs: "
 << VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: "
 << SregCurrentUsage << '\n';; } } while (false)
           for (unsigned Regdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking New Blocks\n"
; dbgs() << "Available: "; for (SIScheduleBlock *Block :
 ReadyBlocks) dbgs() << Block->getID() << ' ';
 dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs
) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) <<
 ' '; dbgs() << '\n'; dbgs() << "Current VGPRs: "
 << VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: "
 << SregCurrentUsage << '\n';; } } while (false)
                : LiveRegs) dbgs()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking New Blocks\n"
; dbgs() << "Available: "; for (SIScheduleBlock *Block :
 ReadyBlocks) dbgs() << Block->getID() << ' ';
 dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs
) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) <<
 ' '; dbgs() << '\n'; dbgs() << "Current VGPRs: "
 << VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: "
 << SregCurrentUsage << '\n';; } } while (false)
           << printVRegOrUnit(Reg, DAG->getTRI()) << ' ';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking New Blocks\n"
; dbgs() << "Available: "; for (SIScheduleBlock *Block :
 ReadyBlocks) dbgs() << Block->getID() << ' ';
 dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs
) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) <<
 ' '; dbgs() << '\n'; dbgs() << "Current VGPRs: "
 << VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: "
 << SregCurrentUsage << '\n';; } } while (false)
           dbgs() << '\n';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking New Blocks\n"
; dbgs() << "Available: "; for (SIScheduleBlock *Block :
 ReadyBlocks) dbgs() << Block->getID() << ' ';
 dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs
) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) <<
 ' '; dbgs() << '\n'; dbgs() << "Current VGPRs: "
 << VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: "
 << SregCurrentUsage << '\n';; } } while (false)
           dbgs() << "Current VGPRs: " << VregCurrentUsage << '\n';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking New Blocks\n"
; dbgs() << "Available: "; for (SIScheduleBlock *Block :
 ReadyBlocks) dbgs() << Block->getID() << ' ';
 dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs
) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) <<
 ' '; dbgs() << '\n'; dbgs() << "Current VGPRs: "
 << VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: "
 << SregCurrentUsage << '\n';; } } while (false)
           dbgs() << "Current SGPRs: " << SregCurrentUsage << '\n';)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking New Blocks\n"
; dbgs() << "Available: "; for (SIScheduleBlock *Block :
 ReadyBlocks) dbgs() << Block->getID() << ' ';
 dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs
) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) <<
 ' '; dbgs() << '\n'; dbgs() << "Current VGPRs: "
 << VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: "
 << SregCurrentUsage << '\n';; } } while (false);

Cand.Block = nullptr;
30
←
Null pointer value stored to 'Cand.Block'→
for (std::vector<SIScheduleBlock*>::iterator I = ReadyBlocks.begin(),
     E = ReadyBlocks.end(); I != E; ++I) {
  SIBlockSchedCandidate TryCand;
  TryCand.Block = *I;
  TryCand.IsHighLatency = TryCand.Block->isHighLatencyBlock();
  TryCand.VGPRUsageDiff =
    checkRegUsageImpact(TryCand.Block->getInRegs(),
        TryCand.Block->getOutRegs())[AMDGPU::RegisterPressureSets::VGPR_32];
  TryCand.NumSuccessors = TryCand.Block->getSuccs().size();
  TryCand.NumHighLatencySuccessors =
    TryCand.Block->getNumHighLatencySuccessors();
  TryCand.LastPosHighLatParentScheduled =
    (unsigned int) std::max<int> (0,
       LastPosHighLatencyParentScheduled[TryCand.Block->getID()] -
         LastPosWaitedHighLatency);
  TryCand.Height = TryCand.Block->Height;
  // Try not to increase VGPR usage too much, else we may spill.
  if (VregCurrentUsage > 120 ||
      Variant != SISchedulerBlockSchedulerVariant::BlockLatencyRegUsage) {
    if (!tryCandidateRegUsage(Cand, TryCand) &&
        Variant != SISchedulerBlockSchedulerVariant::BlockRegUsage)
      tryCandidateLatency(Cand, TryCand);
  } else {
    if (!tryCandidateLatency(Cand, TryCand))
      tryCandidateRegUsage(Cand, TryCand);
  }
  if (TryCand.Reason != NoCand) {
    Cand.setBest(TryCand);
    Best = I;
    LLVM_DEBUG(dbgs() << "Best Current Choice: " << Cand.Block->getID() << ' 'do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Best Current Choice: "
 << Cand.Block->getID() << ' ' << getReasonStr
(Cand.Reason) << '\n'; } } while (false)
                      << getReasonStr(Cand.Reason) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Best Current Choice: "
 << Cand.Block->getID() << ' ' << getReasonStr
(Cand.Reason) << '\n'; } } while (false);
  }
}

LLVM_DEBUG(dbgs() << "Picking: " << Cand.Block->getID() << '\n';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking: " <<
 Cand.Block->getID() << '\n'; dbgs() << "Is a block with high latency instruction: "
 << (Cand.IsHighLatency ? "yes\n" : "no\n"); dbgs() <<
 "Position of last high latency dependency: " << Cand.LastPosHighLatParentScheduled
 << '\n'; dbgs() << "VGPRUsageDiff: " << Cand
.VGPRUsageDiff << '\n'; dbgs() << '\n';; } } while
 (false)
31
←
Loop condition is false. Execution continues on line 1649→
32
←
Assuming 'DebugFlag' is true→
33
←
Assuming the condition is true→
34
←
Taking true branch→
35
←
Called C++ object pointer is null
           dbgs() << "Is a block with high latency instruction: "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking: " <<
 Cand.Block->getID() << '\n'; dbgs() << "Is a block with high latency instruction: "
 << (Cand.IsHighLatency ? "yes\n" : "no\n"); dbgs() <<
 "Position of last high latency dependency: " << Cand.LastPosHighLatParentScheduled
 << '\n'; dbgs() << "VGPRUsageDiff: " << Cand
.VGPRUsageDiff << '\n'; dbgs() << '\n';; } } while
 (false)
                  << (Cand.IsHighLatency ? "yes\n" : "no\n");do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking: " <<
 Cand.Block->getID() << '\n'; dbgs() << "Is a block with high latency instruction: "
 << (Cand.IsHighLatency ? "yes\n" : "no\n"); dbgs() <<
 "Position of last high latency dependency: " << Cand.LastPosHighLatParentScheduled
 << '\n'; dbgs() << "VGPRUsageDiff: " << Cand
.VGPRUsageDiff << '\n'; dbgs() << '\n';; } } while
 (false)
           dbgs() << "Position of last high latency dependency: "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking: " <<
 Cand.Block->getID() << '\n'; dbgs() << "Is a block with high latency instruction: "
 << (Cand.IsHighLatency ? "yes\n" : "no\n"); dbgs() <<
 "Position of last high latency dependency: " << Cand.LastPosHighLatParentScheduled
 << '\n'; dbgs() << "VGPRUsageDiff: " << Cand
.VGPRUsageDiff << '\n'; dbgs() << '\n';; } } while
 (false)
                  << Cand.LastPosHighLatParentScheduled << '\n';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking: " <<
 Cand.Block->getID() << '\n'; dbgs() << "Is a block with high latency instruction: "
 << (Cand.IsHighLatency ? "yes\n" : "no\n"); dbgs() <<
 "Position of last high latency dependency: " << Cand.LastPosHighLatParentScheduled
 << '\n'; dbgs() << "VGPRUsageDiff: " << Cand
.VGPRUsageDiff << '\n'; dbgs() << '\n';; } } while
 (false)
           dbgs() << "VGPRUsageDiff: " << Cand.VGPRUsageDiff << '\n';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking: " <<
 Cand.Block->getID() << '\n'; dbgs() << "Is a block with high latency instruction: "
 << (Cand.IsHighLatency ? "yes\n" : "no\n"); dbgs() <<
 "Position of last high latency dependency: " << Cand.LastPosHighLatParentScheduled
 << '\n'; dbgs() << "VGPRUsageDiff: " << Cand
.VGPRUsageDiff << '\n'; dbgs() << '\n';; } } while
 (false)
           dbgs() << '\n';)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking: " <<
 Cand.Block->getID() << '\n'; dbgs() << "Is a block with high latency instruction: "
 << (Cand.IsHighLatency ? "yes\n" : "no\n"); dbgs() <<
 "Position of last high latency dependency: " << Cand.LastPosHighLatParentScheduled
 << '\n'; dbgs() << "VGPRUsageDiff: " << Cand
.VGPRUsageDiff << '\n'; dbgs() << '\n';; } } while
 (false);

Block = Cand.Block;
ReadyBlocks.erase(Best);
return Block;
1660}

1662// Tracking of currently alive registers to determine VGPR Usage.

1664void SIScheduleBlockScheduler::addLiveRegs(std::set<unsigned> &Regs) {
for (Register Reg : Regs) {
  // For now only track virtual registers.
  if (!Reg.isVirtual())
    continue;
  // If not already in the live set, then add it.
  (void) LiveRegs.insert(Reg);
}
1672}

1674void SIScheduleBlockScheduler::decreaseLiveRegs(SIScheduleBlock *Block,
                                     std::set<unsigned> &Regs) {
for (unsigned Reg : Regs) {
  // For now only track virtual registers.
  std::set<unsigned>::iterator Pos = LiveRegs.find(Reg);
  assert (Pos != LiveRegs.end() && // Reg must be live.(static_cast <bool> (Pos != LiveRegs.end() && LiveRegsConsumers
.find(Reg) != LiveRegsConsumers.end() && LiveRegsConsumers
[Reg] >= 1) ? void (0) : __assert_fail ("Pos != LiveRegs.end() && LiveRegsConsumers.find(Reg) != LiveRegsConsumers.end() && LiveRegsConsumers[Reg] >= 1"
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 1681, __extension__ __PRETTY_FUNCTION__))
             LiveRegsConsumers.find(Reg) != LiveRegsConsumers.end() &&(static_cast <bool> (Pos != LiveRegs.end() && LiveRegsConsumers
.find(Reg) != LiveRegsConsumers.end() && LiveRegsConsumers
[Reg] >= 1) ? void (0) : __assert_fail ("Pos != LiveRegs.end() && LiveRegsConsumers.find(Reg) != LiveRegsConsumers.end() && LiveRegsConsumers[Reg] >= 1"
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 1681, __extension__ __PRETTY_FUNCTION__))
             LiveRegsConsumers[Reg] >= 1)(static_cast <bool> (Pos != LiveRegs.end() && LiveRegsConsumers
.find(Reg) != LiveRegsConsumers.end() && LiveRegsConsumers
[Reg] >= 1) ? void (0) : __assert_fail ("Pos != LiveRegs.end() && LiveRegsConsumers.find(Reg) != LiveRegsConsumers.end() && LiveRegsConsumers[Reg] >= 1"
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 1681, __extension__ __PRETTY_FUNCTION__));
  --LiveRegsConsumers[Reg];
  if (LiveRegsConsumers[Reg] == 0)
    LiveRegs.erase(Pos);
}
1686}

1688void SIScheduleBlockScheduler::releaseBlockSuccs(SIScheduleBlock *Parent) {
for (const auto &Block : Parent->getSuccs()) {
  if (--BlockNumPredsLeft[Block.first->getID()] == 0)
    ReadyBlocks.push_back(Block.first);

  if (Parent->isHighLatencyBlock() &&
      Block.second == SIScheduleBlockLinkKind::Data)
    LastPosHighLatencyParentScheduled[Block.first->getID()] = NumBlockScheduled;
}
1697}

1699void SIScheduleBlockScheduler::blockScheduled(SIScheduleBlock *Block) {
decreaseLiveRegs(Block, Block->getInRegs());
addLiveRegs(Block->getOutRegs());
releaseBlockSuccs(Block);
for (std::map<unsigned, unsigned>::iterator RegI =
     LiveOutRegsNumUsages[Block->getID()].begin(),
     E = LiveOutRegsNumUsages[Block->getID()].end(); RegI != E; ++RegI) {
  std::pair<unsigned, unsigned> RegP = *RegI;
  // We produce this register, thus it must not be previously alive.
  assert(LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end() ||(static_cast <bool> (LiveRegsConsumers.find(RegP.first)
 == LiveRegsConsumers.end() || LiveRegsConsumers[RegP.first] ==
 0) ? void (0) : __assert_fail ("LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end() || LiveRegsConsumers[RegP.first] == 0"
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 1709, __extension__ __PRETTY_FUNCTION__))
         LiveRegsConsumers[RegP.first] == 0)(static_cast <bool> (LiveRegsConsumers.find(RegP.first)
 == LiveRegsConsumers.end() || LiveRegsConsumers[RegP.first] ==
 0) ? void (0) : __assert_fail ("LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end() || LiveRegsConsumers[RegP.first] == 0"
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 1709, __extension__ __PRETTY_FUNCTION__));
  LiveRegsConsumers[RegP.first] += RegP.second;
}
if (LastPosHighLatencyParentScheduled[Block->getID()] >
      (unsigned)LastPosWaitedHighLatency)
  LastPosWaitedHighLatency =
    LastPosHighLatencyParentScheduled[Block->getID()];
++NumBlockScheduled;
1717}

1719std::vector<int>
1720SIScheduleBlockScheduler::checkRegUsageImpact(std::set<unsigned> &InRegs,
                                   std::set<unsigned> &OutRegs) {
std::vector<int> DiffSetPressure;
DiffSetPressure.assign(DAG->getTRI()->getNumRegPressureSets(), 0);

for (Register Reg : InRegs) {
  // For now only track virtual registers.
  if (!Reg.isVirtual())
    continue;
  if (LiveRegsConsumers[Reg] > 1)
    continue;
  PSetIterator PSetI = DAG->getMRI()->getPressureSets(Reg);
  for (; PSetI.isValid(); ++PSetI) {
    DiffSetPressure[*PSetI] -= PSetI.getWeight();
  }
}

for (Register Reg : OutRegs) {
  // For now only track virtual registers.
  if (!Reg.isVirtual())
    continue;
  PSetIterator PSetI = DAG->getMRI()->getPressureSets(Reg);
  for (; PSetI.isValid(); ++PSetI) {
    DiffSetPressure[*PSetI] += PSetI.getWeight();
  }
}

return DiffSetPressure;
1748}

1750// SIScheduler //

1752struct SIScheduleBlockResult
1753SIScheduler::scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
                           SISchedulerBlockSchedulerVariant ScheduleVariant) {
SIScheduleBlocks Blocks = BlockCreator.getBlocks(BlockVariant);
SIScheduleBlockScheduler Scheduler(DAG, ScheduleVariant, Blocks);
8
←
Calling constructor for 'SIScheduleBlockScheduler'→
std::vector<SIScheduleBlock*> ScheduledBlocks;
struct SIScheduleBlockResult Res;

ScheduledBlocks = Scheduler.getBlocks();

for (unsigned b = 0; b < ScheduledBlocks.size(); ++b) {
  SIScheduleBlock *Block = ScheduledBlocks[b];
  std::vector<SUnit*> SUs = Block->getScheduledUnits();

  for (SUnit* SU : SUs)
    Res.SUs.push_back(SU->NodeNum);
}

Res.MaxSGPRUsage = Scheduler.getSGPRUsage();
Res.MaxVGPRUsage = Scheduler.getVGPRUsage();
return Res;
1773}

1775// SIScheduleDAGMI //

// Construct the SI scheduling DAG on top of ScheduleDAGMILive, handing it a
// GenericScheduler built from the same context C.
1777SIScheduleDAGMI::SIScheduleDAGMI(MachineSchedContext *C) :
ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C)) {
// Keep TII/TRI at hand under their SI-specific types.
SITII = static_cast<const SIInstrInfo*>(TII);
SITRI = static_cast<const SIRegisterInfo*>(TRI);
1781}

// Defaulted destructor, defined out of line in this file.
1783SIScheduleDAGMI::~SIScheduleDAGMI() = default;

1785// Code adapted from scheduleDAG.cpp
1786// Does a topological sort over the SUs.
1787// Both TopDown and BottomUp
1788void SIScheduleDAGMI::topologicalSort() {
Topo.InitDAGTopologicalSorting();

TopDownIndex2SU = std::vector<int>(Topo.begin(), Topo.end());
BottomUpIndex2SU = std::vector<int>(Topo.rbegin(), Topo.rend());
1793}

1795// Move low latencies further from their user without
1796// increasing SGPR usage (in general)
1797// This is to be replaced by a better pass that would
1798// take into account SGPR usage (based on VGPR Usage
1799// and the corresponding wavefront count), that would
1800// try to merge groups of loads if it make sense, etc
1801void SIScheduleDAGMI::moveLowLatencies() {
 unsigned DAGSize = SUnits.size();
 int LastLowLatencyUser = -1;
 int LastLowLatencyPos = -1;

 for (unsigned i = 0, e = ScheduledSUnits.size(); i != e; ++i) {
  SUnit *SU = &SUnits[ScheduledSUnits[i]];
  bool IsLowLatencyUser = false;
  unsigned MinPos = 0;

  for (SDep& PredDep : SU->Preds) {
    SUnit *Pred = PredDep.getSUnit();
    if (SITII->isLowLatencyInstruction(*Pred->getInstr())) {
      IsLowLatencyUser = true;
    }
    if (Pred->NodeNum >= DAGSize)
      continue;
    unsigned PredPos = ScheduledSUnitsInv[Pred->NodeNum];
    if (PredPos >= MinPos)
      MinPos = PredPos + 1;
  }

  if (SITII->isLowLatencyInstruction(*SU->getInstr())) {
    unsigned BestPos = LastLowLatencyUser + 1;
    if ((int)BestPos <= LastLowLatencyPos)
      BestPos = LastLowLatencyPos + 1;
    if (BestPos < MinPos)
      BestPos = MinPos;
    if (BestPos < i) {
      for (unsigned u = i; u > BestPos; --u) {
        ++ScheduledSUnitsInv[ScheduledSUnits[u-1]];
        ScheduledSUnits[u] = ScheduledSUnits[u-1];
      }
      ScheduledSUnits[BestPos] = SU->NodeNum;
      ScheduledSUnitsInv[SU->NodeNum] = BestPos;
    }
    LastLowLatencyPos = BestPos;
    if (IsLowLatencyUser)
      LastLowLatencyUser = BestPos;
  } else if (IsLowLatencyUser) {
    LastLowLatencyUser = i;
  // Moves COPY instructions on which depends
  // the low latency instructions too.
  } else if (SU->getInstr()->getOpcode() == AMDGPU::COPY) {
    bool CopyForLowLat = false;
    for (SDep& SuccDep : SU->Succs) {
      SUnit *Succ = SuccDep.getSUnit();
      if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
        continue;
      if (SITII->isLowLatencyInstruction(*Succ->getInstr())) {
        CopyForLowLat = true;
      }
    }
    if (!CopyForLowLat)
      continue;
    if (MinPos < i) {
      for (unsigned u = i; u > MinPos; --u) {
        ++ScheduledSUnitsInv[ScheduledSUnits[u-1]];
        ScheduledSUnits[u] = ScheduledSUnits[u-1];
      }
      ScheduledSUnits[MinPos] = SU->NodeNum;
      ScheduledSUnitsInv[SU->NodeNum] = MinPos;
    }
  }
}
1866}

1868void SIScheduleDAGMI::restoreSULinksLeft() {
for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
  SUnits[i].isScheduled = false;
  SUnits[i].WeakPredsLeft = SUnitsLinksBackup[i].WeakPredsLeft;
  SUnits[i].NumPredsLeft = SUnitsLinksBackup[i].NumPredsLeft;
  SUnits[i].WeakSuccsLeft = SUnitsLinksBackup[i].WeakSuccsLeft;
  SUnits[i].NumSuccsLeft = SUnitsLinksBackup[i].NumSuccsLeft;
}
1876}

1878// Return the Vgpr and Sgpr usage corresponding to some virtual registers.
1879template<typename _Iterator> void
1880SIScheduleDAGMI::fillVgprSgprCost(_Iterator First, _Iterator End,
                                unsigned &VgprUsage, unsigned &SgprUsage) {
VgprUsage = 0;
SgprUsage = 0;
for (_Iterator RegI = First; RegI != End; ++RegI) {
  Register Reg = *RegI;
  // For now only track virtual registers
  if (!Reg.isVirtual())
    continue;
  PSetIterator PSetI = MRI.getPressureSets(Reg);
  for (; PSetI.isValid(); ++PSetI) {
    if (*PSetI == AMDGPU::RegisterPressureSets::VGPR_32)
      VgprUsage += PSetI.getWeight();
    else if (*PSetI == AMDGPU::RegisterPressureSets::SReg_32)
      SgprUsage += PSetI.getWeight();
  }
}
1897}

1899void SIScheduleDAGMI::schedule()
1900{
SmallVector<SUnit*, 8> TopRoots, BotRoots;
SIScheduleBlockResult Best, Temp;
LLVM_DEBUG(dbgs() << "Preparing Scheduling\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Preparing Scheduling\n"
; } } while (false);
1
Assuming 'DebugFlag' is false→
2
←
Loop condition is false.  Exiting loop→

buildDAGWithRegPressure();
LLVM_DEBUG(dump())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dump(); } } while (false);
3
←
Assuming 'DebugFlag' is false→
4
←
Loop condition is false.  Exiting loop→

topologicalSort();
findRootsAndBiasEdges(TopRoots, BotRoots);
// We reuse several ScheduleDAGMI and ScheduleDAGMILive
// functions, but to make them happy we must initialize
// the default Scheduler implementation (even if we do not
// run it)
SchedImpl->initialize(this);
initQueues(TopRoots, BotRoots);

// Fill some stats to help scheduling.

SUnitsLinksBackup = SUnits;
IsLowLatencySU.clear();
LowLatencyOffset.clear();
IsHighLatencySU.clear();

IsLowLatencySU.resize(SUnits.size(), 0);
LowLatencyOffset.resize(SUnits.size(), 0);
IsHighLatencySU.resize(SUnits.size(), 0);

for (unsigned i = 0, e = (unsigned)SUnits.size(); i != e; ++i) {
5
←
Assuming 'i' is equal to 'e'→
6
←
Loop condition is false. Execution continues on line 1942→
  SUnit *SU = &SUnits[i];
  const MachineOperand *BaseLatOp;
  int64_t OffLatReg;
  if (SITII->isLowLatencyInstruction(*SU->getInstr())) {
    IsLowLatencySU[i] = 1;
    bool OffsetIsScalable;
    if (SITII->getMemOperandWithOffset(*SU->getInstr(), BaseLatOp, OffLatReg,
                                       OffsetIsScalable, TRI))
      LowLatencyOffset[i] = OffLatReg;
  } else if (SITII->isHighLatencyDef(SU->getInstr()->getOpcode()))
    IsHighLatencySU[i] = 1;
}

SIScheduler Scheduler(this);
Best = Scheduler.scheduleVariant(SISchedulerBlockCreatorVariant::LatenciesAlone,
7
←
Calling 'SIScheduler::scheduleVariant'→
                                 SISchedulerBlockSchedulerVariant::BlockLatencyRegUsage);

// if VGPR usage is extremely high, try other good performing variants
// which could lead to lower VGPR usage
if (Best.MaxVGPRUsage > 180) {
  static const std::pair<SISchedulerBlockCreatorVariant,
                         SISchedulerBlockSchedulerVariant>
      Variants[] = {
    { LatenciesAlone, BlockRegUsageLatency },
1953//      { LatenciesAlone, BlockRegUsage },
    { LatenciesGrouped, BlockLatencyRegUsage },
1955//      { LatenciesGrouped, BlockRegUsageLatency },
1956//      { LatenciesGrouped, BlockRegUsage },
    { LatenciesAlonePlusConsecutive, BlockLatencyRegUsage },
1958//      { LatenciesAlonePlusConsecutive, BlockRegUsageLatency },
1959//      { LatenciesAlonePlusConsecutive, BlockRegUsage }
  };
  for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
    Temp = Scheduler.scheduleVariant(v.first, v.second);
    if (Temp.MaxVGPRUsage < Best.MaxVGPRUsage)
      Best = Temp;
  }
}
// If VGPR usage is still extremely high, we may spill. Try other variants
// that perform worse but could lead to lower VGPR usage.
if (Best.MaxVGPRUsage > 200) {
  static const std::pair<SISchedulerBlockCreatorVariant,
                         SISchedulerBlockSchedulerVariant>
      Variants[] = {
//      { LatenciesAlone, BlockRegUsageLatency },
    { LatenciesAlone, BlockRegUsage },
//      { LatenciesGrouped, BlockLatencyRegUsage },
    { LatenciesGrouped, BlockRegUsageLatency },
    { LatenciesGrouped, BlockRegUsage },
//      { LatenciesAlonePlusConsecutive, BlockLatencyRegUsage },
    { LatenciesAlonePlusConsecutive, BlockRegUsageLatency },
    { LatenciesAlonePlusConsecutive, BlockRegUsage }
  };
  for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
    Temp = Scheduler.scheduleVariant(v.first, v.second);
    if (Temp.MaxVGPRUsage < Best.MaxVGPRUsage)
      Best = Temp;
  }
}

ScheduledSUnits = Best.SUs;
ScheduledSUnitsInv.resize(SUnits.size());

for (unsigned i = 0, e = (unsigned)SUnits.size(); i != e; ++i) {
  ScheduledSUnitsInv[ScheduledSUnits[i]] = i;
}

moveLowLatencies();

// Tell the outside world about the result of the scheduling.

assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker")(static_cast <bool> (TopRPTracker.getPos() == RegionBegin
 && "bad initial Top tracker") ? void (0) : __assert_fail
 ("TopRPTracker.getPos() == RegionBegin && \"bad initial Top tracker\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 2000, __extension__ __PRETTY_FUNCTION__));
TopRPTracker.setPos(CurrentTop);

for (std::vector<unsigned>::iterator I = ScheduledSUnits.begin(),
     E = ScheduledSUnits.end(); I != E; ++I) {
  SUnit *SU = &SUnits[*I];

  scheduleMI(SU, true);

  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Scheduling SU(" <<
 SU->NodeNum << ") " << *SU->getInstr(); } }
 while (false)
                    << *SU->getInstr())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Scheduling SU(" <<
 SU->NodeNum << ") " << *SU->getInstr(); } }
 while (false);
}

assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.")(static_cast <bool> (CurrentTop == CurrentBottom &&
 "Nonempty unscheduled zone.") ? void (0) : __assert_fail ("CurrentTop == CurrentBottom && \"Nonempty unscheduled zone.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 2013, __extension__ __PRETTY_FUNCTION__));

placeDebugValues();

LLVM_DEBUG({do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { { dbgs() << "*** Final schedule for "
 << printMBBReference(*begin()->getParent()) <<
 " ***\n"; dumpSchedule(); dbgs() << '\n'; }; } } while
 (false)
  dbgs() << "*** Final schedule for "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { { dbgs() << "*** Final schedule for "
 << printMBBReference(*begin()->getParent()) <<
 " ***\n"; dumpSchedule(); dbgs() << '\n'; }; } } while
 (false)
         << printMBBReference(*begin()->getParent()) << " ***\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { { dbgs() << "*** Final schedule for "
 << printMBBReference(*begin()->getParent()) <<
 " ***\n"; dumpSchedule(); dbgs() << '\n'; }; } } while
 (false)
  dumpSchedule();do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { { dbgs() << "*** Final schedule for "
 << printMBBReference(*begin()->getParent()) <<
 " ***\n"; dumpSchedule(); dbgs() << '\n'; }; } } while
 (false)
  dbgs() << '\n';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { { dbgs() << "*** Final schedule for "
 << printMBBReference(*begin()->getParent()) <<
 " ***\n"; dumpSchedule(); dbgs() << '\n'; }; } } while
 (false)
})do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { { dbgs() << "*** Final schedule for "
 << printMBBReference(*begin()->getParent()) <<
 " ***\n"; dumpSchedule(); dbgs() << '\n'; }; } } while
 (false);
2023}