Bug Summary

File: lib/Target/AMDGPU/SIMachineScheduler.cpp
Warning: line 1679, column 3
Called C++ object pointer is null

Annotated Source Code

clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name SIMachineScheduler.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-eagerly-assume -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -mrelocation-model pic -pic-level 2 -mthread-model posix -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -momit-leaf-frame-pointer -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-7/lib/clang/7.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-7~svn329677/build-llvm/lib/Target/AMDGPU -I /build/llvm-toolchain-snapshot-7~svn329677/lib/Target/AMDGPU -I /build/llvm-toolchain-snapshot-7~svn329677/build-llvm/include -I /build/llvm-toolchain-snapshot-7~svn329677/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.3.0/../../../../include/c++/7.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.3.0/../../../../include/x86_64-linux-gnu/c++/7.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.3.0/../../../../include/x86_64-linux-gnu/c++/7.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.3.0/../../../../include/c++/7.3.0/backward -internal-isystem /usr/include/clang/7.0.0/include/ -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-7/lib/clang/7.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++11 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-7~svn329677/build-llvm/lib/Target/AMDGPU -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-checker optin.performance.Padding -analyzer-output=html -analyzer-config stable-report-filename=true -o /tmp/scan-build-2018-04-11-031539-24776-1 -x c++ /build/llvm-toolchain-snapshot-7~svn329677/lib/Target/AMDGPU/SIMachineScheduler.cpp
1//===-- SIMachineScheduler.cpp - SI Scheduler Interface -------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief SI Machine Scheduler interface
12//
13//===----------------------------------------------------------------------===//
14
15#include "SIMachineScheduler.h"
16#include "AMDGPU.h"
17#include "SIInstrInfo.h"
18#include "SIRegisterInfo.h"
19#include "llvm/ADT/STLExtras.h"
20#include "llvm/ADT/SmallVector.h"
21#include "llvm/CodeGen/LiveInterval.h"
22#include "llvm/CodeGen/LiveIntervals.h"
23#include "llvm/CodeGen/MachineInstr.h"
24#include "llvm/CodeGen/MachineRegisterInfo.h"
25#include "llvm/CodeGen/MachineScheduler.h"
26#include "llvm/CodeGen/RegisterPressure.h"
27#include "llvm/CodeGen/SlotIndexes.h"
28#include "llvm/CodeGen/TargetRegisterInfo.h"
29#include "llvm/Support/Debug.h"
30#include "llvm/Support/ErrorHandling.h"
31#include "llvm/Support/raw_ostream.h"
32#include <algorithm>
33#include <cassert>
34#include <map>
35#include <set>
36#include <utility>
37#include <vector>
38
39using namespace llvm;
40
41#define DEBUG_TYPE "machine-scheduler"
42
43// This scheduler implements a different scheduling algorithm than
44// GenericScheduler.
45//
46// There are several specific architecture behaviours that can't be modelled
47// for GenericScheduler:
48// . When accessing the result of an SGPR load instruction, you have to wait
49// for all the SGPR load instructions before your current instruction to
50// have finished.
51// . When accessing the result of a VGPR load instruction, you have to wait
52// for all the VGPR load instructions previous to the VGPR load instruction
53// you are interested in to finish.
54// . The lower the register pressure, the better load latencies are hidden
55//
56// Moreover some specificities (like the fact that a lot of instructions in the shader
57// have few dependencies) make the generic scheduler have some unpredictable
58// behaviours. For example when register pressure becomes high, it can either
59// manage to prevent register pressure from going too high, or it can
60// increase register pressure even more than if it hadn't taken register
61// pressure into account.
62//
63// Also some other bad behaviours are generated, like loading at the beginning
64// of the shader a constant in a VGPR that you won't need until the end of the shader.
65//
66// The scheduling problem for SI can be divided into three main parts:
67// . Hiding high latencies (texture sampling, etc)
68// . Hiding low latencies (SGPR constant loading, etc)
69// . Keeping register usage low for better latency hiding and general
70// performance
71//
72// Some other things can also affect performance, but are hard to predict
73// (cache usage, the fact that the HW can issue several instructions from different
74// wavefronts if they are of different types, etc.)
75//
76// This scheduler tries to solve the scheduling problem by dividing it into
77// simpler sub-problems. It divides the instructions into blocks, schedules
78// locally inside the blocks where it takes care of low latencies, and then
79// chooses the order of the blocks by taking care of high latencies.
80// Dividing the instructions into blocks helps keep register
81// usage low.
82//
83// First the instructions are put into blocks.
84// We want the blocks to help control register usage and hide high latencies
85// later. To help control register usage, we typically want all local
86// computations, when for example you create a result that can be consumed
87// right away, to be contained in a block. Block inputs and outputs would
88// typically be important results that are needed in several locations of
89// the shader. Since we do want blocks to help hide high latencies, we want
90// the instructions inside the block to have a minimal set of dependencies
91// on high latencies. It will make it easy to pick blocks to hide specific
92// high latencies.
93// The block creation algorithm is divided into several steps, and several
94// variants can be tried during the scheduling process.
95//
96// Second the order of the instructions inside the blocks is chosen.
97// At that step we take into account only register usage and hiding
98// low latency instructions.
99//
100// Third the block order is chosen; there we try to hide high latencies
101// and keep register usage low.
102//
103// After the third step, a pass is done to improve the hiding of low
104// latencies.
105//
106// Actually when talking about 'low latency' or 'high latency' it includes
107// both the latency to get the cache (or global mem) data to the register,
108// and the bandwidth limitations.
109// Increasing the number of active wavefronts helps hide the former, but it
110// doesn't solve the latter, which is why even if the wavefront count is high, we have
111// to try to have as many instructions hiding high latencies as possible.
112// The OpenCL doc gives, for example, a latency of 400 cycles for a global mem access,
113// which is hidden by 10 instructions if the wavefront count is 10 (see the worked example after these notes).
114
115// Some figures taken from AMD docs:
116// Both texture and constant L1 caches are 4-way associative with 64-byte
117// lines.
118// Constant cache is shared with 4 CUs.
119// For texture sampling, the address generation unit receives 4 texture
120// addresses per cycle, thus we could expect texture sampling latency to be
121// equivalent to 4 instructions in the very best case (a VGPR is 64 work items,
122// instructions in a wavefront group are executed every 4 cycles),
123// or 16 instructions if the other wavefronts associated to the 3 other VALUs
124// of the CU do texture sampling too. (Don't take these figures too seriously,
125// as I'm not 100% sure of the computation)
126// Data exports should get similar latency.
127// For constant loading, the cache is shared among 4 CUs.
128// The doc says "a throughput of 16B/cycle for each of the 4 Compute Unit"
129// I guess if the other CUs don't read the cache, it can go up to 64B/cycle.
130// It means a simple s_buffer_load should take one instruction to hide, as
131// well as a s_buffer_loadx2 and potentially a s_buffer_loadx8 if on the same
132// cache line.
133//
134// As of today the driver doesn't preload the constants in cache, thus the
135// first loads get extra latency. The doc says global memory access can be
136// 300-600 cycles. We do not specially take that into account when scheduling,
137// as we expect the driver to be able to preload the constants soon.
138
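// Editor's worked example (not part of the original file), using only the
// figures quoted above (400-cycle global memory latency, one issue per
// wavefront every 4 cycles, wavefronts scheduled round-robin on a SIMD):
//
//   constexpr unsigned MemLatencyCycles   = 400; // OpenCL doc figure above
//   constexpr unsigned IssueCadenceCycles = 4;   // per-wavefront issue cadence
//   constexpr unsigned ResidentWavefronts = 10;
//   constexpr unsigned InstrsToHide =
//       MemLatencyCycles / (IssueCadenceCycles * ResidentWavefronts); // == 10
//
// With 10 resident wavefronts a given wavefront issues roughly every 40
// cycles, so about 10 independent instructions cover a 400-cycle access,
// matching the "hidden by 10 instructions" remark above.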
139// common code //
140
141#ifndef NDEBUG
142
143static const char *getReasonStr(SIScheduleCandReason Reason) {
144 switch (Reason) {
145 case NoCand: return "NOCAND";
146 case RegUsage: return "REGUSAGE";
147 case Latency: return "LATENCY";
148 case Successor: return "SUCCESSOR";
149 case Depth: return "DEPTH";
150 case NodeOrder: return "ORDER";
151 }
152 llvm_unreachable("Unknown reason!");
153}
154
155#endif
156
157static bool tryLess(int TryVal, int CandVal,
158 SISchedulerCandidate &TryCand,
159 SISchedulerCandidate &Cand,
160 SIScheduleCandReason Reason) {
161 if (TryVal < CandVal) {
162 TryCand.Reason = Reason;
163 return true;
164 }
165 if (TryVal > CandVal) {
166 if (Cand.Reason > Reason)
167 Cand.Reason = Reason;
168 return true;
169 }
170 Cand.setRepeat(Reason);
171 return false;
172}
173
174static bool tryGreater(int TryVal, int CandVal,
175 SISchedulerCandidate &TryCand,
176 SISchedulerCandidate &Cand,
177 SIScheduleCandReason Reason) {
178 if (TryVal > CandVal) {
179 TryCand.Reason = Reason;
180 return true;
181 }
182 if (TryVal < CandVal) {
183 if (Cand.Reason > Reason)
184 Cand.Reason = Reason;
185 return true;
186 }
187 Cand.setRepeat(Reason);
188 return false;
189}
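// Editor's illustrative sketch (not part of the original file): how the two
// helpers above are meant to be chained in a candidate-comparison routine.
// Each call returns true as soon as the current criterion discriminates the
// two candidates; TryCand.Reason is set only when TryCand is strictly better,
// which is what the caller tests afterwards (see tryCandidateTopDown and
// pickNode below). Field names follow SISchedCandidate; the routine itself
// is hypothetical.
//
//   // inside a hypothetical tryCandidate(Cand, TryCand):
//   if (tryLess(TryCand.VGPRUsage, Cand.VGPRUsage, TryCand, Cand, RegUsage))
//     return;                          // decided on register usage
//   if (tryGreater(TryCand.IsLowLatency, Cand.IsLowLatency,
//                  TryCand, Cand, SIScheduleCandReason::Depth))
//     return;                          // decided on latency class
//   TryCand.Reason = NodeOrder;        // tie on all criteria: keep source order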
190
191// SIScheduleBlock //
192
193void SIScheduleBlock::addUnit(SUnit *SU) {
194 NodeNum2Index[SU->NodeNum] = SUnits.size();
195 SUnits.push_back(SU);
196}
197
198#ifndef NDEBUG
199void SIScheduleBlock::traceCandidate(const SISchedCandidate &Cand) {
200
201 dbgs() << " SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
202 dbgs() << '\n';
203}
204#endif
205
206void SIScheduleBlock::tryCandidateTopDown(SISchedCandidate &Cand,
207 SISchedCandidate &TryCand) {
208 // Initialize the candidate if needed.
209 if (!Cand.isValid()) {
210 TryCand.Reason = NodeOrder;
211 return;
212 }
213
214 if (Cand.SGPRUsage > 60 &&
215 tryLess(TryCand.SGPRUsage, Cand.SGPRUsage, TryCand, Cand, RegUsage))
216 return;
217
218 // Schedule low latency instructions as close to the top as possible.
219 // Order of priority is:
220 // . Low latency instructions which do not depend on other low latency
221 // instructions we haven't waited for
222 // . Other instructions which do not depend on low latency instructions
223 // we haven't waited for
224 // . Low latencies
225 // . All other instructions
226 // Goal is to get: low latency instructions - independent instructions
227 // - (possibly some more low latency instructions)
228 // - instructions that depend on the first low latency instructions.
229 // If the block contains a lot of constant loads, the SGPR usage
230 // could go quite high; going above the arbitrary limit of 60 will thus
231 // encourage using the already loaded constants (in order to release some
232 // SGPRs) before loading more.
233 if (tryLess(TryCand.HasLowLatencyNonWaitedParent,
234 Cand.HasLowLatencyNonWaitedParent,
235 TryCand, Cand, SIScheduleCandReason::Depth))
236 return;
237
238 if (tryGreater(TryCand.IsLowLatency, Cand.IsLowLatency,
239 TryCand, Cand, SIScheduleCandReason::Depth))
240 return;
241
242 if (TryCand.IsLowLatency &&
243 tryLess(TryCand.LowLatencyOffset, Cand.LowLatencyOffset,
244 TryCand, Cand, SIScheduleCandReason::Depth))
245 return;
246
247 if (tryLess(TryCand.VGPRUsage, Cand.VGPRUsage, TryCand, Cand, RegUsage))
248 return;
249
250 // Fall through to original instruction order.
251 if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
252 TryCand.Reason = NodeOrder;
253 }
254}
255
256SUnit* SIScheduleBlock::pickNode() {
257 SISchedCandidate TopCand;
258
259 for (SUnit* SU : TopReadySUs) {
260 SISchedCandidate TryCand;
261 std::vector<unsigned> pressure;
262 std::vector<unsigned> MaxPressure;
263 // Predict register usage after this instruction.
264 TryCand.SU = SU;
265 TopRPTracker.getDownwardPressure(SU->getInstr(), pressure, MaxPressure);
266 TryCand.SGPRUsage = pressure[DAG->getSGPRSetID()];
267 TryCand.VGPRUsage = pressure[DAG->getVGPRSetID()];
268 TryCand.IsLowLatency = DAG->IsLowLatencySU[SU->NodeNum];
269 TryCand.LowLatencyOffset = DAG->LowLatencyOffset[SU->NodeNum];
270 TryCand.HasLowLatencyNonWaitedParent =
271 HasLowLatencyNonWaitedParent[NodeNum2Index[SU->NodeNum]];
272 tryCandidateTopDown(TopCand, TryCand);
273 if (TryCand.Reason != NoCand)
274 TopCand.setBest(TryCand);
275 }
276
277 return TopCand.SU;
278}
279
280
281// Schedule something valid.
282void SIScheduleBlock::fastSchedule() {
283 TopReadySUs.clear();
284 if (Scheduled)
285 undoSchedule();
286
287 for (SUnit* SU : SUnits) {
288 if (!SU->NumPredsLeft)
289 TopReadySUs.push_back(SU);
290 }
291
292 while (!TopReadySUs.empty()) {
293 SUnit *SU = TopReadySUs[0];
294 ScheduledSUnits.push_back(SU);
295 nodeScheduled(SU);
296 }
297
298 Scheduled = true;
299}
300
301// Returns whether the register was set between First and Last.
302static bool isDefBetween(unsigned Reg,
303 SlotIndex First, SlotIndex Last,
304 const MachineRegisterInfo *MRI,
305 const LiveIntervals *LIS) {
306 for (MachineRegisterInfo::def_instr_iterator
307 UI = MRI->def_instr_begin(Reg),
308 UE = MRI->def_instr_end(); UI != UE; ++UI) {
309 const MachineInstr* MI = &*UI;
310 if (MI->isDebugValue())
311 continue;
312 SlotIndex InstSlot = LIS->getInstructionIndex(*MI).getRegSlot();
313 if (InstSlot >= First && InstSlot <= Last)
314 return true;
315 }
316 return false;
317}
318
319void SIScheduleBlock::initRegPressure(MachineBasicBlock::iterator BeginBlock,
320 MachineBasicBlock::iterator EndBlock) {
321 IntervalPressure Pressure, BotPressure;
322 RegPressureTracker RPTracker(Pressure), BotRPTracker(BotPressure);
323 LiveIntervals *LIS = DAG->getLIS();
324 MachineRegisterInfo *MRI = DAG->getMRI();
325 DAG->initRPTracker(TopRPTracker);
326 DAG->initRPTracker(BotRPTracker);
327 DAG->initRPTracker(RPTracker);
328
329 // Goes through all SUs. RPTracker captures what had to be alive for the SUs
330 // to execute, and what is still alive at the end.
331 for (SUnit* SU : ScheduledSUnits) {
332 RPTracker.setPos(SU->getInstr());
333 RPTracker.advance();
334 }
335
336 // Close the RPTracker to finalize live ins/outs.
337 RPTracker.closeRegion();
338
339 // Initialize the live ins and live outs.
340 TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
341 BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
342
343 // Do not track physical registers, because it messes things up.
344 for (const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) {
345 if (TargetRegisterInfo::isVirtualRegister(RegMaskPair.RegUnit))
346 LiveInRegs.insert(RegMaskPair.RegUnit);
347 }
348 LiveOutRegs.clear();
349 // There are several possibilities to distinguish:
350 // 1) Reg is not input to any instruction in the block, but is output of one
351 // 2) 1) + read in the block and not needed after it
352 // 3) 1) + read in the block but needed in another block
353 // 4) Reg is input of an instruction but another block will read it too
354 // 5) Reg is input of an instruction and then rewritten in the block.
355 // result is not read in the block (implies used in another block)
356 // 6) Reg is input of an instruction and then rewritten in the block.
357 // result is read in the block and not needed in another block
358 // 7) Reg is input of an instruction and then rewritten in the block.
359 // result is read in the block but also needed in another block
360 // LiveInRegs will contain all the regs in situations 4, 5, 6, 7.
361 // We want LiveOutRegs to contain only Regs whose content will be read afterwards
362 // in another block, and whose content was written in the current block,
363 // that is we want it to get 1, 3, 5, 7.
364 // Since we made the MIs of a block be packed all together before
365 // scheduling, the LiveIntervals were correct, and the RPTracker was
366 // able to correctly handle 5 vs 6, and 2 vs 3.
367 // (Note: This is not sufficient for the RPTracker not to make mistakes for case 4.)
368 // The RPTracker's LiveOutRegs has 1, 3, (some correct or incorrect) 4, 5, 7.
369 // Comparing to LiveInRegs is not sufficient to differentiate 4 vs 5, 7.
370 // The use of isDefBetween removes case 4.
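// Editor's example (illustrative, not part of the original file): a VGPR
// defined in this block and read only by a later block is case 1 and ends up
// in LiveOutRegs; a VGPR that is only read here (defined elsewhere) but also
// needed by a later block is case 4, and is filtered out below because
// isDefBetween finds no def of it inside [BeginBlock, EndBlock].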
371 for (const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) {
372 unsigned Reg = RegMaskPair.RegUnit;
373 if (TargetRegisterInfo::isVirtualRegister(Reg) &&
374 isDefBetween(Reg, LIS->getInstructionIndex(*BeginBlock).getRegSlot(),
375 LIS->getInstructionIndex(*EndBlock).getRegSlot(), MRI,
376 LIS)) {
377 LiveOutRegs.insert(Reg);
378 }
379 }
380
381 // Pressure = sum_alive_registers register size
382 // Internally llvm will represent some registers as big 128-bit registers,
383 // for example, but they actually correspond to 4 actual 32-bit registers.
384 // Thus Pressure is not equal to num_alive_registers * constant.
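// Editor's example (assumption based on the comment above): a single live
// virtual register of a 128-bit class contributes 4 to its set pressure,
// the same as four live 32-bit registers of that class would.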
385 LiveInPressure = TopPressure.MaxSetPressure;
386 LiveOutPressure = BotPressure.MaxSetPressure;
387
388 // Prepares TopRPTracker for top down scheduling.
389 TopRPTracker.closeTop();
390}
391
392void SIScheduleBlock::schedule(MachineBasicBlock::iterator BeginBlock,
393 MachineBasicBlock::iterator EndBlock) {
394 if (!Scheduled)
395 fastSchedule();
396
397 // PreScheduling phase to set LiveIn and LiveOut.
398 initRegPressure(BeginBlock, EndBlock);
399 undoSchedule();
400
401 // Schedule for real now.
402
403 TopReadySUs.clear();
404
405 for (SUnit* SU : SUnits) {
406 if (!SU->NumPredsLeft)
407 TopReadySUs.push_back(SU);
408 }
409
410 while (!TopReadySUs.empty()) {
411 SUnit *SU = pickNode();
412 ScheduledSUnits.push_back(SU);
413 TopRPTracker.setPos(SU->getInstr());
414 TopRPTracker.advance();
415 nodeScheduled(SU);
416 }
417
418 // TODO: compute InternalAdditionnalPressure.
419 InternalAdditionnalPressure.resize(TopPressure.MaxSetPressure.size());
420
421 // Check everything is right.
422#ifndef NDEBUG
423 assert(SUnits.size() == ScheduledSUnits.size() &&
424 TopReadySUs.empty());
425 for (SUnit* SU : SUnits) {
426 assert(SU->isScheduled &&
427 SU->NumPredsLeft == 0);
428 }
429#endif
430
431 Scheduled = true;
432}
433
434void SIScheduleBlock::undoSchedule() {
435 for (SUnit* SU : SUnits) {
436 SU->isScheduled = false;
437 for (SDep& Succ : SU->Succs) {
438 if (BC->isSUInBlock(Succ.getSUnit(), ID))
439 undoReleaseSucc(SU, &Succ);
440 }
441 }
442 HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);
443 ScheduledSUnits.clear();
444 Scheduled = false;
445}
446
447void SIScheduleBlock::undoReleaseSucc(SUnit *SU, SDep *SuccEdge) {
448 SUnit *SuccSU = SuccEdge->getSUnit();
449
450 if (SuccEdge->isWeak()) {
451 ++SuccSU->WeakPredsLeft;
452 return;
453 }
454 ++SuccSU->NumPredsLeft;
455}
456
457void SIScheduleBlock::releaseSucc(SUnit *SU, SDep *SuccEdge) {
458 SUnit *SuccSU = SuccEdge->getSUnit();
459
460 if (SuccEdge->isWeak()) {
461 --SuccSU->WeakPredsLeft;
462 return;
463 }
464#ifndef NDEBUG
465 if (SuccSU->NumPredsLeft == 0) {
466 dbgs() << "*** Scheduling failed! ***\n";
467 SuccSU->dump(DAG);
468 dbgs() << " has been released too many times!\n";
469 llvm_unreachable(nullptr);
470 }
471#endif
472
473 --SuccSU->NumPredsLeft;
474}
475
476/// Release the successors of SU that are inside the block (InOrOutBlock = true) or outside it (InOrOutBlock = false).
477void SIScheduleBlock::releaseSuccessors(SUnit *SU, bool InOrOutBlock) {
478 for (SDep& Succ : SU->Succs) {
479 SUnit *SuccSU = Succ.getSUnit();
480
481 if (SuccSU->NodeNum >= DAG->SUnits.size())
482 continue;
483
484 if (BC->isSUInBlock(SuccSU, ID) != InOrOutBlock)
485 continue;
486
487 releaseSucc(SU, &Succ);
488 if (SuccSU->NumPredsLeft == 0 && InOrOutBlock)
489 TopReadySUs.push_back(SuccSU);
490 }
491}
492
493void SIScheduleBlock::nodeScheduled(SUnit *SU) {
494 // Is in TopReadySUs
495 assert (!SU->NumPredsLeft);
496 std::vector<SUnit *>::iterator I = llvm::find(TopReadySUs, SU);
497 if (I == TopReadySUs.end()) {
498 dbgs() << "Data Structure Bug in SI Scheduler\n";
499 llvm_unreachable(nullptr);
500 }
501 TopReadySUs.erase(I);
502
503 releaseSuccessors(SU, true);
504 // Scheduling this node will trigger a wait,
505 // thus propagate to the other instructions that they do not need to wait either.
506 if (HasLowLatencyNonWaitedParent[NodeNum2Index[SU->NodeNum]])
507 HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);
508
509 if (DAG->IsLowLatencySU[SU->NodeNum]) {
510 for (SDep& Succ : SU->Succs) {
511 std::map<unsigned, unsigned>::iterator I =
512 NodeNum2Index.find(Succ.getSUnit()->NodeNum);
513 if (I != NodeNum2Index.end())
514 HasLowLatencyNonWaitedParent[I->second] = 1;
515 }
516 }
517 SU->isScheduled = true;
518}
519
520void SIScheduleBlock::finalizeUnits() {
521 // We remove links from outside blocks to enable scheduling inside the block.
522 for (SUnit* SU : SUnits) {
523 releaseSuccessors(SU, false);
524 if (DAG->IsHighLatencySU[SU->NodeNum])
525 HighLatencyBlock = true;
526 }
527 HasLowLatencyNonWaitedParent.resize(SUnits.size(), 0);
528}
529
530// we maintain ascending order of IDs
531void SIScheduleBlock::addPred(SIScheduleBlock *Pred) {
532 unsigned PredID = Pred->getID();
533
534 // Check if not already predecessor.
535 for (SIScheduleBlock* P : Preds) {
536 if (PredID == P->getID())
537 return;
538 }
539 Preds.push_back(Pred);
540
541 assert(none_of(Succs,
542 [=](std::pair<SIScheduleBlock*,
543 SIScheduleBlockLinkKind> S) {
544 return PredID == S.first->getID();
545 }) &&
546 "Loop in the Block Graph!");
547}
548
549void SIScheduleBlock::addSucc(SIScheduleBlock *Succ,
550 SIScheduleBlockLinkKind Kind) {
551 unsigned SuccID = Succ->getID();
552
553 // Check if not already a successor.
554 for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> &S : Succs) {
555 if (SuccID == S.first->getID()) {
556 if (S.second == SIScheduleBlockLinkKind::NoData &&
557 Kind == SIScheduleBlockLinkKind::Data)
558 S.second = Kind;
559 return;
560 }
561 }
562 if (Succ->isHighLatencyBlock())
563 ++NumHighLatencySuccessors;
564 Succs.push_back(std::make_pair(Succ, Kind));
565
566 assert(none_of(Preds,
567 [=](SIScheduleBlock *P) { return SuccID == P->getID(); }) &&
568 "Loop in the Block Graph!");
569}
570
571#ifndef NDEBUG
572void SIScheduleBlock::printDebug(bool full) {
573 dbgs() << "Block (" << ID << ")\n";
574 if (!full)
575 return;
576
577 dbgs() << "\nContains High Latency Instruction: "
578 << HighLatencyBlock << '\n';
579 dbgs() << "\nDepends On:\n";
580 for (SIScheduleBlock* P : Preds) {
581 P->printDebug(false);
582 }
583
584 dbgs() << "\nSuccessors:\n";
585 for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S : Succs) {
586 if (S.second == SIScheduleBlockLinkKind::Data)
587 dbgs() << "(Data Dep) ";
588 S.first->printDebug(false);
589 }
590
591 if (Scheduled) {
592 dbgs() << "LiveInPressure " << LiveInPressure[DAG->getSGPRSetID()] << ' '
593 << LiveInPressure[DAG->getVGPRSetID()] << '\n';
594 dbgs() << "LiveOutPressure " << LiveOutPressure[DAG->getSGPRSetID()] << ' '
595 << LiveOutPressure[DAG->getVGPRSetID()] << "\n\n";
596 dbgs() << "LiveIns:\n";
597 for (unsigned Reg : LiveInRegs)
598 dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' ';
599
600 dbgs() << "\nLiveOuts:\n";
601 for (unsigned Reg : LiveOutRegs)
602 dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' ';
603 }
604
605 dbgs() << "\nInstructions:\n";
606 if (!Scheduled) {
607 for (SUnit* SU : SUnits) {
608 SU->dump(DAG);
609 }
610 } else {
611 for (SUnit* SU : SUnits) {
612 SU->dump(DAG);
613 }
614 }
615
616 dbgs() << "///////////////////////\n";
617}
618#endif
619
620// SIScheduleBlockCreator //
621
622SIScheduleBlockCreator::SIScheduleBlockCreator(SIScheduleDAGMI *DAG) :
623DAG(DAG) {
624}
625
626SIScheduleBlockCreator::~SIScheduleBlockCreator() = default;
627
628SIScheduleBlocks
629SIScheduleBlockCreator::getBlocks(SISchedulerBlockCreatorVariant BlockVariant) {
630 std::map<SISchedulerBlockCreatorVariant, SIScheduleBlocks>::iterator B =
631 Blocks.find(BlockVariant);
632 if (B == Blocks.end()) {
633 SIScheduleBlocks Res;
634 createBlocksForVariant(BlockVariant);
635 topologicalSort();
636 scheduleInsideBlocks();
637 fillStats();
638 Res.Blocks = CurrentBlocks;
639 Res.TopDownIndex2Block = TopDownIndex2Block;
640 Res.TopDownBlock2Index = TopDownBlock2Index;
641 Blocks[BlockVariant] = Res;
642 return Res;
643 } else {
644 return B->second;
645 }
646}
647
648bool SIScheduleBlockCreator::isSUInBlock(SUnit *SU, unsigned ID) {
649 if (SU->NodeNum >= DAG->SUnits.size())
650 return false;
651 return CurrentBlocks[Node2CurrentBlock[SU->NodeNum]]->getID() == ID;
652}
653
654void SIScheduleBlockCreator::colorHighLatenciesAlone() {
655 unsigned DAGSize = DAG->SUnits.size();
656
657 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
658 SUnit *SU = &DAG->SUnits[i];
659 if (DAG->IsHighLatencySU[SU->NodeNum]) {
660 CurrentColoring[SU->NodeNum] = NextReservedID++;
661 }
662 }
663}
664
665static bool
666hasDataDependencyPred(const SUnit &SU, const SUnit &FromSU) {
667 for (const auto &PredDep : SU.Preds) {
668 if (PredDep.getSUnit() == &FromSU &&
669 PredDep.getKind() == llvm::SDep::Data)
670 return true;
671 }
672 return false;
673}
674
675void SIScheduleBlockCreator::colorHighLatenciesGroups() {
676 unsigned DAGSize = DAG->SUnits.size();
677 unsigned NumHighLatencies = 0;
678 unsigned GroupSize;
679 int Color = NextReservedID;
680 unsigned Count = 0;
681 std::set<unsigned> FormingGroup;
682
683 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
684 SUnit *SU = &DAG->SUnits[i];
685 if (DAG->IsHighLatencySU[SU->NodeNum])
686 ++NumHighLatencies;
687 }
688
689 if (NumHighLatencies == 0)
690 return;
691
692 if (NumHighLatencies <= 6)
693 GroupSize = 2;
694 else if (NumHighLatencies <= 12)
695 GroupSize = 3;
696 else
697 GroupSize = 4;
698
699 for (unsigned SUNum : DAG->TopDownIndex2SU) {
700 const SUnit &SU = DAG->SUnits[SUNum];
701 if (DAG->IsHighLatencySU[SU.NodeNum]) {
702 bool CompatibleGroup = true;
703 int ProposedColor = Color;
704 std::vector<int> AdditionalElements;
705
706 // We don't want to put in the same block
707 // two high latency instructions that depend
708 // on each other.
709 // One way would be to check canAddEdge
710 // in both directions, but that currently is not
711 // enough because the high latency order is
712 // already enforced there (via links).
713 // Instead, look at the dependencies between the
714 // high latency instructions and deduce if it is
715 // a data dependency or not.
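// Editor's example (hypothetical SUs, not part of the original file): if a
// high latency sample SU_a produces a VGPR read by another high latency
// sample SU_b, the hasDataDependencyPred checks below detect the data edge,
// the group is marked incompatible, and SU_b starts a new block, keeping it
// available to hide SU_a's latency.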
716 for (unsigned j : FormingGroup) {
717 bool HasSubGraph;
718 std::vector<int> SubGraph;
719 // By construction (topological order), if SU and
720 // DAG->SUnits[j] are linked, DAG->SUnits[j] is necessarily
721 // in the parent graph of SU.
722#ifndef NDEBUG
723 SubGraph = DAG->GetTopo()->GetSubGraph(SU, DAG->SUnits[j],
724 HasSubGraph);
725 assert(!HasSubGraph);
726#endif
727 SubGraph = DAG->GetTopo()->GetSubGraph(DAG->SUnits[j], SU,
728 HasSubGraph);
729 if (!HasSubGraph)
730 continue; // No dependencies between each other
731 else if (SubGraph.size() > 5) {
732 // Too many elements would be required to be added to the block.
733 CompatibleGroup = false;
734 break;
735 }
736 else {
737 // Check the type of dependency
738 for (unsigned k : SubGraph) {
739 // If in the path to join the two instructions,
740 // there is another high latency instruction,
741 // or instructions colored for another block
742 // abort the merge.
743 if (DAG->IsHighLatencySU[k] ||
744 (CurrentColoring[k] != ProposedColor &&
745 CurrentColoring[k] != 0)) {
746 CompatibleGroup = false;
747 break;
748 }
749 // If one of the SU in the subgraph depends on the result of SU j,
750 // there'll be a data dependency.
751 if (hasDataDependencyPred(DAG->SUnits[k], DAG->SUnits[j])) {
752 CompatibleGroup = false;
753 break;
754 }
755 }
756 if (!CompatibleGroup)
757 break;
758 // Same check for the SU
759 if (hasDataDependencyPred(SU, DAG->SUnits[j])) {
760 CompatibleGroup = false;
761 break;
762 }
763 // Add all the required instructions to the block
764 // These cannot live in another block (because they
765 // depend (order dependency) on one of the
766 // instructions in the block, and are required for the
767 // high latency instruction we add).
768 AdditionalElements.insert(AdditionalElements.end(),
769 SubGraph.begin(), SubGraph.end());
770 }
771 }
772 if (CompatibleGroup) {
773 FormingGroup.insert(SU.NodeNum);
774 for (unsigned j : AdditionalElements)
775 CurrentColoring[j] = ProposedColor;
776 CurrentColoring[SU.NodeNum] = ProposedColor;
777 ++Count;
778 }
779 // Found one incompatible instruction,
780 // or has filled a big enough group.
781 // -> start a new one.
782 if (!CompatibleGroup) {
783 FormingGroup.clear();
784 Color = ++NextReservedID;
785 ProposedColor = Color;
786 FormingGroup.insert(SU.NodeNum);
787 CurrentColoring[SU.NodeNum] = ProposedColor;
788 Count = 0;
789 } else if (Count == GroupSize) {
790 FormingGroup.clear();
791 Color = ++NextReservedID;
792 ProposedColor = Color;
793 Count = 0;
794 }
795 }
796 }
797}
798
799void SIScheduleBlockCreator::colorComputeReservedDependencies() {
800 unsigned DAGSize = DAG->SUnits.size();
801 std::map<std::set<unsigned>, unsigned> ColorCombinations;
802
803 CurrentTopDownReservedDependencyColoring.clear();
804 CurrentBottomUpReservedDependencyColoring.clear();
805
806 CurrentTopDownReservedDependencyColoring.resize(DAGSize, 0);
807 CurrentBottomUpReservedDependencyColoring.resize(DAGSize, 0);
808
809 // Traverse TopDown, and give different colors to SUs depending
810 // on which combination of High Latencies they depend on.
811
812 for (unsigned SUNum : DAG->TopDownIndex2SU) {
813 SUnit *SU = &DAG->SUnits[SUNum];
814 std::set<unsigned> SUColors;
815
816 // Already given.
817 if (CurrentColoring[SU->NodeNum]) {
818 CurrentTopDownReservedDependencyColoring[SU->NodeNum] =
819 CurrentColoring[SU->NodeNum];
820 continue;
821 }
822
823 for (SDep& PredDep : SU->Preds) {
824 SUnit *Pred = PredDep.getSUnit();
825 if (PredDep.isWeak() || Pred->NodeNum >= DAGSize)
826 continue;
827 if (CurrentTopDownReservedDependencyColoring[Pred->NodeNum] > 0)
828 SUColors.insert(CurrentTopDownReservedDependencyColoring[Pred->NodeNum]);
829 }
830 // Color 0 by default.
831 if (SUColors.empty())
832 continue;
833 // Same color as parents.
834 if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
835 CurrentTopDownReservedDependencyColoring[SU->NodeNum] =
836 *SUColors.begin();
837 else {
838 std::map<std::set<unsigned>, unsigned>::iterator Pos =
839 ColorCombinations.find(SUColors);
840 if (Pos != ColorCombinations.end()) {
841 CurrentTopDownReservedDependencyColoring[SU->NodeNum] = Pos->second;
842 } else {
843 CurrentTopDownReservedDependencyColoring[SU->NodeNum] =
844 NextNonReservedID;
845 ColorCombinations[SUColors] = NextNonReservedID++;
846 }
847 }
848 }
849
850 ColorCombinations.clear();
851
852 // Same as before, but BottomUp.
853
854 for (unsigned SUNum : DAG->BottomUpIndex2SU) {
855 SUnit *SU = &DAG->SUnits[SUNum];
856 std::set<unsigned> SUColors;
857
858 // Already given.
859 if (CurrentColoring[SU->NodeNum]) {
860 CurrentBottomUpReservedDependencyColoring[SU->NodeNum] =
861 CurrentColoring[SU->NodeNum];
862 continue;
863 }
864
865 for (SDep& SuccDep : SU->Succs) {
866 SUnit *Succ = SuccDep.getSUnit();
867 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
868 continue;
869 if (CurrentBottomUpReservedDependencyColoring[Succ->NodeNum] > 0)
870 SUColors.insert(CurrentBottomUpReservedDependencyColoring[Succ->NodeNum]);
871 }
872 // Keep color 0.
873 if (SUColors.empty())
874 continue;
875 // Same color as parents.
876 if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
877 CurrentBottomUpReservedDependencyColoring[SU->NodeNum] =
878 *SUColors.begin();
879 else {
880 std::map<std::set<unsigned>, unsigned>::iterator Pos =
881 ColorCombinations.find(SUColors);
882 if (Pos != ColorCombinations.end()) {
883 CurrentBottomUpReservedDependencyColoring[SU->NodeNum] = Pos->second;
884 } else {
885 CurrentBottomUpReservedDependencyColoring[SU->NodeNum] =
886 NextNonReservedID;
887 ColorCombinations[SUColors] = NextNonReservedID++;
888 }
889 }
890 }
891}
892
893void SIScheduleBlockCreator::colorAccordingToReservedDependencies() {
894 unsigned DAGSize = DAG->SUnits.size();
895 std::map<std::pair<unsigned, unsigned>, unsigned> ColorCombinations;
896
897 // Every combination of colors given by the top down
898 // and bottom up reserved node dependency colorings gets its own color.
899
900 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
901 SUnit *SU = &DAG->SUnits[i];
902 std::pair<unsigned, unsigned> SUColors;
903
904 // High latency instructions: already given.
905 if (CurrentColoring[SU->NodeNum])
906 continue;
907
908 SUColors.first = CurrentTopDownReservedDependencyColoring[SU->NodeNum];
909 SUColors.second = CurrentBottomUpReservedDependencyColoring[SU->NodeNum];
910
911 std::map<std::pair<unsigned, unsigned>, unsigned>::iterator Pos =
912 ColorCombinations.find(SUColors);
913 if (Pos != ColorCombinations.end()) {
914 CurrentColoring[SU->NodeNum] = Pos->second;
915 } else {
916 CurrentColoring[SU->NodeNum] = NextNonReservedID;
917 ColorCombinations[SUColors] = NextNonReservedID++;
918 }
919 }
920}
921
922void SIScheduleBlockCreator::colorEndsAccordingToDependencies() {
923 unsigned DAGSize = DAG->SUnits.size();
924 std::vector<int> PendingColoring = CurrentColoring;
925
926 assert(DAGSize >= 1 &&
927 CurrentBottomUpReservedDependencyColoring.size() == DAGSize &&
928 CurrentTopDownReservedDependencyColoring.size() == DAGSize);
929 // If there is no reserved block at all, do nothing. We don't want
930 // everything in one block.
931 if (*std::max_element(CurrentBottomUpReservedDependencyColoring.begin(),
932 CurrentBottomUpReservedDependencyColoring.end()) == 0 &&
933 *std::max_element(CurrentTopDownReservedDependencyColoring.begin(),
934 CurrentTopDownReservedDependencyColoring.end()) == 0)
935 return;
936
937 for (unsigned SUNum : DAG->BottomUpIndex2SU) {
938 SUnit *SU = &DAG->SUnits[SUNum];
939 std::set<unsigned> SUColors;
940 std::set<unsigned> SUColorsPending;
941
942 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
943 continue;
944
945 if (CurrentBottomUpReservedDependencyColoring[SU->NodeNum] > 0 ||
946 CurrentTopDownReservedDependencyColoring[SU->NodeNum] > 0)
947 continue;
948
949 for (SDep& SuccDep : SU->Succs) {
950 SUnit *Succ = SuccDep.getSUnit();
951 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
952 continue;
953 if (CurrentBottomUpReservedDependencyColoring[Succ->NodeNum] > 0 ||
954 CurrentTopDownReservedDependencyColoring[Succ->NodeNum] > 0)
955 SUColors.insert(CurrentColoring[Succ->NodeNum]);
956 SUColorsPending.insert(PendingColoring[Succ->NodeNum]);
957 }
958 // If there is only one child/parent block, and that block
959 // is not among the ones we are removing in this path, then
960 // merge the instruction into that block.
961 if (SUColors.size() == 1 && SUColorsPending.size() == 1)
962 PendingColoring[SU->NodeNum] = *SUColors.begin();
963 else // TODO: Attribute new colors depending on color
964 // combination of children.
965 PendingColoring[SU->NodeNum] = NextNonReservedID++;
966 }
967 CurrentColoring = PendingColoring;
968}
969
970
971void SIScheduleBlockCreator::colorForceConsecutiveOrderInGroup() {
972 unsigned DAGSize = DAG->SUnits.size();
973 unsigned PreviousColor;
974 std::set<unsigned> SeenColors;
975
976 if (DAGSize <= 1)
977 return;
978
979 PreviousColor = CurrentColoring[0];
980
981 for (unsigned i = 1, e = DAGSize; i != e; ++i) {
982 SUnit *SU = &DAG->SUnits[i];
983 unsigned CurrentColor = CurrentColoring[i];
984 unsigned PreviousColorSave = PreviousColor;
985 assert(i == SU->NodeNum);
986
987 if (CurrentColor != PreviousColor)
988 SeenColors.insert(PreviousColor);
989 PreviousColor = CurrentColor;
990
991 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
992 continue;
993
994 if (SeenColors.find(CurrentColor) == SeenColors.end())
995 continue;
996
997 if (PreviousColorSave != CurrentColor)
998 CurrentColoring[i] = NextNonReservedID++;
999 else
1000 CurrentColoring[i] = CurrentColoring[i-1];
1001 }
1002}
1003
1004void SIScheduleBlockCreator::colorMergeConstantLoadsNextGroup() {
1005 unsigned DAGSize = DAG->SUnits.size();
1006
1007 for (unsigned SUNum : DAG->BottomUpIndex2SU) {
1008 SUnit *SU = &DAG->SUnits[SUNum];
1009 std::set<unsigned> SUColors;
1010
1011 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
1012 continue;
1013
1014 // No predecessor: Vgpr constant loading.
1015 // Low latency instructions usually have a predecessor (the address)
1016 if (SU->Preds.size() > 0 && !DAG->IsLowLatencySU[SU->NodeNum])
1017 continue;
1018
1019 for (SDep& SuccDep : SU->Succs) {
1020 SUnit *Succ = SuccDep.getSUnit();
1021 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
1022 continue;
1023 SUColors.insert(CurrentColoring[Succ->NodeNum]);
1024 }
1025 if (SUColors.size() == 1)
1026 CurrentColoring[SU->NodeNum] = *SUColors.begin();
1027 }
1028}
1029
1030void SIScheduleBlockCreator::colorMergeIfPossibleNextGroup() {
1031 unsigned DAGSize = DAG->SUnits.size();
1032
1033 for (unsigned SUNum : DAG->BottomUpIndex2SU) {
1034 SUnit *SU = &DAG->SUnits[SUNum];
1035 std::set<unsigned> SUColors;
1036
1037 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
1038 continue;
1039
1040 for (SDep& SuccDep : SU->Succs) {
1041 SUnit *Succ = SuccDep.getSUnit();
1042 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
1043 continue;
1044 SUColors.insert(CurrentColoring[Succ->NodeNum]);
1045 }
1046 if (SUColors.size() == 1)
1047 CurrentColoring[SU->NodeNum] = *SUColors.begin();
1048 }
1049}
1050
1051void SIScheduleBlockCreator::colorMergeIfPossibleNextGroupOnlyForReserved() {
1052 unsigned DAGSize = DAG->SUnits.size();
1053
1054 for (unsigned SUNum : DAG->BottomUpIndex2SU) {
1055 SUnit *SU = &DAG->SUnits[SUNum];
1056 std::set<unsigned> SUColors;
1057
1058 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
1059 continue;
1060
1061 for (SDep& SuccDep : SU->Succs) {
1062 SUnit *Succ = SuccDep.getSUnit();
1063 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
1064 continue;
1065 SUColors.insert(CurrentColoring[Succ->NodeNum]);
1066 }
1067 if (SUColors.size() == 1 && *SUColors.begin() <= DAGSize)
1068 CurrentColoring[SU->NodeNum] = *SUColors.begin();
1069 }
1070}
1071
1072void SIScheduleBlockCreator::colorMergeIfPossibleSmallGroupsToNextGroup() {
1073 unsigned DAGSize = DAG->SUnits.size();
1074 std::map<unsigned, unsigned> ColorCount;
1075
1076 for (unsigned SUNum : DAG->BottomUpIndex2SU) {
1077 SUnit *SU = &DAG->SUnits[SUNum];
1078 unsigned color = CurrentColoring[SU->NodeNum];
1079 ++ColorCount[color];
1080 }
1081
1082 for (unsigned SUNum : DAG->BottomUpIndex2SU) {
1083 SUnit *SU = &DAG->SUnits[SUNum];
1084 unsigned color = CurrentColoring[SU->NodeNum];
1085 std::set<unsigned> SUColors;
1086
1087 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
1088 continue;
1089
1090 if (ColorCount[color] > 1)
1091 continue;
1092
1093 for (SDep& SuccDep : SU->Succs) {
1094 SUnit *Succ = SuccDep.getSUnit();
1095 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
1096 continue;
1097 SUColors.insert(CurrentColoring[Succ->NodeNum]);
1098 }
1099 if (SUColors.size() == 1 && *SUColors.begin() != color) {
1100 --ColorCount[color];
1101 CurrentColoring[SU->NodeNum] = *SUColors.begin();
1102 ++ColorCount[*SUColors.begin()];
1103 }
1104 }
1105}
1106
1107void SIScheduleBlockCreator::cutHugeBlocks() {
1108 // TODO
1109}
1110
1111void SIScheduleBlockCreator::regroupNoUserInstructions() {
1112 unsigned DAGSize = DAG->SUnits.size();
1113 int GroupID = NextNonReservedID++;
1114
1115 for (unsigned SUNum : DAG->BottomUpIndex2SU) {
1116 SUnit *SU = &DAG->SUnits[SUNum];
1117 bool hasSuccessor = false;
1118
1119 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
1120 continue;
1121
1122 for (SDep& SuccDep : SU->Succs) {
1123 SUnit *Succ = SuccDep.getSUnit();
1124 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
1125 continue;
1126 hasSuccessor = true;
1127 }
1128 if (!hasSuccessor)
1129 CurrentColoring[SU->NodeNum] = GroupID;
1130 }
1131}
1132
1133void SIScheduleBlockCreator::colorExports() {
1134 unsigned ExportColor = NextNonReservedID++;
1135 SmallVector<unsigned, 8> ExpGroup;
1136
1137 // Put all exports together in a block.
1138 // The block will naturally end up being scheduled last,
1139 // thus putting exports at the end of the schedule, which
1140 // is better for performance.
1141 // However we must ensure, for safety, that the exports can be put
1142 // together in the same block without any other instruction.
1143 // This could happen, for example, when scheduling after regalloc
1144 // if reloading a spilled register from memory using the same
1145 // register as used in a previous export.
1146 // If that happens, do not regroup the exports.
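// Editor's example (hypothetical scenario, not part of the original file):
// if a spill reload redefines a VGPR between two exports that both read it,
// the SubGraph computed below between the two EXPs contains that non-EXP
// reload, so the function returns early and the exports keep whatever colors
// earlier passes gave them.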
1147 for (unsigned SUNum : DAG->TopDownIndex2SU) {
1148 const SUnit &SU = DAG->SUnits[SUNum];
1149 if (SIInstrInfo::isEXP(*SU.getInstr())) {
1150 // Check the EXP can be added to the group safely,
1151 // i.e. without needing any other instruction.
1152 // The EXP is allowed to depend on other EXPs
1153 // (they will be in the same group).
1154 for (unsigned j : ExpGroup) {
1155 bool HasSubGraph;
1156 std::vector<int> SubGraph;
1157 // By construction (topological order), if SU and
1158 // DAG->SUnits[j] are linked, DAG->SUnits[j] is necessarily
1159 // in the parent graph of SU.
1160#ifndef NDEBUG
1161 SubGraph = DAG->GetTopo()->GetSubGraph(SU, DAG->SUnits[j],
1162 HasSubGraph);
1163 assert(!HasSubGraph);
1164#endif
1165 SubGraph = DAG->GetTopo()->GetSubGraph(DAG->SUnits[j], SU,
1166 HasSubGraph);
1167 if (!HasSubGraph)
1168 continue; // No dependencies between each other
1169
1170 // SubGraph contains all the instructions required
1171 // between EXP SUnits[j] and EXP SU.
1172 for (unsigned k : SubGraph) {
1173 if (!SIInstrInfo::isEXP(*DAG->SUnits[k].getInstr()))
1174 // Instructions other than EXP would be required in the group.
1175 // Abort the grouping.
1176 return;
1177 }
1178 }
1179
1180 ExpGroup.push_back(SUNum);
1181 }
1182 }
1183
1184 // The group can be formed. Give the color.
1185 for (unsigned j : ExpGroup)
1186 CurrentColoring[j] = ExportColor;
1187}
1188
1189void SIScheduleBlockCreator::createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant) {
1190 unsigned DAGSize = DAG->SUnits.size();
1191 std::map<unsigned,unsigned> RealID;
1192
1193 CurrentBlocks.clear();
1194 CurrentColoring.clear();
1195 CurrentColoring.resize(DAGSize, 0);
1196 Node2CurrentBlock.clear();
1197
1198 // Restore links previous scheduling variant has overridden.
1199 DAG->restoreSULinksLeft();
1200
1201 NextReservedID = 1;
1202 NextNonReservedID = DAGSize + 1;
1203
1204 DEBUG(dbgs() << "Coloring the graph\n");
1205
1206 if (BlockVariant == SISchedulerBlockCreatorVariant::LatenciesGrouped)
1207 colorHighLatenciesGroups();
1208 else
1209 colorHighLatenciesAlone();
1210 colorComputeReservedDependencies();
1211 colorAccordingToReservedDependencies();
1212 colorEndsAccordingToDependencies();
1213 if (BlockVariant == SISchedulerBlockCreatorVariant::LatenciesAlonePlusConsecutive)
1214 colorForceConsecutiveOrderInGroup();
1215 regroupNoUserInstructions();
1216 colorMergeConstantLoadsNextGroup();
1217 colorMergeIfPossibleNextGroupOnlyForReserved();
1218 colorExports();
1219
1220 // Put SUs of same color into same block
1221 Node2CurrentBlock.resize(DAGSize, -1);
1222 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1223 SUnit *SU = &DAG->SUnits[i];
1224 unsigned Color = CurrentColoring[SU->NodeNum];
1225 if (RealID.find(Color) == RealID.end()) {
1226 int ID = CurrentBlocks.size();
1227 BlockPtrs.push_back(llvm::make_unique<SIScheduleBlock>(DAG, this, ID));
1228 CurrentBlocks.push_back(BlockPtrs.rbegin()->get());
1229 RealID[Color] = ID;
1230 }
1231 CurrentBlocks[RealID[Color]]->addUnit(SU);
1232 Node2CurrentBlock[SU->NodeNum] = RealID[Color];
1233 }
1234
1235 // Build dependencies between blocks.
1236 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1237 SUnit *SU = &DAG->SUnits[i];
1238 int SUID = Node2CurrentBlock[i];
1239 for (SDep& SuccDep : SU->Succs) {
1240 SUnit *Succ = SuccDep.getSUnit();
1241 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
1242 continue;
1243 if (Node2CurrentBlock[Succ->NodeNum] != SUID)
1244 CurrentBlocks[SUID]->addSucc(CurrentBlocks[Node2CurrentBlock[Succ->NodeNum]],
1245 SuccDep.isCtrl() ? NoData : Data);
1246 }
1247 for (SDep& PredDep : SU->Preds) {
1248 SUnit *Pred = PredDep.getSUnit();
1249 if (PredDep.isWeak() || Pred->NodeNum >= DAGSize)
1250 continue;
1251 if (Node2CurrentBlock[Pred->NodeNum] != SUID)
1252 CurrentBlocks[SUID]->addPred(CurrentBlocks[Node2CurrentBlock[Pred->NodeNum]]);
1253 }
1254 }
1255
1256 // Free root and leafs of all blocks to enable scheduling inside them.
1257 for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) {
1258 SIScheduleBlock *Block = CurrentBlocks[i];
1259 Block->finalizeUnits();
1260 }
1261 DEBUG(
1262 dbgs() << "Blocks created:\n\n";
1263 for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) {
1264 SIScheduleBlock *Block = CurrentBlocks[i];
1265 Block->printDebug(true);
1266 }
1267 );
1268}
1269
1270// Two functions taken from Codegen/MachineScheduler.cpp
1271
1272/// Non-const version.
1273static MachineBasicBlock::iterator
1274nextIfDebug(MachineBasicBlock::iterator I,
1275 MachineBasicBlock::const_iterator End) {
1276 for (; I != End; ++I) {
1277 if (!I->isDebugValue())
1278 break;
1279 }
1280 return I;
1281}
1282
1283void SIScheduleBlockCreator::topologicalSort() {
1284 unsigned DAGSize = CurrentBlocks.size();
1285 std::vector<int> WorkList;
1286
1287 DEBUG(dbgs() << "Topological Sort\n");
1288
1289 WorkList.reserve(DAGSize);
1290 TopDownIndex2Block.resize(DAGSize);
1291 TopDownBlock2Index.resize(DAGSize);
1292 BottomUpIndex2Block.resize(DAGSize);
1293
1294 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1295 SIScheduleBlock *Block = CurrentBlocks[i];
1296 unsigned Degree = Block->getSuccs().size();
1297 TopDownBlock2Index[i] = Degree;
1298 if (Degree == 0) {
1299 WorkList.push_back(i);
1300 }
1301 }
1302
1303 int Id = DAGSize;
1304 while (!WorkList.empty()) {
1305 int i = WorkList.back();
1306 SIScheduleBlock *Block = CurrentBlocks[i];
1307 WorkList.pop_back();
1308 TopDownBlock2Index[i] = --Id;
1309 TopDownIndex2Block[Id] = i;
1310 for (SIScheduleBlock* Pred : Block->getPreds()) {
1311 if (!--TopDownBlock2Index[Pred->getID()])
1312 WorkList.push_back(Pred->getID());
1313 }
1314 }
1315
1316#ifndef NDEBUG
1317 // Check correctness of the ordering.
1318 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1319 SIScheduleBlock *Block = CurrentBlocks[i];
1320 for (SIScheduleBlock* Pred : Block->getPreds()) {
1321 assert(TopDownBlock2Index[i] > TopDownBlock2Index[Pred->getID()] &&
1322 "Wrong Top Down topological sorting");
1323 }
1324 }
1325#endif
1326
1327 BottomUpIndex2Block = std::vector<int>(TopDownIndex2Block.rbegin(),
1328 TopDownIndex2Block.rend());
1329}
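
The routine above is a counting-based (Kahn-style) topological sort over the block DAG: blocks with no successors seed the worklist, indices are handed out from the back, and the bottom-up order is simply the reverse of the top-down order. Below is a minimal standalone sketch of the same technique on a made-up three-block DAG; the data and the main() wrapper are illustrative only and are not part of the LLVM sources.

#include <cassert>
#include <vector>

int main() {
  // Succs[b] / Preds[b] describe a toy DAG: 0 -> 1, 0 -> 2, 1 -> 2.
  std::vector<std::vector<int>> Succs = {{1, 2}, {2}, {}};
  std::vector<std::vector<int>> Preds = {{}, {0}, {0, 1}};
  unsigned N = Succs.size();

  std::vector<int> TopDownBlock2Index(N), TopDownIndex2Block(N), WorkList;
  for (unsigned b = 0; b != N; ++b) {
    TopDownBlock2Index[b] = Succs[b].size(); // temporarily holds the out-degree
    if (Succs[b].empty())
      WorkList.push_back(b);
  }

  int Id = N;
  while (!WorkList.empty()) {
    int b = WorkList.back();
    WorkList.pop_back();
    TopDownBlock2Index[b] = --Id;            // final top-down index
    TopDownIndex2Block[Id] = b;
    for (int P : Preds[b])
      if (!--TopDownBlock2Index[P])          // all successors of P are placed
        WorkList.push_back(P);
  }

  // The bottom-up order is simply the reverse of the top-down order.
  std::vector<int> BottomUpIndex2Block(TopDownIndex2Block.rbegin(),
                                       TopDownIndex2Block.rend());
  assert(TopDownIndex2Block[0] == 0 && BottomUpIndex2Block[0] == 2);
  return 0;
}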
1330
1331void SIScheduleBlockCreator::scheduleInsideBlocks() {
1332 unsigned DAGSize = CurrentBlocks.size();
1333
1334 DEBUG(dbgs() << "\nScheduling Blocks\n\n");
1335
1336 // We first do a valid scheduling so that each Block corresponds
1337 // to a contiguous range of instructions.
1338 DEBUG(dbgs() << "First phase: Fast scheduling for Reg Liveness\n");
1339 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1340 SIScheduleBlock *Block = CurrentBlocks[i];
1341 Block->fastSchedule();
1342 }
1343
1344 // Note: the following code and the later pass that restores the previous
1345 // positions are by far the most expensive operations of the Scheduler.
1346
1347 // Do not update CurrentTop.
1348 MachineBasicBlock::iterator CurrentTopFastSched = DAG->getCurrentTop();
1349 std::vector<MachineBasicBlock::iterator> PosOld;
1350 std::vector<MachineBasicBlock::iterator> PosNew;
1351 PosOld.reserve(DAG->SUnits.size());
1352 PosNew.reserve(DAG->SUnits.size());
1353
1354 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1355 int BlockIndice = TopDownIndex2Block[i];
1356 SIScheduleBlock *Block = CurrentBlocks[BlockIndice];
1357 std::vector<SUnit*> SUs = Block->getScheduledUnits();
1358
1359 for (SUnit* SU : SUs) {
1360 MachineInstr *MI = SU->getInstr();
1361 MachineBasicBlock::iterator Pos = MI;
1362 PosOld.push_back(Pos);
1363 if (&*CurrentTopFastSched == MI) {
1364 PosNew.push_back(Pos);
1365 CurrentTopFastSched = nextIfDebug(++CurrentTopFastSched,
1366 DAG->getCurrentBottom());
1367 } else {
1368 // Update the instruction stream.
1369 DAG->getBB()->splice(CurrentTopFastSched, DAG->getBB(), MI);
1370
1371 // Update LiveIntervals.
1372 // Note: Moving all the instructions and calling handleMove every time
1373 // is the most CPU-intensive operation of the scheduler.
1374 // It would help a lot if there were a way to recompute the
1375 // LiveIntervals for the entire scheduling region.
1376 DAG->getLIS()->handleMove(*MI, /*UpdateFlags=*/true);
1377 PosNew.push_back(CurrentTopFastSched);
1378 }
1379 }
1380 }
1381
1382 // Now each Block of SUs corresponds to a contiguous block of MIs.
1383 // We do the final schedule for the instructions inside each block.
1384 // The property that all the SUs of a Block are grouped together as MIs
1385 // is used for correct reg usage tracking.
1386 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1387 SIScheduleBlock *Block = CurrentBlocks[i];
1388 std::vector<SUnit*> SUs = Block->getScheduledUnits();
1389 Block->schedule((*SUs.begin())->getInstr(), (*SUs.rbegin())->getInstr());
1390 }
1391
1392 DEBUG(dbgs() << "Restoring MI Pos\n");
1393 // Restore old ordering (which prevents a LIS->handleMove bug).
1394 for (unsigned i = PosOld.size(), e = 0; i != e; --i) {
1395 MachineBasicBlock::iterator POld = PosOld[i-1];
1396 MachineBasicBlock::iterator PNew = PosNew[i-1];
1397 if (PNew != POld) {
1398 // Update the instruction stream.
1399 DAG->getBB()->splice(POld, DAG->getBB(), PNew);
1400
1401 // Update LiveIntervals.
1402 DAG->getLIS()->handleMove(*POld, /*UpdateFlags=*/true);
1403 }
1404 }
1405
1406 DEBUG(
1407 for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) {
1408 SIScheduleBlock *Block = CurrentBlocks[i];
1409 Block->printDebug(true);
1410 }
1411 );
1412}
1413
1414void SIScheduleBlockCreator::fillStats() {
1415 unsigned DAGSize = CurrentBlocks.size();
1416
1417 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1418 int BlockIndice = TopDownIndex2Block[i];
1419 SIScheduleBlock *Block = CurrentBlocks[BlockIndice];
1420 if (Block->getPreds().empty())
1421 Block->Depth = 0;
1422 else {
1423 unsigned Depth = 0;
1424 for (SIScheduleBlock *Pred : Block->getPreds()) {
1425 if (Depth < Pred->Depth + Pred->getCost())
1426 Depth = Pred->Depth + Pred->getCost();
1427 }
1428 Block->Depth = Depth;
1429 }
1430 }
1431
1432 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1433 int BlockIndice = BottomUpIndex2Block[i];
1434 SIScheduleBlock *Block = CurrentBlocks[BlockIndice];
1435 if (Block->getSuccs().empty())
1436 Block->Height = 0;
1437 else {
1438 unsigned Height = 0;
1439 for (const auto &Succ : Block->getSuccs())
1440 Height = std::max(Height, Succ.first->Height + Succ.first->getCost());
1441 Block->Height = Height;
1442 }
1443 }
1444}
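
To make the two passes above concrete (toy numbers, not taken from a real schedule): for a chain of blocks A -> B -> C with costs 2, 3 and 1, the top-down pass gives Depth(A) = 0, Depth(B) = Depth(A) + cost(A) = 2 and Depth(C) = Depth(B) + cost(B) = 5, while the bottom-up pass gives Height(C) = 0, Height(B) = Height(C) + cost(C) = 1 and Height(A) = Height(B) + cost(B) = 4.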
1445
1446// SIScheduleBlockScheduler //
1447
1448SIScheduleBlockScheduler::SIScheduleBlockScheduler(SIScheduleDAGMI *DAG,
1449 SISchedulerBlockSchedulerVariant Variant,
1450 SIScheduleBlocks BlocksStruct) :
1451 DAG(DAG), Variant(Variant), Blocks(BlocksStruct.Blocks),
1452 LastPosWaitedHighLatency(0), NumBlockScheduled(0), VregCurrentUsage(0),
1453 SregCurrentUsage(0), maxVregUsage(0), maxSregUsage(0) {
1454
1455 // Fill in the number of users of every output.
1456 // Warning: while by construction we always have a link between two blocks
1457 // when one needs a result from the other, the number of users of an output
1458 // is not simply the number of child blocks that take the same virtual
1459 // register as input. Here is an example: A produces x and y, B consumes x
1460 // and produces x', and C consumes x' and y. The register coalescer may have
1461 // assigned the same virtual register to x and x'.
1462 // To count accurately, we do a topological sort. When the register is
1463 // produced by several parents, we credit the use to the parent with the
1464 // highest topological index.
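// (Illustrative walk-through, not part of the source: suppose the coalescer
// gave x and x' the same virtual register %v. C's input set then contains %v
// once, and both A and B, which are parents of C, have %v in their output
// set. B has the higher top-down index, so the loop below credits the use to
// B, i.e. ++LiveOutRegsNumUsages[B][%v], instead of counting it for both.)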
1465 LiveOutRegsNumUsages.resize(Blocks.size());
1466 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
1
Assuming 'i' is equal to 'e'
2
Loop condition is false. Execution continues on line 1491
1467 SIScheduleBlock *Block = Blocks[i];
1468 for (unsigned Reg : Block->getInRegs()) {
1469 bool Found = false;
1470 int topoInd = -1;
1471 for (SIScheduleBlock* Pred: Block->getPreds()) {
1472 std::set<unsigned> PredOutRegs = Pred->getOutRegs();
1473 std::set<unsigned>::iterator RegPos = PredOutRegs.find(Reg);
1474
1475 if (RegPos != PredOutRegs.end()) {
1476 Found = true;
1477 if (topoInd < BlocksStruct.TopDownBlock2Index[Pred->getID()]) {
1478 topoInd = BlocksStruct.TopDownBlock2Index[Pred->getID()];
1479 }
1480 }
1481 }
1482
1483 if (!Found)
1484 continue;
1485
1486 int PredID = BlocksStruct.TopDownIndex2Block[topoInd];
1487 ++LiveOutRegsNumUsages[PredID][Reg];
1488 }
1489 }
1490
1491 LastPosHighLatencyParentScheduled.resize(Blocks.size(), 0);
1492 BlockNumPredsLeft.resize(Blocks.size());
1493 BlockNumSuccsLeft.resize(Blocks.size());
1494
1495 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
3
Assuming 'i' is equal to 'e'
4
Loop condition is false. Execution continues on line 1502
1496 SIScheduleBlock *Block = Blocks[i];
1497 BlockNumPredsLeft[i] = Block->getPreds().size();
1498 BlockNumSuccsLeft[i] = Block->getSuccs().size();
1499 }
1500
1501#ifndef NDEBUG
1502 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
5
Assuming 'i' is equal to 'e'
6
Loop condition is false. Execution continues on line 1508
1503 SIScheduleBlock *Block = Blocks[i];
1504 assert(Block->getID() == i);
1505 }
1506#endif
1507
1508 std::set<unsigned> InRegs = DAG->getInRegs();
1509 addLiveRegs(InRegs);
1510
1511 // Increase LiveOutRegsNumUsages for blocks
1512 // producing registers consumed in another
1513 // scheduling region.
1514 for (unsigned Reg : DAG->getOutRegs()) {
1515 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
1516 // Do reverse traversal
1517 int ID = BlocksStruct.TopDownIndex2Block[Blocks.size()-1-i];
1518 SIScheduleBlock *Block = Blocks[ID];
1519 const std::set<unsigned> &OutRegs = Block->getOutRegs();
1520
1521 if (OutRegs.find(Reg) == OutRegs.end())
1522 continue;
1523
1524 ++LiveOutRegsNumUsages[ID][Reg];
1525 break;
1526 }
1527 }
1528
1529 // Fill LiveRegsConsumers for regs that were already
1530 // defined before scheduling.
1531 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
7
Assuming 'i' is equal to 'e'
8
Loop condition is false. Execution continues on line 1550
1532 SIScheduleBlock *Block = Blocks[i];
1533 for (unsigned Reg : Block->getInRegs()) {
1534 bool Found = false;
1535 for (SIScheduleBlock* Pred: Block->getPreds()) {
1536 std::set<unsigned> PredOutRegs = Pred->getOutRegs();
1537 std::set<unsigned>::iterator RegPos = PredOutRegs.find(Reg);
1538
1539 if (RegPos != PredOutRegs.end()) {
1540 Found = true;
1541 break;
1542 }
1543 }
1544
1545 if (!Found)
1546 ++LiveRegsConsumers[Reg];
1547 }
1548 }
1549
1550 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
9
Assuming 'i' is equal to 'e'
10
Loop condition is false. Execution continues on line 1557
1551 SIScheduleBlock *Block = Blocks[i];
1552 if (BlockNumPredsLeft[i] == 0) {
1553 ReadyBlocks.push_back(Block);
1554 }
1555 }
1556
1557 while (SIScheduleBlock *Block = pickBlock()) {
11
Calling 'SIScheduleBlockScheduler::pickBlock'
1558 BlocksScheduled.push_back(Block);
1559 blockScheduled(Block);
1560 }
1561
1562 DEBUG(
1563 dbgs() << "Block Order:";
1564 for (SIScheduleBlock* Block : BlocksScheduled) {
1565 dbgs() << ' ' << Block->getID();
1566 }
1567 dbgs() << '\n';
1568 );
1569}
1570
1571bool SIScheduleBlockScheduler::tryCandidateLatency(SIBlockSchedCandidate &Cand,
1572 SIBlockSchedCandidate &TryCand) {
1573 if (!Cand.isValid()) {
1574 TryCand.Reason = NodeOrder;
1575 return true;
1576 }
1577
1578 // Try to hide high latencies.
1579 if (tryLess(TryCand.LastPosHighLatParentScheduled,
1580 Cand.LastPosHighLatParentScheduled, TryCand, Cand, Latency))
1581 return true;
1582 // Schedule high latencies early so you can hide them better.
1583 if (tryGreater(TryCand.IsHighLatency, Cand.IsHighLatency,
1584 TryCand, Cand, Latency))
1585 return true;
1586 if (TryCand.IsHighLatency && tryGreater(TryCand.Height, Cand.Height,
1587 TryCand, Cand, Depth))
1588 return true;
1589 if (tryGreater(TryCand.NumHighLatencySuccessors,
1590 Cand.NumHighLatencySuccessors,
1591 TryCand, Cand, Successor))
1592 return true;
1593 return false;
1594}
1595
1596bool SIScheduleBlockScheduler::tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
1597 SIBlockSchedCandidate &TryCand) {
1598 if (!Cand.isValid()) {
1599 TryCand.Reason = NodeOrder;
1600 return true;
1601 }
1602
1603 if (tryLess(TryCand.VGPRUsageDiff > 0, Cand.VGPRUsageDiff > 0,
1604 TryCand, Cand, RegUsage))
1605 return true;
1606 if (tryGreater(TryCand.NumSuccessors > 0,
1607 Cand.NumSuccessors > 0,
1608 TryCand, Cand, Successor))
1609 return true;
1610 if (tryGreater(TryCand.Height, Cand.Height, TryCand, Cand, Depth))
1611 return true;
1612 if (tryLess(TryCand.VGPRUsageDiff, Cand.VGPRUsageDiff,
1613 TryCand, Cand, RegUsage))
1614 return true;
1615 return false;
1616}
1617
1618SIScheduleBlock *SIScheduleBlockScheduler::pickBlock() {
1619 SIBlockSchedCandidate Cand;
1620 std::vector<SIScheduleBlock*>::iterator Best;
1621 SIScheduleBlock *Block;
1622 if (ReadyBlocks.empty())
12
Assuming the condition is false
13
Taking false branch
1623 return nullptr;
1624
1625 DAG->fillVgprSgprCost(LiveRegs.begin(), LiveRegs.end(),
1626 VregCurrentUsage, SregCurrentUsage);
1627 if (VregCurrentUsage > maxVregUsage)
14
Taking false branch
1628 maxVregUsage = VregCurrentUsage;
1629 if (SregCurrentUsage > maxSregUsage)
15
Taking false branch
1630 maxSregUsage = SregCurrentUsage;
1631 DEBUG(
1632 dbgs() << "Picking New Blocks\n";
1633 dbgs() << "Available: ";
1634 for (SIScheduleBlock* Block : ReadyBlocks)
1635 dbgs() << Block->getID() << ' ';
1636 dbgs() << "\nCurrent Live:\n";
1637 for (unsigned Reg : LiveRegs)
1638 dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' ';
1639 dbgs() << '\n';
1640 dbgs() << "Current VGPRs: " << VregCurrentUsage << '\n';
1641 dbgs() << "Current SGPRs: " << SregCurrentUsage << '\n';
1642 );
1643
1644 Cand.Block = nullptr;
16
Null pointer value stored to 'Cand.Block'
1645 for (std::vector<SIScheduleBlock*>::iterator I = ReadyBlocks.begin(),
17
Loop condition is false. Execution continues on line 1679
1646 E = ReadyBlocks.end(); I != E; ++I) {
1647 SIBlockSchedCandidate TryCand;
1648 TryCand.Block = *I;
1649 TryCand.IsHighLatency = TryCand.Block->isHighLatencyBlock();
1650 TryCand.VGPRUsageDiff =
1651 checkRegUsageImpact(TryCand.Block->getInRegs(),
1652 TryCand.Block->getOutRegs())[DAG->getVGPRSetID()];
1653 TryCand.NumSuccessors = TryCand.Block->getSuccs().size();
1654 TryCand.NumHighLatencySuccessors =
1655 TryCand.Block->getNumHighLatencySuccessors();
1656 TryCand.LastPosHighLatParentScheduled =
1657 (unsigned int) std::max<int> (0,
1658 LastPosHighLatencyParentScheduled[TryCand.Block->getID()] -
1659 LastPosWaitedHighLatency);
1660 TryCand.Height = TryCand.Block->Height;
1661 // Try not to increase VGPR usage too much, else we may spill.
1662 if (VregCurrentUsage > 120 ||
1663 Variant != SISchedulerBlockSchedulerVariant::BlockLatencyRegUsage) {
1664 if (!tryCandidateRegUsage(Cand, TryCand) &&
1665 Variant != SISchedulerBlockSchedulerVariant::BlockRegUsage)
1666 tryCandidateLatency(Cand, TryCand);
1667 } else {
1668 if (!tryCandidateLatency(Cand, TryCand))
1669 tryCandidateRegUsage(Cand, TryCand);
1670 }
1671 if (TryCand.Reason != NoCand) {
1672 Cand.setBest(TryCand);
1673 Best = I;
1674 DEBUG(dbgs() << "Best Current Choice: " << Cand.Block->getID() << ' '
1675 << getReasonStr(Cand.Reason) << '\n');
1676 }
1677 }
1678
1679 DEBUG(
18
Within the expansion of the macro 'DEBUG':
a
Assuming 'DebugFlag' is not equal to 0
b
Assuming the condition is true
c
Called C++ object pointer is null
1680 dbgs() << "Picking: " << Cand.Block->getID() << '\n';
1681 dbgs() << "Is a block with high latency instruction: "
1682 << (Cand.IsHighLatency ? "yes\n" : "no\n");
1683 dbgs() << "Position of last high latency dependency: "
1684 << Cand.LastPosHighLatParentScheduled << '\n';
1685 dbgs() << "VGPRUsageDiff: " << Cand.VGPRUsageDiff << '\n';
1686 dbgs() << '\n';
1687 );
1688
1689 Block = Cand.Block;
1690 ReadyBlocks.erase(Best);
1691 return Block;
1692}
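
This is the function the warning at line 1679 points into: Cand.Block is set to nullptr at line 1644 and is only overwritten inside the candidate loop, so on the analyzer's path where ReadyBlocks is assumed non-empty at line 1622 yet the loop at line 1645 is never entered, the DEBUG block at line 1679 dereferences a null pointer. The surrounding code rules that combination out in practice, but an explicit check or assert would make the invariant visible. Below is a minimal standalone sketch of that defensive pattern; the names and data are hypothetical and this is not a patch to SIMachineScheduler.cpp.

#include <cassert>
#include <iostream>
#include <vector>

struct Candidate { int ID; int Score; };

static const Candidate *pickBest(const std::vector<Candidate> &Ready) {
  const Candidate *Best = nullptr;     // mirrors Cand.Block = nullptr
  for (const Candidate &C : Ready)
    if (!Best || C.Score > Best->Score)
      Best = &C;
  // Either state the invariant explicitly...
  assert((Ready.empty() || Best) && "non-empty ready list must yield a pick");
  // ...or guard the dereference so a null pick cannot be printed.
  if (Best)
    std::cout << "Picking: " << Best->ID << '\n';
  return Best;
}

int main() {
  std::vector<Candidate> Ready = {{0, 3}, {1, 7}};
  const Candidate *B = pickBest(Ready);
  return B && B->ID == 1 ? 0 : 1;
}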
1693
1694// Tracking of currently alive registers to determine VGPR Usage.
1695
1696void SIScheduleBlockScheduler::addLiveRegs(std::set<unsigned> &Regs) {
1697 for (unsigned Reg : Regs) {
1698 // For now only track virtual registers.
1699 if (!TargetRegisterInfo::isVirtualRegister(Reg))
1700 continue;
1701 // If not already in the live set, then add it.
1702 (void) LiveRegs.insert(Reg);
1703 }
1704}
1705
1706void SIScheduleBlockScheduler::decreaseLiveRegs(SIScheduleBlock *Block,
1707 std::set<unsigned> &Regs) {
1708 for (unsigned Reg : Regs) {
1709 // For now only track virtual registers.
1710 std::set<unsigned>::iterator Pos = LiveRegs.find(Reg);
1711 assert (Pos != LiveRegs.end() && // Reg must be live.
1712 LiveRegsConsumers.find(Reg) != LiveRegsConsumers.end() &&
1713 LiveRegsConsumers[Reg] >= 1);
1714 --LiveRegsConsumers[Reg];
1715 if (LiveRegsConsumers[Reg] == 0)
1716 LiveRegs.erase(Pos);
1717 }
1718}
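
addLiveRegs and decreaseLiveRegs implement a small reference-counted live set: LiveRegsConsumers records how many scheduled consumers are still expected for each register, and a register only leaves LiveRegs once that count reaches zero. A toy sketch of the pattern follows; the register number is invented and the snippet is independent of the LLVM types.

#include <cassert>
#include <map>
#include <set>

int main() {
  std::set<unsigned> LiveRegs;
  std::map<unsigned, unsigned> LiveRegsConsumers;

  // A block produces vreg 42 with two known consumers.
  LiveRegs.insert(42);
  LiveRegsConsumers[42] += 2;

  // First consumer scheduled: the register stays live.
  if (--LiveRegsConsumers[42] == 0)
    LiveRegs.erase(42);
  assert(LiveRegs.count(42) == 1);

  // Second consumer scheduled: the register is now dead.
  if (--LiveRegsConsumers[42] == 0)
    LiveRegs.erase(42);
  assert(LiveRegs.count(42) == 0);
  return 0;
}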
1719
1720void SIScheduleBlockScheduler::releaseBlockSuccs(SIScheduleBlock *Parent) {
1721 for (const auto &Block : Parent->getSuccs()) {
1722 if (--BlockNumPredsLeft[Block.first->getID()] == 0)
1723 ReadyBlocks.push_back(Block.first);
1724
1725 if (Parent->isHighLatencyBlock() &&
1726 Block.second == SIScheduleBlockLinkKind::Data)
1727 LastPosHighLatencyParentScheduled[Block.first->getID()] = NumBlockScheduled;
1728 }
1729}
1730
1731void SIScheduleBlockScheduler::blockScheduled(SIScheduleBlock *Block) {
1732 decreaseLiveRegs(Block, Block->getInRegs());
1733 addLiveRegs(Block->getOutRegs());
1734 releaseBlockSuccs(Block);
1735 for (std::map<unsigned, unsigned>::iterator RegI =
1736 LiveOutRegsNumUsages[Block->getID()].begin(),
1737 E = LiveOutRegsNumUsages[Block->getID()].end(); RegI != E; ++RegI) {
1738 std::pair<unsigned, unsigned> RegP = *RegI;
1739 // We produce this register, thus it must not be previously alive.
1740 assert(LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end() ||
1741 LiveRegsConsumers[RegP.first] == 0);
1742 LiveRegsConsumers[RegP.first] += RegP.second;
1743 }
1744 if (LastPosHighLatencyParentScheduled[Block->getID()] >
1745 (unsigned)LastPosWaitedHighLatency)
1746 LastPosWaitedHighLatency =
1747 LastPosHighLatencyParentScheduled[Block->getID()];
1748 ++NumBlockScheduled;
1749}
1750
1751std::vector<int>
1752SIScheduleBlockScheduler::checkRegUsageImpact(std::set<unsigned> &InRegs,
1753 std::set<unsigned> &OutRegs) {
1754 std::vector<int> DiffSetPressure;
1755 DiffSetPressure.assign(DAG->getTRI()->getNumRegPressureSets(), 0);
1756
1757 for (unsigned Reg : InRegs) {
1758 // For now only track virtual registers.
1759 if (!TargetRegisterInfo::isVirtualRegister(Reg))
1760 continue;
1761 if (LiveRegsConsumers[Reg] > 1)
1762 continue;
1763 PSetIterator PSetI = DAG->getMRI()->getPressureSets(Reg);
1764 for (; PSetI.isValid(); ++PSetI) {
1765 DiffSetPressure[*PSetI] -= PSetI.getWeight();
1766 }
1767 }
1768
1769 for (unsigned Reg : OutRegs) {
1770 // For now only track virtual registers.
1771 if (!TargetRegisterInfo::isVirtualRegister(Reg))
1772 continue;
1773 PSetIterator PSetI = DAG->getMRI()->getPressureSets(Reg);
1774 for (; PSetI.isValid(); ++PSetI) {
1775 DiffSetPressure[*PSetI] += PSetI.getWeight();
1776 }
1777 }
1778
1779 return DiffSetPressure;
1780}
1781
1782// SIScheduler //
1783
1784struct SIScheduleBlockResult
1785SIScheduler::scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
1786 SISchedulerBlockSchedulerVariant ScheduleVariant) {
1787 SIScheduleBlocks Blocks = BlockCreator.getBlocks(BlockVariant);
1788 SIScheduleBlockScheduler Scheduler(DAG, ScheduleVariant, Blocks);
1789 std::vector<SIScheduleBlock*> ScheduledBlocks;
1790 struct SIScheduleBlockResult Res;
1791
1792 ScheduledBlocks = Scheduler.getBlocks();
1793
1794 for (unsigned b = 0; b < ScheduledBlocks.size(); ++b) {
1795 SIScheduleBlock *Block = ScheduledBlocks[b];
1796 std::vector<SUnit*> SUs = Block->getScheduledUnits();
1797
1798 for (SUnit* SU : SUs)
1799 Res.SUs.push_back(SU->NodeNum);
1800 }
1801
1802 Res.MaxSGPRUsage = Scheduler.getSGPRUsage();
1803 Res.MaxVGPRUsage = Scheduler.getVGPRUsage();
1804 return Res;
1805}
1806
1807// SIScheduleDAGMI //
1808
1809SIScheduleDAGMI::SIScheduleDAGMI(MachineSchedContext *C) :
1810 ScheduleDAGMILive(C, llvm::make_unique<GenericScheduler>(C)) {
1811 SITII = static_cast<const SIInstrInfo*>(TII);
1812 SITRI = static_cast<const SIRegisterInfo*>(TRI);
1813
1814 VGPRSetID = SITRI->getVGPRPressureSet();
1815 SGPRSetID = SITRI->getSGPRPressureSet();
1816}
1817
1818SIScheduleDAGMI::~SIScheduleDAGMI() = default;
1819
1820// Code adapted from scheduleDAG.cpp
1821// Does a topological sort over the SUs.
1822// Both TopDown and BottomUp
1823void SIScheduleDAGMI::topologicalSort() {
1824 Topo.InitDAGTopologicalSorting();
1825
1826 TopDownIndex2SU = std::vector<int>(Topo.begin(), Topo.end());
1827 BottomUpIndex2SU = std::vector<int>(Topo.rbegin(), Topo.rend());
1828}
1829
1830 // Move low-latency instructions further from their users without
1831 // increasing SGPR usage (in general).
1832 // This is to be replaced by a better pass that would
1833 // take SGPR usage into account (based on the VGPR usage
1834 // and the corresponding wavefront count), and that would
1835 // try to merge groups of loads when it makes sense, etc.
1836void SIScheduleDAGMI::moveLowLatencies() {
1837 unsigned DAGSize = SUnits.size();
1838 int LastLowLatencyUser = -1;
1839 int LastLowLatencyPos = -1;
1840
1841 for (unsigned i = 0, e = ScheduledSUnits.size(); i != e; ++i) {
1842 SUnit *SU = &SUnits[ScheduledSUnits[i]];
1843 bool IsLowLatencyUser = false;
1844 unsigned MinPos = 0;
1845
1846 for (SDep& PredDep : SU->Preds) {
1847 SUnit *Pred = PredDep.getSUnit();
1848 if (SITII->isLowLatencyInstruction(*Pred->getInstr())) {
1849 IsLowLatencyUser = true;
1850 }
1851 if (Pred->NodeNum >= DAGSize)
1852 continue;
1853 unsigned PredPos = ScheduledSUnitsInv[Pred->NodeNum];
1854 if (PredPos >= MinPos)
1855 MinPos = PredPos + 1;
1856 }
1857
1858 if (SITII->isLowLatencyInstruction(*SU->getInstr())) {
1859 unsigned BestPos = LastLowLatencyUser + 1;
1860 if ((int)BestPos <= LastLowLatencyPos)
1861 BestPos = LastLowLatencyPos + 1;
1862 if (BestPos < MinPos)
1863 BestPos = MinPos;
1864 if (BestPos < i) {
1865 for (unsigned u = i; u > BestPos; --u) {
1866 ++ScheduledSUnitsInv[ScheduledSUnits[u-1]];
1867 ScheduledSUnits[u] = ScheduledSUnits[u-1];
1868 }
1869 ScheduledSUnits[BestPos] = SU->NodeNum;
1870 ScheduledSUnitsInv[SU->NodeNum] = BestPos;
1871 }
1872 LastLowLatencyPos = BestPos;
1873 if (IsLowLatencyUser)
1874 LastLowLatencyUser = BestPos;
1875 } else if (IsLowLatencyUser) {
1876 LastLowLatencyUser = i;
1877 // Also move the COPY instructions that the
1878 // low-latency instructions depend on.
1879 } else if (SU->getInstr()->getOpcode() == AMDGPU::COPY) {
1880 bool CopyForLowLat = false;
1881 for (SDep& SuccDep : SU->Succs) {
1882 SUnit *Succ = SuccDep.getSUnit();
1883 if (SITII->isLowLatencyInstruction(*Succ->getInstr())) {
1884 CopyForLowLat = true;
1885 }
1886 }
1887 if (!CopyForLowLat)
1888 continue;
1889 if (MinPos < i) {
1890 for (unsigned u = i; u > MinPos; --u) {
1891 ++ScheduledSUnitsInv[ScheduledSUnits[u-1]];
1892 ScheduledSUnits[u] = ScheduledSUnits[u-1];
1893 }
1894 ScheduledSUnits[MinPos] = SU->NodeNum;
1895 ScheduledSUnitsInv[SU->NodeNum] = MinPos;
1896 }
1897 }
1898 }
1899}
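
The rotation inside moveLowLatencies() keeps two arrays consistent: ScheduledSUnits maps a slot to an SU and ScheduledSUnitsInv maps an SU back to its slot; moving an SU to an earlier slot shifts the displaced entries one slot later while bumping their inverse entries. A standalone sketch with toy data (not LLVM code) is shown below.

#include <cassert>
#include <vector>

int main() {
  std::vector<unsigned> ScheduledSUnits = {5, 7, 9, 3};   // slot -> SU
  std::vector<unsigned> ScheduledSUnitsInv(10);
  for (unsigned Slot = 0; Slot < ScheduledSUnits.size(); ++Slot)
    ScheduledSUnitsInv[ScheduledSUnits[Slot]] = Slot;     // SU -> slot

  unsigned i = 3, BestPos = 1;                            // move SU 3 earlier
  unsigned SU = ScheduledSUnits[i];
  for (unsigned u = i; u > BestPos; --u) {
    ++ScheduledSUnitsInv[ScheduledSUnits[u - 1]];         // displaced one slot later
    ScheduledSUnits[u] = ScheduledSUnits[u - 1];
  }
  ScheduledSUnits[BestPos] = SU;
  ScheduledSUnitsInv[SU] = BestPos;

  assert(ScheduledSUnits[1] == 3 && ScheduledSUnits[2] == 7 &&
         ScheduledSUnits[3] == 9 && ScheduledSUnitsInv[3] == 1);
  return 0;
}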
1900
1901void SIScheduleDAGMI::restoreSULinksLeft() {
1902 for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
1903 SUnits[i].isScheduled = false;
1904 SUnits[i].WeakPredsLeft = SUnitsLinksBackup[i].WeakPredsLeft;
1905 SUnits[i].NumPredsLeft = SUnitsLinksBackup[i].NumPredsLeft;
1906 SUnits[i].WeakSuccsLeft = SUnitsLinksBackup[i].WeakSuccsLeft;
1907 SUnits[i].NumSuccsLeft = SUnitsLinksBackup[i].NumSuccsLeft;
1908 }
1909}
1910
1911// Return the Vgpr and Sgpr usage corresponding to some virtual registers.
1912template<typename _Iterator> void
1913SIScheduleDAGMI::fillVgprSgprCost(_Iterator First, _Iterator End,
1914 unsigned &VgprUsage, unsigned &SgprUsage) {
1915 VgprUsage = 0;
1916 SgprUsage = 0;
1917 for (_Iterator RegI = First; RegI != End; ++RegI) {
1918 unsigned Reg = *RegI;
1919 // For now only track virtual registers
1920 if (!TargetRegisterInfo::isVirtualRegister(Reg))
1921 continue;
1922 PSetIterator PSetI = MRI.getPressureSets(Reg);
1923 for (; PSetI.isValid(); ++PSetI) {
1924 if (*PSetI == VGPRSetID)
1925 VgprUsage += PSetI.getWeight();
1926 else if (*PSetI == SGPRSetID)
1927 SgprUsage += PSetI.getWeight();
1928 }
1929 }
1930}
1931
1932void SIScheduleDAGMI::schedule()
1933{
1934 SmallVector<SUnit*, 8> TopRoots, BotRoots;
1935 SIScheduleBlockResult Best, Temp;
1936 DEBUG(dbgs() << "Preparing Scheduling\n");
1937
1938 buildDAGWithRegPressure();
1939 DEBUG(
1940 for(SUnit& SU : SUnits)
1941 SU.dumpAll(this)
1942 );
1943
1944 topologicalSort();
1945 findRootsAndBiasEdges(TopRoots, BotRoots);
1946 // We reuse several ScheduleDAGMI and ScheduleDAGMILive
1947 // functions, but to make them happy we must initialize
1948 // the default Scheduler implementation (even if we do not
1949 // run it)
1950 SchedImpl->initialize(this);
1951 initQueues(TopRoots, BotRoots);
1952
1953 // Fill some stats to help scheduling.
1954
1955 SUnitsLinksBackup = SUnits;
1956 IsLowLatencySU.clear();
1957 LowLatencyOffset.clear();
1958 IsHighLatencySU.clear();
1959
1960 IsLowLatencySU.resize(SUnits.size(), 0);
1961 LowLatencyOffset.resize(SUnits.size(), 0);
1962 IsHighLatencySU.resize(SUnits.size(), 0);
1963
1964 for (unsigned i = 0, e = (unsigned)SUnits.size(); i != e; ++i) {
1965 SUnit *SU = &SUnits[i];
1966 unsigned BaseLatReg;
1967 int64_t OffLatReg;
1968 if (SITII->isLowLatencyInstruction(*SU->getInstr())) {
1969 IsLowLatencySU[i] = 1;
1970 if (SITII->getMemOpBaseRegImmOfs(*SU->getInstr(), BaseLatReg, OffLatReg,
1971 TRI))
1972 LowLatencyOffset[i] = OffLatReg;
1973 } else if (SITII->isHighLatencyInstruction(*SU->getInstr()))
1974 IsHighLatencySU[i] = 1;
1975 }
1976
1977 SIScheduler Scheduler(this);
1978 Best = Scheduler.scheduleVariant(SISchedulerBlockCreatorVariant::LatenciesAlone,
1979 SISchedulerBlockSchedulerVariant::BlockLatencyRegUsage);
1980
1981 // If VGPR usage is extremely high, try other well-performing variants
1982 // that could lead to lower VGPR usage.
1983 if (Best.MaxVGPRUsage > 180) {
1984 static const std::pair<SISchedulerBlockCreatorVariant,
1985 SISchedulerBlockSchedulerVariant>
1986 Variants[] = {
1987 { LatenciesAlone, BlockRegUsageLatency },
1988// { LatenciesAlone, BlockRegUsage },
1989 { LatenciesGrouped, BlockLatencyRegUsage },
1990// { LatenciesGrouped, BlockRegUsageLatency },
1991// { LatenciesGrouped, BlockRegUsage },
1992 { LatenciesAlonePlusConsecutive, BlockLatencyRegUsage },
1993// { LatenciesAlonePlusConsecutive, BlockRegUsageLatency },
1994// { LatenciesAlonePlusConsecutive, BlockRegUsage }
1995 };
1996 for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
1997 Temp = Scheduler.scheduleVariant(v.first, v.second);
1998 if (Temp.MaxVGPRUsage < Best.MaxVGPRUsage)
1999 Best = Temp;
2000 }
2001 }
2002 // If VGPR usage is still extremely high, we may spill. Try other variants
2003 // which perform less well but could lead to lower VGPR usage.
2004 if (Best.MaxVGPRUsage > 200) {
2005 static const std::pair<SISchedulerBlockCreatorVariant,
2006 SISchedulerBlockSchedulerVariant>
2007 Variants[] = {
2008// { LatenciesAlone, BlockRegUsageLatency },
2009 { LatenciesAlone, BlockRegUsage },
2010// { LatenciesGrouped, BlockLatencyRegUsage },
2011 { LatenciesGrouped, BlockRegUsageLatency },
2012 { LatenciesGrouped, BlockRegUsage },
2013// { LatenciesAlonePlusConsecutive, BlockLatencyRegUsage },
2014 { LatenciesAlonePlusConsecutive, BlockRegUsageLatency },
2015 { LatenciesAlonePlusConsecutive, BlockRegUsage }
2016 };
2017 for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
2018 Temp = Scheduler.scheduleVariant(v.first, v.second);
2019 if (Temp.MaxVGPRUsage < Best.MaxVGPRUsage)
2020 Best = Temp;
2021 }
2022 }
2023
2024 ScheduledSUnits = Best.SUs;
2025 ScheduledSUnitsInv.resize(SUnits.size());
2026
2027 for (unsigned i = 0, e = (unsigned)SUnits.size(); i != e; ++i) {
2028 ScheduledSUnitsInv[ScheduledSUnits[i]] = i;
2029 }
2030
2031 moveLowLatencies();
2032
2033 // Tell the outside world about the result of the scheduling.
2034
2035 assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
2036 TopRPTracker.setPos(CurrentTop);
2037
2038 for (std::vector<unsigned>::iterator I = ScheduledSUnits.begin(),
2039 E = ScheduledSUnits.end(); I != E; ++I) {
2040 SUnit *SU = &SUnits[*I];
2041
2042 scheduleMI(SU, true);
2043
2044 DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
2045 << *SU->getInstr());
2046 }
2047
2048 assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
2049
2050 placeDebugValues();
2051
2052 DEBUG({
2053 dbgs() << "*** Final schedule for "
2054 << printMBBReference(*begin()->getParent()) << " ***\n";
2055 dumpSchedule();
2056 dbgs() << '\n';
2057 });
2058}
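
The variant-retry logic in schedule() applies an escalating fallback: the default variant is kept unless its maximum VGPR usage crosses a threshold, and an alternative variant is only adopted if it actually lowers that maximum. A toy sketch of the selection logic follows; the numbers and the Result struct are invented for illustration and are not the LLVM types.

#include <cassert>
#include <vector>

struct Result { unsigned MaxVGPRUsage; };

int main() {
  Result Best = {190};                         // outcome of the default variant
  std::vector<Result> Alternatives = {{185}, {170}, {195}};

  if (Best.MaxVGPRUsage > 180)                 // only retry above the threshold
    for (const Result &Temp : Alternatives)
      if (Temp.MaxVGPRUsage < Best.MaxVGPRUsage)
        Best = Temp;                           // keep the lowest-pressure result

  assert(Best.MaxVGPRUsage == 170);
  return 0;
}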