Bug Summary

File:build/source/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
Warning:line 1633, column 3
Called C++ object pointer is null

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name SIMachineScheduler.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-17/lib/clang/17 -D _DEBUG -D _GLIBCXX_ASSERTIONS -D _GNU_SOURCE -D _LIBCPP_ENABLE_ASSERTIONS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/Target/AMDGPU -I /build/source/llvm/lib/Target/AMDGPU -I include -I /build/source/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-17/lib/clang/17/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem 
/usr/include -fmacro-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/source/= -fcoverage-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/source/= -source-date-epoch 1683717183 -O2 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/= -ferror-limit 19 -fvisibility=hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2023-05-10-133810-16478-1 -x c++ /build/source/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
1//===-- SIMachineScheduler.cpp - SI Scheduler Interface -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// SI Machine Scheduler interface
11//
12//===----------------------------------------------------------------------===//
13
14#include "SIMachineScheduler.h"
15#include "SIInstrInfo.h"
16#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
17#include "llvm/CodeGen/LiveIntervals.h"
18#include "llvm/CodeGen/MachineRegisterInfo.h"
19
20using namespace llvm;
21
22#define DEBUG_TYPE "machine-scheduler"
23
24// This scheduler implements a different scheduling algorithm than
25// GenericScheduler.
26//
27// There are several specific architecture behaviours that can't be modelled
28// for GenericScheduler:
29// . When accessing the result of an SGPR load instruction, you have to wait
30// for all the SGPR load instructions before your current instruction to
31// have finished.
32// . When accessing the result of an VGPR load instruction, you have to wait
33// for all the VGPR load instructions previous to the VGPR load instruction
34// you are interested in to finish.
35// . The less the register pressure, the better load latencies are hidden
36//
37// Moreover some specificities (like the fact a lot of instructions in the shader
38// have few dependencies) makes the generic scheduler have some unpredictable
39// behaviours. For example when register pressure becomes high, it can either
40// manage to prevent register pressure from going too high, or it can
41// increase register pressure even more than if it hadn't taken register
42// pressure into account.
43//
44// Also some other bad behaviours are generated, like loading at the beginning
45// of the shader a constant in VGPR you won't need until the end of the shader.
46//
47// The scheduling problem for SI can distinguish three main parts:
48// . Hiding high latencies (texture sampling, etc)
49// . Hiding low latencies (SGPR constant loading, etc)
50// . Keeping register usage low for better latency hiding and general
51// performance
52//
53// Some other things can also affect performance, but are hard to predict
54// (cache usage, the fact the HW can issue several instructions from different
55// wavefronts if different types, etc)
56//
57// This scheduler tries to solve the scheduling problem by dividing it into
58// simpler sub-problems. It divides the instructions into blocks, schedules
59// locally inside the blocks where it takes care of low latencies, and then
60// chooses the order of the blocks by taking care of high latencies.
61// Dividing the instructions into blocks helps control keeping register
62// usage low.
63//
64// First the instructions are put into blocks.
65// We want the blocks help control register usage and hide high latencies
66// later. To help control register usage, we typically want all local
67// computations, when for example you create a result that can be consumed
68// right away, to be contained in a block. Block inputs and outputs would
69// typically be important results that are needed in several locations of
70// the shader. Since we do want blocks to help hide high latencies, we want
71// the instructions inside the block to have a minimal set of dependencies
72// on high latencies. It will make it easy to pick blocks to hide specific
73// high latencies.
74// The block creation algorithm is divided into several steps, and several
75// variants can be tried during the scheduling process.
76//
77// Second the order of the instructions inside the blocks is chosen.
78// At that step we do take into account only register usage and hiding
79// low latency instructions
80//
81// Third the block order is chosen, there we try to hide high latencies
82// and keep register usage low.
83//
84// After the third step, a pass is done to improve the hiding of low
85// latencies.
86//
87// Actually when talking about 'low latency' or 'high latency' it includes
88// both the latency to get the cache (or global mem) data go to the register,
89// and the bandwidth limitations.
90// Increasing the number of active wavefronts helps hide the former, but it
91// doesn't solve the latter, thus why even if wavefront count is high, we have
92// to try have as many instructions hiding high latencies as possible.
93// The OpenCL doc says for example latency of 400 cycles for a global mem
94// access, which is hidden by 10 instructions if the wavefront count is 10.
95
96// Some figures taken from AMD docs:
97// Both texture and constant L1 caches are 4-way associative with 64 bytes
98// lines.
99// Constant cache is shared with 4 CUs.
100// For texture sampling, the address generation unit receives 4 texture
101// addresses per cycle, thus we could expect texture sampling latency to be
102// equivalent to 4 instructions in the very best case (a VGPR is 64 work items,
103// instructions in a wavefront group are executed every 4 cycles),
104// or 16 instructions if the other wavefronts associated to the 3 other VALUs
105// of the CU do texture sampling too. (Don't take these figures too seriously,
106// as I'm not 100% sure of the computation)
107// Data exports should get similar latency.
108// For constant loading, the cache is shared with 4 CUs.
109// The doc says "a throughput of 16B/cycle for each of the 4 Compute Unit"
110// I guess if the other CU don't read the cache, it can go up to 64B/cycle.
111// It means a simple s_buffer_load should take one instruction to hide, as
112// well as a s_buffer_loadx2 and potentially a s_buffer_loadx8 if on the same
113// cache line.
114//
115// As of today the driver doesn't preload the constants in cache, thus the
116// first loads get extra latency. The doc says global memory access can be
117// 300-600 cycles. We do not specially take that into account when scheduling
118// As we expect the driver to be able to preload the constants soon.
119
120// common code //
121
122#ifndef NDEBUG
123
124static const char *getReasonStr(SIScheduleCandReason Reason) {
125 switch (Reason) {
126 case NoCand: return "NOCAND";
127 case RegUsage: return "REGUSAGE";
128 case Latency: return "LATENCY";
129 case Successor: return "SUCCESSOR";
130 case Depth: return "DEPTH";
131 case NodeOrder: return "ORDER";
132 }
133 llvm_unreachable("Unknown reason!")::llvm::llvm_unreachable_internal("Unknown reason!", "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 133)
;
134}
135
136#endif
137
138namespace llvm {
139namespace SISched {
140static bool tryLess(int TryVal, int CandVal,
141 SISchedulerCandidate &TryCand,
142 SISchedulerCandidate &Cand,
143 SIScheduleCandReason Reason) {
144 if (TryVal < CandVal) {
145 TryCand.Reason = Reason;
146 return true;
147 }
148 if (TryVal > CandVal) {
149 if (Cand.Reason > Reason)
150 Cand.Reason = Reason;
151 return true;
152 }
153 Cand.setRepeat(Reason);
154 return false;
155}
156
157static bool tryGreater(int TryVal, int CandVal,
158 SISchedulerCandidate &TryCand,
159 SISchedulerCandidate &Cand,
160 SIScheduleCandReason Reason) {
161 if (TryVal > CandVal) {
162 TryCand.Reason = Reason;
163 return true;
164 }
165 if (TryVal < CandVal) {
166 if (Cand.Reason > Reason)
167 Cand.Reason = Reason;
168 return true;
169 }
170 Cand.setRepeat(Reason);
171 return false;
172}
173} // end namespace SISched
174} // end namespace llvm
175
176// SIScheduleBlock //
177
178void SIScheduleBlock::addUnit(SUnit *SU) {
179 NodeNum2Index[SU->NodeNum] = SUnits.size();
180 SUnits.push_back(SU);
181}
182
183#ifndef NDEBUG
184void SIScheduleBlock::traceCandidate(const SISchedCandidate &Cand) {
185
186 dbgs() << " SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
187 dbgs() << '\n';
188}
189#endif
190
191void SIScheduleBlock::tryCandidateTopDown(SISchedCandidate &Cand,
192 SISchedCandidate &TryCand) {
193 // Initialize the candidate if needed.
194 if (!Cand.isValid()) {
195 TryCand.Reason = NodeOrder;
196 return;
197 }
198
199 if (Cand.SGPRUsage > 60 &&
200 SISched::tryLess(TryCand.SGPRUsage, Cand.SGPRUsage,
201 TryCand, Cand, RegUsage))
202 return;
203
204 // Schedule low latency instructions as top as possible.
205 // Order of priority is:
206 // . Low latency instructions which do not depend on other low latency
207 // instructions we haven't waited for
208 // . Other instructions which do not depend on low latency instructions
209 // we haven't waited for
210 // . Low latencies
211 // . All other instructions
212 // Goal is to get: low latency instructions - independent instructions
213 // - (eventually some more low latency instructions)
214 // - instructions that depend on the first low latency instructions.
215 // If in the block there is a lot of constant loads, the SGPR usage
216 // could go quite high, thus above the arbitrary limit of 60 will encourage
217 // use the already loaded constants (in order to release some SGPRs) before
218 // loading more.
219 if (SISched::tryLess(TryCand.HasLowLatencyNonWaitedParent,
220 Cand.HasLowLatencyNonWaitedParent,
221 TryCand, Cand, SIScheduleCandReason::Depth))
222 return;
223
224 if (SISched::tryGreater(TryCand.IsLowLatency, Cand.IsLowLatency,
225 TryCand, Cand, SIScheduleCandReason::Depth))
226 return;
227
228 if (TryCand.IsLowLatency &&
229 SISched::tryLess(TryCand.LowLatencyOffset, Cand.LowLatencyOffset,
230 TryCand, Cand, SIScheduleCandReason::Depth))
231 return;
232
233 if (SISched::tryLess(TryCand.VGPRUsage, Cand.VGPRUsage,
234 TryCand, Cand, RegUsage))
235 return;
236
237 // Fall through to original instruction order.
238 if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
239 TryCand.Reason = NodeOrder;
240 }
241}
242
243SUnit* SIScheduleBlock::pickNode() {
244 SISchedCandidate TopCand;
245
246 for (SUnit* SU : TopReadySUs) {
247 SISchedCandidate TryCand;
248 std::vector<unsigned> pressure;
249 std::vector<unsigned> MaxPressure;
250 // Predict register usage after this instruction.
251 TryCand.SU = SU;
252 TopRPTracker.getDownwardPressure(SU->getInstr(), pressure, MaxPressure);
253 TryCand.SGPRUsage = pressure[AMDGPU::RegisterPressureSets::SReg_32];
254 TryCand.VGPRUsage = pressure[AMDGPU::RegisterPressureSets::VGPR_32];
255 TryCand.IsLowLatency = DAG->IsLowLatencySU[SU->NodeNum];
256 TryCand.LowLatencyOffset = DAG->LowLatencyOffset[SU->NodeNum];
257 TryCand.HasLowLatencyNonWaitedParent =
258 HasLowLatencyNonWaitedParent[NodeNum2Index[SU->NodeNum]];
259 tryCandidateTopDown(TopCand, TryCand);
260 if (TryCand.Reason != NoCand)
261 TopCand.setBest(TryCand);
262 }
263
264 return TopCand.SU;
265}
266
267
268// Schedule something valid.
269void SIScheduleBlock::fastSchedule() {
270 TopReadySUs.clear();
271 if (Scheduled)
272 undoSchedule();
273
274 for (SUnit* SU : SUnits) {
275 if (!SU->NumPredsLeft)
276 TopReadySUs.push_back(SU);
277 }
278
279 while (!TopReadySUs.empty()) {
280 SUnit *SU = TopReadySUs[0];
281 ScheduledSUnits.push_back(SU);
282 nodeScheduled(SU);
283 }
284
285 Scheduled = true;
286}
287
288// Returns if the register was set between first and last.
289static bool isDefBetween(unsigned Reg,
290 SlotIndex First, SlotIndex Last,
291 const MachineRegisterInfo *MRI,
292 const LiveIntervals *LIS) {
293 for (MachineRegisterInfo::def_instr_iterator
294 UI = MRI->def_instr_begin(Reg),
295 UE = MRI->def_instr_end(); UI != UE; ++UI) {
296 const MachineInstr* MI = &*UI;
297 if (MI->isDebugValue())
298 continue;
299 SlotIndex InstSlot = LIS->getInstructionIndex(*MI).getRegSlot();
300 if (InstSlot >= First && InstSlot <= Last)
301 return true;
302 }
303 return false;
304}
305
306void SIScheduleBlock::initRegPressure(MachineBasicBlock::iterator BeginBlock,
307 MachineBasicBlock::iterator EndBlock) {
308 IntervalPressure Pressure, BotPressure;
309 RegPressureTracker RPTracker(Pressure), BotRPTracker(BotPressure);
310 LiveIntervals *LIS = DAG->getLIS();
311 MachineRegisterInfo *MRI = DAG->getMRI();
312 DAG->initRPTracker(TopRPTracker);
313 DAG->initRPTracker(BotRPTracker);
314 DAG->initRPTracker(RPTracker);
315
316 // Goes though all SU. RPTracker captures what had to be alive for the SUs
317 // to execute, and what is still alive at the end.
318 for (SUnit* SU : ScheduledSUnits) {
319 RPTracker.setPos(SU->getInstr());
320 RPTracker.advance();
321 }
322
323 // Close the RPTracker to finalize live ins/outs.
324 RPTracker.closeRegion();
325
326 // Initialize the live ins and live outs.
327 TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
328 BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
329
330 // Do not Track Physical Registers, because it messes up.
331 for (const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) {
332 if (RegMaskPair.RegUnit.isVirtual())
333 LiveInRegs.insert(RegMaskPair.RegUnit);
334 }
335 LiveOutRegs.clear();
336 // There is several possibilities to distinguish:
337 // 1) Reg is not input to any instruction in the block, but is output of one
338 // 2) 1) + read in the block and not needed after it
339 // 3) 1) + read in the block but needed in another block
340 // 4) Reg is input of an instruction but another block will read it too
341 // 5) Reg is input of an instruction and then rewritten in the block.
342 // result is not read in the block (implies used in another block)
343 // 6) Reg is input of an instruction and then rewritten in the block.
344 // result is read in the block and not needed in another block
345 // 7) Reg is input of an instruction and then rewritten in the block.
346 // result is read in the block but also needed in another block
347 // LiveInRegs will contains all the regs in situation 4, 5, 6, 7
348 // We want LiveOutRegs to contain only Regs whose content will be read after
349 // in another block, and whose content was written in the current block,
350 // that is we want it to get 1, 3, 5, 7
351 // Since we made the MIs of a block to be packed all together before
352 // scheduling, then the LiveIntervals were correct, and the RPTracker was
353 // able to correctly handle 5 vs 6, 2 vs 3.
354 // (Note: This is not sufficient for RPTracker to not do mistakes for case 4)
355 // The RPTracker's LiveOutRegs has 1, 3, (some correct or incorrect)4, 5, 7
356 // Comparing to LiveInRegs is not sufficient to differentiate 4 vs 5, 7
357 // The use of findDefBetween removes the case 4.
358 for (const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) {
359 Register Reg = RegMaskPair.RegUnit;
360 if (Reg.isVirtual() &&
361 isDefBetween(Reg, LIS->getInstructionIndex(*BeginBlock).getRegSlot(),
362 LIS->getInstructionIndex(*EndBlock).getRegSlot(), MRI,
363 LIS)) {
364 LiveOutRegs.insert(Reg);
365 }
366 }
367
368 // Pressure = sum_alive_registers register size
369 // Internally llvm will represent some registers as big 128 bits registers
370 // for example, but they actually correspond to 4 actual 32 bits registers.
371 // Thus Pressure is not equal to num_alive_registers * constant.
372 LiveInPressure = TopPressure.MaxSetPressure;
373 LiveOutPressure = BotPressure.MaxSetPressure;
374
375 // Prepares TopRPTracker for top down scheduling.
376 TopRPTracker.closeTop();
377}
378
379void SIScheduleBlock::schedule(MachineBasicBlock::iterator BeginBlock,
380 MachineBasicBlock::iterator EndBlock) {
381 if (!Scheduled)
382 fastSchedule();
383
384 // PreScheduling phase to set LiveIn and LiveOut.
385 initRegPressure(BeginBlock, EndBlock);
386 undoSchedule();
387
388 // Schedule for real now.
389
390 TopReadySUs.clear();
391
392 for (SUnit* SU : SUnits) {
393 if (!SU->NumPredsLeft)
394 TopReadySUs.push_back(SU);
395 }
396
397 while (!TopReadySUs.empty()) {
398 SUnit *SU = pickNode();
399 ScheduledSUnits.push_back(SU);
400 TopRPTracker.setPos(SU->getInstr());
401 TopRPTracker.advance();
402 nodeScheduled(SU);
403 }
404
405 // TODO: compute InternalAdditionalPressure.
406 InternalAdditionalPressure.resize(TopPressure.MaxSetPressure.size());
407
408 // Check everything is right.
409#ifndef NDEBUG
410 assert(SUnits.size() == ScheduledSUnits.size() &&(static_cast <bool> (SUnits.size() == ScheduledSUnits.size
() && TopReadySUs.empty()) ? void (0) : __assert_fail
("SUnits.size() == ScheduledSUnits.size() && TopReadySUs.empty()"
, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp", 411, __extension__
__PRETTY_FUNCTION__))
411 TopReadySUs.empty())(static_cast <bool> (SUnits.size() == ScheduledSUnits.size
() && TopReadySUs.empty()) ? void (0) : __assert_fail
("SUnits.size() == ScheduledSUnits.size() && TopReadySUs.empty()"
, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp", 411, __extension__
__PRETTY_FUNCTION__))
;
412 for (SUnit* SU : SUnits) {
413 assert(SU->isScheduled &&(static_cast <bool> (SU->isScheduled && SU->
NumPredsLeft == 0) ? void (0) : __assert_fail ("SU->isScheduled && SU->NumPredsLeft == 0"
, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp", 414, __extension__
__PRETTY_FUNCTION__))
414 SU->NumPredsLeft == 0)(static_cast <bool> (SU->isScheduled && SU->
NumPredsLeft == 0) ? void (0) : __assert_fail ("SU->isScheduled && SU->NumPredsLeft == 0"
, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp", 414, __extension__
__PRETTY_FUNCTION__))
;
415 }
416#endif
417
418 Scheduled = true;
419}
420
421void SIScheduleBlock::undoSchedule() {
422 for (SUnit* SU : SUnits) {
423 SU->isScheduled = false;
424 for (SDep& Succ : SU->Succs) {
425 if (BC->isSUInBlock(Succ.getSUnit(), ID))
426 undoReleaseSucc(SU, &Succ);
427 }
428 }
429 HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);
430 ScheduledSUnits.clear();
431 Scheduled = false;
432}
433
434void SIScheduleBlock::undoReleaseSucc(SUnit *SU, SDep *SuccEdge) {
435 SUnit *SuccSU = SuccEdge->getSUnit();
436
437 if (SuccEdge->isWeak()) {
438 ++SuccSU->WeakPredsLeft;
439 return;
440 }
441 ++SuccSU->NumPredsLeft;
442}
443
444void SIScheduleBlock::releaseSucc(SUnit *SU, SDep *SuccEdge) {
445 SUnit *SuccSU = SuccEdge->getSUnit();
446
447 if (SuccEdge->isWeak()) {
448 --SuccSU->WeakPredsLeft;
449 return;
450 }
451#ifndef NDEBUG
452 if (SuccSU->NumPredsLeft == 0) {
453 dbgs() << "*** Scheduling failed! ***\n";
454 DAG->dumpNode(*SuccSU);
455 dbgs() << " has been released too many times!\n";
456 llvm_unreachable(nullptr)::llvm::llvm_unreachable_internal(nullptr, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 456)
;
457 }
458#endif
459
460 --SuccSU->NumPredsLeft;
461}
462
463/// Release Successors of the SU that are in the block or not.
464void SIScheduleBlock::releaseSuccessors(SUnit *SU, bool InOrOutBlock) {
465 for (SDep& Succ : SU->Succs) {
466 SUnit *SuccSU = Succ.getSUnit();
467
468 if (SuccSU->NodeNum >= DAG->SUnits.size())
469 continue;
470
471 if (BC->isSUInBlock(SuccSU, ID) != InOrOutBlock)
472 continue;
473
474 releaseSucc(SU, &Succ);
475 if (SuccSU->NumPredsLeft == 0 && InOrOutBlock)
476 TopReadySUs.push_back(SuccSU);
477 }
478}
479
480void SIScheduleBlock::nodeScheduled(SUnit *SU) {
481 // Is in TopReadySUs
482 assert (!SU->NumPredsLeft)(static_cast <bool> (!SU->NumPredsLeft) ? void (0) :
__assert_fail ("!SU->NumPredsLeft", "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 482, __extension__ __PRETTY_FUNCTION__))
;
483 std::vector<SUnit *>::iterator I = llvm::find(TopReadySUs, SU);
484 if (I == TopReadySUs.end()) {
485 dbgs() << "Data Structure Bug in SI Scheduler\n";
486 llvm_unreachable(nullptr)::llvm::llvm_unreachable_internal(nullptr, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 486)
;
487 }
488 TopReadySUs.erase(I);
489
490 releaseSuccessors(SU, true);
491 // Scheduling this node will trigger a wait,
492 // thus propagate to other instructions that they do not need to wait either.
493 if (HasLowLatencyNonWaitedParent[NodeNum2Index[SU->NodeNum]])
494 HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);
495
496 if (DAG->IsLowLatencySU[SU->NodeNum]) {
497 for (SDep& Succ : SU->Succs) {
498 std::map<unsigned, unsigned>::iterator I =
499 NodeNum2Index.find(Succ.getSUnit()->NodeNum);
500 if (I != NodeNum2Index.end())
501 HasLowLatencyNonWaitedParent[I->second] = 1;
502 }
503 }
504 SU->isScheduled = true;
505}
506
507void SIScheduleBlock::finalizeUnits() {
508 // We remove links from outside blocks to enable scheduling inside the block.
509 for (SUnit* SU : SUnits) {
510 releaseSuccessors(SU, false);
511 if (DAG->IsHighLatencySU[SU->NodeNum])
512 HighLatencyBlock = true;
513 }
514 HasLowLatencyNonWaitedParent.resize(SUnits.size(), 0);
515}
516
517// we maintain ascending order of IDs
518void SIScheduleBlock::addPred(SIScheduleBlock *Pred) {
519 unsigned PredID = Pred->getID();
520
521 // Check if not already predecessor.
522 for (SIScheduleBlock* P : Preds) {
523 if (PredID == P->getID())
524 return;
525 }
526 Preds.push_back(Pred);
527
528 assert(none_of(Succs,(static_cast <bool> (none_of(Succs, [=](std::pair<SIScheduleBlock
*, SIScheduleBlockLinkKind> S) { return PredID == S.first->
getID(); }) && "Loop in the Block Graph!") ? void (0)
: __assert_fail ("none_of(Succs, [=](std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S) { return PredID == S.first->getID(); }) && \"Loop in the Block Graph!\""
, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp", 533, __extension__
__PRETTY_FUNCTION__))
529 [=](std::pair<SIScheduleBlock*,(static_cast <bool> (none_of(Succs, [=](std::pair<SIScheduleBlock
*, SIScheduleBlockLinkKind> S) { return PredID == S.first->
getID(); }) && "Loop in the Block Graph!") ? void (0)
: __assert_fail ("none_of(Succs, [=](std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S) { return PredID == S.first->getID(); }) && \"Loop in the Block Graph!\""
, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp", 533, __extension__
__PRETTY_FUNCTION__))
530 SIScheduleBlockLinkKind> S) {(static_cast <bool> (none_of(Succs, [=](std::pair<SIScheduleBlock
*, SIScheduleBlockLinkKind> S) { return PredID == S.first->
getID(); }) && "Loop in the Block Graph!") ? void (0)
: __assert_fail ("none_of(Succs, [=](std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S) { return PredID == S.first->getID(); }) && \"Loop in the Block Graph!\""
, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp", 533, __extension__
__PRETTY_FUNCTION__))
531 return PredID == S.first->getID();(static_cast <bool> (none_of(Succs, [=](std::pair<SIScheduleBlock
*, SIScheduleBlockLinkKind> S) { return PredID == S.first->
getID(); }) && "Loop in the Block Graph!") ? void (0)
: __assert_fail ("none_of(Succs, [=](std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S) { return PredID == S.first->getID(); }) && \"Loop in the Block Graph!\""
, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp", 533, __extension__
__PRETTY_FUNCTION__))
532 }) &&(static_cast <bool> (none_of(Succs, [=](std::pair<SIScheduleBlock
*, SIScheduleBlockLinkKind> S) { return PredID == S.first->
getID(); }) && "Loop in the Block Graph!") ? void (0)
: __assert_fail ("none_of(Succs, [=](std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S) { return PredID == S.first->getID(); }) && \"Loop in the Block Graph!\""
, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp", 533, __extension__
__PRETTY_FUNCTION__))
533 "Loop in the Block Graph!")(static_cast <bool> (none_of(Succs, [=](std::pair<SIScheduleBlock
*, SIScheduleBlockLinkKind> S) { return PredID == S.first->
getID(); }) && "Loop in the Block Graph!") ? void (0)
: __assert_fail ("none_of(Succs, [=](std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S) { return PredID == S.first->getID(); }) && \"Loop in the Block Graph!\""
, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp", 533, __extension__
__PRETTY_FUNCTION__))
;
534}
535
536void SIScheduleBlock::addSucc(SIScheduleBlock *Succ,
537 SIScheduleBlockLinkKind Kind) {
538 unsigned SuccID = Succ->getID();
539
540 // Check if not already predecessor.
541 for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> &S : Succs) {
542 if (SuccID == S.first->getID()) {
543 if (S.second == SIScheduleBlockLinkKind::NoData &&
544 Kind == SIScheduleBlockLinkKind::Data)
545 S.second = Kind;
546 return;
547 }
548 }
549 if (Succ->isHighLatencyBlock())
550 ++NumHighLatencySuccessors;
551 Succs.push_back(std::pair(Succ, Kind));
552
553 assert(none_of(Preds,(static_cast <bool> (none_of(Preds, [=](SIScheduleBlock
*P) { return SuccID == P->getID(); }) && "Loop in the Block Graph!"
) ? void (0) : __assert_fail ("none_of(Preds, [=](SIScheduleBlock *P) { return SuccID == P->getID(); }) && \"Loop in the Block Graph!\""
, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp", 555, __extension__
__PRETTY_FUNCTION__))
554 [=](SIScheduleBlock *P) { return SuccID == P->getID(); }) &&(static_cast <bool> (none_of(Preds, [=](SIScheduleBlock
*P) { return SuccID == P->getID(); }) && "Loop in the Block Graph!"
) ? void (0) : __assert_fail ("none_of(Preds, [=](SIScheduleBlock *P) { return SuccID == P->getID(); }) && \"Loop in the Block Graph!\""
, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp", 555, __extension__
__PRETTY_FUNCTION__))
555 "Loop in the Block Graph!")(static_cast <bool> (none_of(Preds, [=](SIScheduleBlock
*P) { return SuccID == P->getID(); }) && "Loop in the Block Graph!"
) ? void (0) : __assert_fail ("none_of(Preds, [=](SIScheduleBlock *P) { return SuccID == P->getID(); }) && \"Loop in the Block Graph!\""
, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp", 555, __extension__
__PRETTY_FUNCTION__))
;
556}
557
558#ifndef NDEBUG
559void SIScheduleBlock::printDebug(bool full) {
560 dbgs() << "Block (" << ID << ")\n";
561 if (!full)
562 return;
563
564 dbgs() << "\nContains High Latency Instruction: "
565 << HighLatencyBlock << '\n';
566 dbgs() << "\nDepends On:\n";
567 for (SIScheduleBlock* P : Preds) {
568 P->printDebug(false);
569 }
570
571 dbgs() << "\nSuccessors:\n";
572 for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S : Succs) {
573 if (S.second == SIScheduleBlockLinkKind::Data)
574 dbgs() << "(Data Dep) ";
575 S.first->printDebug(false);
576 }
577
578 if (Scheduled) {
579 dbgs() << "LiveInPressure "
580 << LiveInPressure[AMDGPU::RegisterPressureSets::SReg_32] << ' '
581 << LiveInPressure[AMDGPU::RegisterPressureSets::VGPR_32] << '\n';
582 dbgs() << "LiveOutPressure "
583 << LiveOutPressure[AMDGPU::RegisterPressureSets::SReg_32] << ' '
584 << LiveOutPressure[AMDGPU::RegisterPressureSets::VGPR_32] << "\n\n";
585 dbgs() << "LiveIns:\n";
586 for (unsigned Reg : LiveInRegs)
587 dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' ';
588
589 dbgs() << "\nLiveOuts:\n";
590 for (unsigned Reg : LiveOutRegs)
591 dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' ';
592 }
593
594 dbgs() << "\nInstructions:\n";
595 for (const SUnit* SU : SUnits)
596 DAG->dumpNode(*SU);
597
598 dbgs() << "///////////////////////\n";
599}
600#endif
601
602// SIScheduleBlockCreator //
603
604SIScheduleBlockCreator::SIScheduleBlockCreator(SIScheduleDAGMI *DAG)
605 : DAG(DAG) {}
606
607SIScheduleBlocks
608SIScheduleBlockCreator::getBlocks(SISchedulerBlockCreatorVariant BlockVariant) {
609 std::map<SISchedulerBlockCreatorVariant, SIScheduleBlocks>::iterator B =
610 Blocks.find(BlockVariant);
611 if (B == Blocks.end()) {
612 SIScheduleBlocks Res;
613 createBlocksForVariant(BlockVariant);
614 topologicalSort();
615 scheduleInsideBlocks();
616 fillStats();
617 Res.Blocks = CurrentBlocks;
618 Res.TopDownIndex2Block = TopDownIndex2Block;
619 Res.TopDownBlock2Index = TopDownBlock2Index;
620 Blocks[BlockVariant] = Res;
621 return Res;
622 } else {
623 return B->second;
624 }
625}
626
627bool SIScheduleBlockCreator::isSUInBlock(SUnit *SU, unsigned ID) {
628 if (SU->NodeNum >= DAG->SUnits.size())
629 return false;
630 return CurrentBlocks[Node2CurrentBlock[SU->NodeNum]]->getID() == ID;
631}
632
633void SIScheduleBlockCreator::colorHighLatenciesAlone() {
634 unsigned DAGSize = DAG->SUnits.size();
635
636 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
637 SUnit *SU = &DAG->SUnits[i];
638 if (DAG->IsHighLatencySU[SU->NodeNum]) {
639 CurrentColoring[SU->NodeNum] = NextReservedID++;
640 }
641 }
642}
643
644static bool
645hasDataDependencyPred(const SUnit &SU, const SUnit &FromSU) {
646 for (const auto &PredDep : SU.Preds) {
647 if (PredDep.getSUnit() == &FromSU &&
648 PredDep.getKind() == llvm::SDep::Data)
649 return true;
650 }
651 return false;
652}
653
654void SIScheduleBlockCreator::colorHighLatenciesGroups() {
655 unsigned DAGSize = DAG->SUnits.size();
656 unsigned NumHighLatencies = 0;
657 unsigned GroupSize;
658 int Color = NextReservedID;
659 unsigned Count = 0;
660 std::set<unsigned> FormingGroup;
661
662 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
663 SUnit *SU = &DAG->SUnits[i];
664 if (DAG->IsHighLatencySU[SU->NodeNum])
665 ++NumHighLatencies;
666 }
667
668 if (NumHighLatencies == 0)
669 return;
670
671 if (NumHighLatencies <= 6)
672 GroupSize = 2;
673 else if (NumHighLatencies <= 12)
674 GroupSize = 3;
675 else
676 GroupSize = 4;
677
678 for (unsigned SUNum : DAG->TopDownIndex2SU) {
679 const SUnit &SU = DAG->SUnits[SUNum];
680 if (DAG->IsHighLatencySU[SU.NodeNum]) {
681 unsigned CompatibleGroup = true;
682 int ProposedColor = Color;
683 std::vector<int> AdditionalElements;
684
685 // We don't want to put in the same block
686 // two high latency instructions that depend
687 // on each other.
688 // One way would be to check canAddEdge
689 // in both directions, but that currently is not
690 // enough because there the high latency order is
691 // enforced (via links).
692 // Instead, look at the dependencies between the
693 // high latency instructions and deduce if it is
694 // a data dependency or not.
695 for (unsigned j : FormingGroup) {
696 bool HasSubGraph;
697 std::vector<int> SubGraph;
698 // By construction (topological order), if SU and
699 // DAG->SUnits[j] are linked, DAG->SUnits[j] is necessary
700 // in the parent graph of SU.
701#ifndef NDEBUG
702 SubGraph = DAG->GetTopo()->GetSubGraph(SU, DAG->SUnits[j],
703 HasSubGraph);
704 assert(!HasSubGraph)(static_cast <bool> (!HasSubGraph) ? void (0) : __assert_fail
("!HasSubGraph", "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 704, __extension__ __PRETTY_FUNCTION__))
;
705#endif
706 SubGraph = DAG->GetTopo()->GetSubGraph(DAG->SUnits[j], SU,
707 HasSubGraph);
708 if (!HasSubGraph)
709 continue; // No dependencies between each other
710 else if (SubGraph.size() > 5) {
711 // Too many elements would be required to be added to the block.
712 CompatibleGroup = false;
713 break;
714 }
715 else {
716 // Check the type of dependency
717 for (unsigned k : SubGraph) {
718 // If in the path to join the two instructions,
719 // there is another high latency instruction,
720 // or instructions colored for another block
721 // abort the merge.
722 if (DAG->IsHighLatencySU[k] ||
723 (CurrentColoring[k] != ProposedColor &&
724 CurrentColoring[k] != 0)) {
725 CompatibleGroup = false;
726 break;
727 }
728 // If one of the SU in the subgraph depends on the result of SU j,
729 // there'll be a data dependency.
730 if (hasDataDependencyPred(DAG->SUnits[k], DAG->SUnits[j])) {
731 CompatibleGroup = false;
732 break;
733 }
734 }
735 if (!CompatibleGroup)
736 break;
737 // Same check for the SU
738 if (hasDataDependencyPred(SU, DAG->SUnits[j])) {
739 CompatibleGroup = false;
740 break;
741 }
742 // Add all the required instructions to the block
743 // These cannot live in another block (because they
744 // depend (order dependency) on one of the
745 // instruction in the block, and are required for the
746 // high latency instruction we add.
747 llvm::append_range(AdditionalElements, SubGraph);
748 }
749 }
750 if (CompatibleGroup) {
751 FormingGroup.insert(SU.NodeNum);
752 for (unsigned j : AdditionalElements)
753 CurrentColoring[j] = ProposedColor;
754 CurrentColoring[SU.NodeNum] = ProposedColor;
755 ++Count;
756 }
757 // Found one incompatible instruction,
758 // or has filled a big enough group.
759 // -> start a new one.
760 if (!CompatibleGroup) {
761 FormingGroup.clear();
762 Color = ++NextReservedID;
763 ProposedColor = Color;
764 FormingGroup.insert(SU.NodeNum);
765 CurrentColoring[SU.NodeNum] = ProposedColor;
766 Count = 0;
767 } else if (Count == GroupSize) {
768 FormingGroup.clear();
769 Color = ++NextReservedID;
770 ProposedColor = Color;
771 Count = 0;
772 }
773 }
774 }
775}
776
// Compute, for every SU, a color summarizing which combination of
// reserved (high latency) groups it transitively depends on — once
// top-down (through predecessors) and once bottom-up (through
// successors). SUs sharing the same combination share a color.
void SIScheduleBlockCreator::colorComputeReservedDependencies() {
  unsigned DAGSize = DAG->SUnits.size();
  std::map<std::set<unsigned>, unsigned> ColorCombinations;

  CurrentTopDownReservedDependencyColoring.clear();
  CurrentBottomUpReservedDependencyColoring.clear();

  // Color 0 means "depends on no reserved group".
  CurrentTopDownReservedDependencyColoring.resize(DAGSize, 0);
  CurrentBottomUpReservedDependencyColoring.resize(DAGSize, 0);

  // Traverse TopDown, and give different colors to SUs depending
  // on which combination of High Latencies they depend on.

  for (unsigned SUNum : DAG->TopDownIndex2SU) {
    SUnit *SU = &DAG->SUnits[SUNum];
    std::set<unsigned> SUColors;

    // Already given (a reserved color from the high-latency pass).
    if (CurrentColoring[SU->NodeNum]) {
      CurrentTopDownReservedDependencyColoring[SU->NodeNum] =
        CurrentColoring[SU->NodeNum];
      continue;
    }

    // Gather the dependency colors of all strong, in-region predecessors.
    for (SDep& PredDep : SU->Preds) {
      SUnit *Pred = PredDep.getSUnit();
      if (PredDep.isWeak() || Pred->NodeNum >= DAGSize)
        continue;
      if (CurrentTopDownReservedDependencyColoring[Pred->NodeNum] > 0)
        SUColors.insert(CurrentTopDownReservedDependencyColoring[Pred->NodeNum]);
    }
    // Color 0 by default.
    if (SUColors.empty())
      continue;
    // Same color than parents.
    // Colors > DAGSize are non-reserved IDs (createBlocksForVariant seeds
    // NextNonReservedID = DAGSize + 1), so a single non-reserved parent
    // color is simply inherited.
    if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
      CurrentTopDownReservedDependencyColoring[SU->NodeNum] =
        *SUColors.begin();
    else {
      // Reuse the color of a previously-seen combination, otherwise
      // allocate a fresh non-reserved color for it.
      std::map<std::set<unsigned>, unsigned>::iterator Pos =
        ColorCombinations.find(SUColors);
      if (Pos != ColorCombinations.end()) {
        CurrentTopDownReservedDependencyColoring[SU->NodeNum] = Pos->second;
      } else {
        CurrentTopDownReservedDependencyColoring[SU->NodeNum] =
          NextNonReservedID;
        ColorCombinations[SUColors] = NextNonReservedID++;
      }
    }
  }

  ColorCombinations.clear();

  // Same as before, but BottomUp.

  for (unsigned SUNum : DAG->BottomUpIndex2SU) {
    SUnit *SU = &DAG->SUnits[SUNum];
    std::set<unsigned> SUColors;

    // Already given.
    if (CurrentColoring[SU->NodeNum]) {
      CurrentBottomUpReservedDependencyColoring[SU->NodeNum] =
        CurrentColoring[SU->NodeNum];
      continue;
    }

    // Gather the dependency colors of all strong, in-region successors.
    for (SDep& SuccDep : SU->Succs) {
      SUnit *Succ = SuccDep.getSUnit();
      if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
        continue;
      if (CurrentBottomUpReservedDependencyColoring[Succ->NodeNum] > 0)
        SUColors.insert(CurrentBottomUpReservedDependencyColoring[Succ->NodeNum]);
    }
    // Keep color 0.
    if (SUColors.empty())
      continue;
    // Same color than parents.
    if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
      CurrentBottomUpReservedDependencyColoring[SU->NodeNum] =
        *SUColors.begin();
    else {
      std::map<std::set<unsigned>, unsigned>::iterator Pos =
        ColorCombinations.find(SUColors);
      if (Pos != ColorCombinations.end()) {
        CurrentBottomUpReservedDependencyColoring[SU->NodeNum] = Pos->second;
      } else {
        CurrentBottomUpReservedDependencyColoring[SU->NodeNum] =
          NextNonReservedID;
        ColorCombinations[SUColors] = NextNonReservedID++;
      }
    }
  }
}
870
871void SIScheduleBlockCreator::colorAccordingToReservedDependencies() {
872 std::map<std::pair<unsigned, unsigned>, unsigned> ColorCombinations;
873
874 // Every combination of colors given by the top down
875 // and bottom up Reserved node dependency
876
877 for (const SUnit &SU : DAG->SUnits) {
878 std::pair<unsigned, unsigned> SUColors;
879
880 // High latency instructions: already given.
881 if (CurrentColoring[SU.NodeNum])
882 continue;
883
884 SUColors.first = CurrentTopDownReservedDependencyColoring[SU.NodeNum];
885 SUColors.second = CurrentBottomUpReservedDependencyColoring[SU.NodeNum];
886
887 std::map<std::pair<unsigned, unsigned>, unsigned>::iterator Pos =
888 ColorCombinations.find(SUColors);
889 if (Pos != ColorCombinations.end()) {
890 CurrentColoring[SU.NodeNum] = Pos->second;
891 } else {
892 CurrentColoring[SU.NodeNum] = NextNonReservedID;
893 ColorCombinations[SUColors] = NextNonReservedID++;
894 }
895 }
896}
897
898void SIScheduleBlockCreator::colorEndsAccordingToDependencies() {
899 unsigned DAGSize = DAG->SUnits.size();
900 std::vector<int> PendingColoring = CurrentColoring;
901
902 assert(DAGSize >= 1 &&(static_cast <bool> (DAGSize >= 1 && CurrentBottomUpReservedDependencyColoring
.size() == DAGSize && CurrentTopDownReservedDependencyColoring
.size() == DAGSize) ? void (0) : __assert_fail ("DAGSize >= 1 && CurrentBottomUpReservedDependencyColoring.size() == DAGSize && CurrentTopDownReservedDependencyColoring.size() == DAGSize"
, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp", 904, __extension__
__PRETTY_FUNCTION__))
903 CurrentBottomUpReservedDependencyColoring.size() == DAGSize &&(static_cast <bool> (DAGSize >= 1 && CurrentBottomUpReservedDependencyColoring
.size() == DAGSize && CurrentTopDownReservedDependencyColoring
.size() == DAGSize) ? void (0) : __assert_fail ("DAGSize >= 1 && CurrentBottomUpReservedDependencyColoring.size() == DAGSize && CurrentTopDownReservedDependencyColoring.size() == DAGSize"
, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp", 904, __extension__
__PRETTY_FUNCTION__))
904 CurrentTopDownReservedDependencyColoring.size() == DAGSize)(static_cast <bool> (DAGSize >= 1 && CurrentBottomUpReservedDependencyColoring
.size() == DAGSize && CurrentTopDownReservedDependencyColoring
.size() == DAGSize) ? void (0) : __assert_fail ("DAGSize >= 1 && CurrentBottomUpReservedDependencyColoring.size() == DAGSize && CurrentTopDownReservedDependencyColoring.size() == DAGSize"
, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp", 904, __extension__
__PRETTY_FUNCTION__))
;
905 // If there is no reserved block at all, do nothing. We don't want
906 // everything in one block.
907 if (*std::max_element(CurrentBottomUpReservedDependencyColoring.begin(),
908 CurrentBottomUpReservedDependencyColoring.end()) == 0 &&
909 *std::max_element(CurrentTopDownReservedDependencyColoring.begin(),
910 CurrentTopDownReservedDependencyColoring.end()) == 0)
911 return;
912
913 for (unsigned SUNum : DAG->BottomUpIndex2SU) {
914 SUnit *SU = &DAG->SUnits[SUNum];
915 std::set<unsigned> SUColors;
916 std::set<unsigned> SUColorsPending;
917
918 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
919 continue;
920
921 if (CurrentBottomUpReservedDependencyColoring[SU->NodeNum] > 0 ||
922 CurrentTopDownReservedDependencyColoring[SU->NodeNum] > 0)
923 continue;
924
925 for (SDep& SuccDep : SU->Succs) {
926 SUnit *Succ = SuccDep.getSUnit();
927 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
928 continue;
929 if (CurrentBottomUpReservedDependencyColoring[Succ->NodeNum] > 0 ||
930 CurrentTopDownReservedDependencyColoring[Succ->NodeNum] > 0)
931 SUColors.insert(CurrentColoring[Succ->NodeNum]);
932 SUColorsPending.insert(PendingColoring[Succ->NodeNum]);
933 }
934 // If there is only one child/parent block, and that block
935 // is not among the ones we are removing in this path, then
936 // merge the instruction to that block
937 if (SUColors.size() == 1 && SUColorsPending.size() == 1)
938 PendingColoring[SU->NodeNum] = *SUColors.begin();
939 else // TODO: Attribute new colors depending on color
940 // combination of children.
941 PendingColoring[SU->NodeNum] = NextNonReservedID++;
942 }
943 CurrentColoring = PendingColoring;
944}
945
946
947void SIScheduleBlockCreator::colorForceConsecutiveOrderInGroup() {
948 unsigned DAGSize = DAG->SUnits.size();
949 unsigned PreviousColor;
950 std::set<unsigned> SeenColors;
951
952 if (DAGSize <= 1)
953 return;
954
955 PreviousColor = CurrentColoring[0];
956
957 for (unsigned i = 1, e = DAGSize; i != e; ++i) {
958 SUnit *SU = &DAG->SUnits[i];
959 unsigned CurrentColor = CurrentColoring[i];
960 unsigned PreviousColorSave = PreviousColor;
961 assert(i == SU->NodeNum)(static_cast <bool> (i == SU->NodeNum) ? void (0) : __assert_fail
("i == SU->NodeNum", "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 961, __extension__ __PRETTY_FUNCTION__))
;
962
963 if (CurrentColor != PreviousColor)
964 SeenColors.insert(PreviousColor);
965 PreviousColor = CurrentColor;
966
967 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
968 continue;
969
970 if (SeenColors.find(CurrentColor) == SeenColors.end())
971 continue;
972
973 if (PreviousColorSave != CurrentColor)
974 CurrentColoring[i] = NextNonReservedID++;
975 else
976 CurrentColoring[i] = CurrentColoring[i-1];
977 }
978}
979
980void SIScheduleBlockCreator::colorMergeConstantLoadsNextGroup() {
981 unsigned DAGSize = DAG->SUnits.size();
982
983 for (unsigned SUNum : DAG->BottomUpIndex2SU) {
984 SUnit *SU = &DAG->SUnits[SUNum];
985 std::set<unsigned> SUColors;
986
987 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
988 continue;
989
990 // No predecessor: Vgpr constant loading.
991 // Low latency instructions usually have a predecessor (the address)
992 if (SU->Preds.size() > 0 && !DAG->IsLowLatencySU[SU->NodeNum])
993 continue;
994
995 for (SDep& SuccDep : SU->Succs) {
996 SUnit *Succ = SuccDep.getSUnit();
997 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
998 continue;
999 SUColors.insert(CurrentColoring[Succ->NodeNum]);
1000 }
1001 if (SUColors.size() == 1)
1002 CurrentColoring[SU->NodeNum] = *SUColors.begin();
1003 }
1004}
1005
1006void SIScheduleBlockCreator::colorMergeIfPossibleNextGroup() {
1007 unsigned DAGSize = DAG->SUnits.size();
1008
1009 for (unsigned SUNum : DAG->BottomUpIndex2SU) {
1010 SUnit *SU = &DAG->SUnits[SUNum];
1011 std::set<unsigned> SUColors;
1012
1013 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
1014 continue;
1015
1016 for (SDep& SuccDep : SU->Succs) {
1017 SUnit *Succ = SuccDep.getSUnit();
1018 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
1019 continue;
1020 SUColors.insert(CurrentColoring[Succ->NodeNum]);
1021 }
1022 if (SUColors.size() == 1)
1023 CurrentColoring[SU->NodeNum] = *SUColors.begin();
1024 }
1025}
1026
1027void SIScheduleBlockCreator::colorMergeIfPossibleNextGroupOnlyForReserved() {
1028 unsigned DAGSize = DAG->SUnits.size();
1029
1030 for (unsigned SUNum : DAG->BottomUpIndex2SU) {
1031 SUnit *SU = &DAG->SUnits[SUNum];
1032 std::set<unsigned> SUColors;
1033
1034 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
1035 continue;
1036
1037 for (SDep& SuccDep : SU->Succs) {
1038 SUnit *Succ = SuccDep.getSUnit();
1039 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
1040 continue;
1041 SUColors.insert(CurrentColoring[Succ->NodeNum]);
1042 }
1043 if (SUColors.size() == 1 && *SUColors.begin() <= DAGSize)
1044 CurrentColoring[SU->NodeNum] = *SUColors.begin();
1045 }
1046}
1047
1048void SIScheduleBlockCreator::colorMergeIfPossibleSmallGroupsToNextGroup() {
1049 unsigned DAGSize = DAG->SUnits.size();
1050 std::map<unsigned, unsigned> ColorCount;
1051
1052 for (unsigned SUNum : DAG->BottomUpIndex2SU) {
1053 SUnit *SU = &DAG->SUnits[SUNum];
1054 unsigned color = CurrentColoring[SU->NodeNum];
1055 ++ColorCount[color];
1056 }
1057
1058 for (unsigned SUNum : DAG->BottomUpIndex2SU) {
1059 SUnit *SU = &DAG->SUnits[SUNum];
1060 unsigned color = CurrentColoring[SU->NodeNum];
1061 std::set<unsigned> SUColors;
1062
1063 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
1064 continue;
1065
1066 if (ColorCount[color] > 1)
1067 continue;
1068
1069 for (SDep& SuccDep : SU->Succs) {
1070 SUnit *Succ = SuccDep.getSUnit();
1071 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
1072 continue;
1073 SUColors.insert(CurrentColoring[Succ->NodeNum]);
1074 }
1075 if (SUColors.size() == 1 && *SUColors.begin() != color) {
1076 --ColorCount[color];
1077 CurrentColoring[SU->NodeNum] = *SUColors.begin();
1078 ++ColorCount[*SUColors.begin()];
1079 }
1080 }
1081}
1082
// Split blocks that grew too large into smaller ones.
// TODO: not implemented yet; currently a no-op.
void SIScheduleBlockCreator::cutHugeBlocks() {
  // TODO
}
1086
1087void SIScheduleBlockCreator::regroupNoUserInstructions() {
1088 unsigned DAGSize = DAG->SUnits.size();
1089 int GroupID = NextNonReservedID++;
1090
1091 for (unsigned SUNum : DAG->BottomUpIndex2SU) {
1092 SUnit *SU = &DAG->SUnits[SUNum];
1093 bool hasSuccessor = false;
1094
1095 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
1096 continue;
1097
1098 for (SDep& SuccDep : SU->Succs) {
1099 SUnit *Succ = SuccDep.getSUnit();
1100 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
1101 continue;
1102 hasSuccessor = true;
1103 }
1104 if (!hasSuccessor)
1105 CurrentColoring[SU->NodeNum] = GroupID;
1106 }
1107}
1108
1109void SIScheduleBlockCreator::colorExports() {
1110 unsigned ExportColor = NextNonReservedID++;
1111 SmallVector<unsigned, 8> ExpGroup;
1112
1113 // Put all exports together in a block.
1114 // The block will naturally end up being scheduled last,
1115 // thus putting exports at the end of the schedule, which
1116 // is better for performance.
1117 // However we must ensure, for safety, the exports can be put
1118 // together in the same block without any other instruction.
1119 // This could happen, for example, when scheduling after regalloc
1120 // if reloading a spilled register from memory using the same
1121 // register than used in a previous export.
1122 // If that happens, do not regroup the exports.
1123 for (unsigned SUNum : DAG->TopDownIndex2SU) {
1124 const SUnit &SU = DAG->SUnits[SUNum];
1125 if (SIInstrInfo::isEXP(*SU.getInstr())) {
1126 // SU is an export instruction. Check whether one of its successor
1127 // dependencies is a non-export, in which case we skip export grouping.
1128 for (const SDep &SuccDep : SU.Succs) {
1129 const SUnit *SuccSU = SuccDep.getSUnit();
1130 if (SuccDep.isWeak() || SuccSU->NodeNum >= DAG->SUnits.size()) {
1131 // Ignore these dependencies.
1132 continue;
1133 }
1134 assert(SuccSU->isInstr() &&(static_cast <bool> (SuccSU->isInstr() && "SUnit unexpectedly not representing an instruction!"
) ? void (0) : __assert_fail ("SuccSU->isInstr() && \"SUnit unexpectedly not representing an instruction!\""
, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp", 1135, __extension__
__PRETTY_FUNCTION__))
1135 "SUnit unexpectedly not representing an instruction!")(static_cast <bool> (SuccSU->isInstr() && "SUnit unexpectedly not representing an instruction!"
) ? void (0) : __assert_fail ("SuccSU->isInstr() && \"SUnit unexpectedly not representing an instruction!\""
, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp", 1135, __extension__
__PRETTY_FUNCTION__))
;
1136
1137 if (!SIInstrInfo::isEXP(*SuccSU->getInstr())) {
1138 // A non-export depends on us. Skip export grouping.
1139 // Note that this is a bit pessimistic: We could still group all other
1140 // exports that are not depended on by non-exports, directly or
1141 // indirectly. Simply skipping this particular export but grouping all
1142 // others would not account for indirect dependencies.
1143 return;
1144 }
1145 }
1146 ExpGroup.push_back(SUNum);
1147 }
1148 }
1149
1150 // The group can be formed. Give the color.
1151 for (unsigned j : ExpGroup)
1152 CurrentColoring[j] = ExportColor;
1153}
1154
1155void SIScheduleBlockCreator::createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant) {
1156 unsigned DAGSize = DAG->SUnits.size();
1157 std::map<unsigned,unsigned> RealID;
1158
1159 CurrentBlocks.clear();
1160 CurrentColoring.clear();
1161 CurrentColoring.resize(DAGSize, 0);
1162 Node2CurrentBlock.clear();
1163
1164 // Restore links previous scheduling variant has overridden.
1165 DAG->restoreSULinksLeft();
1166
1167 NextReservedID = 1;
1168 NextNonReservedID = DAGSize + 1;
1169
1170 LLVM_DEBUG(dbgs() << "Coloring the graph\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Coloring the graph\n"
; } } while (false)
;
1171
1172 if (BlockVariant == SISchedulerBlockCreatorVariant::LatenciesGrouped)
1173 colorHighLatenciesGroups();
1174 else
1175 colorHighLatenciesAlone();
1176 colorComputeReservedDependencies();
1177 colorAccordingToReservedDependencies();
1178 colorEndsAccordingToDependencies();
1179 if (BlockVariant == SISchedulerBlockCreatorVariant::LatenciesAlonePlusConsecutive)
1180 colorForceConsecutiveOrderInGroup();
1181 regroupNoUserInstructions();
1182 colorMergeConstantLoadsNextGroup();
1183 colorMergeIfPossibleNextGroupOnlyForReserved();
1184 colorExports();
1185
1186 // Put SUs of same color into same block
1187 Node2CurrentBlock.resize(DAGSize, -1);
1188 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1189 SUnit *SU = &DAG->SUnits[i];
1190 unsigned Color = CurrentColoring[SU->NodeNum];
1191 if (RealID.find(Color) == RealID.end()) {
1192 int ID = CurrentBlocks.size();
1193 BlockPtrs.push_back(std::make_unique<SIScheduleBlock>(DAG, this, ID));
1194 CurrentBlocks.push_back(BlockPtrs.rbegin()->get());
1195 RealID[Color] = ID;
1196 }
1197 CurrentBlocks[RealID[Color]]->addUnit(SU);
1198 Node2CurrentBlock[SU->NodeNum] = RealID[Color];
1199 }
1200
1201 // Build dependencies between blocks.
1202 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1203 SUnit *SU = &DAG->SUnits[i];
1204 int SUID = Node2CurrentBlock[i];
1205 for (SDep& SuccDep : SU->Succs) {
1206 SUnit *Succ = SuccDep.getSUnit();
1207 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
1208 continue;
1209 if (Node2CurrentBlock[Succ->NodeNum] != SUID)
1210 CurrentBlocks[SUID]->addSucc(CurrentBlocks[Node2CurrentBlock[Succ->NodeNum]],
1211 SuccDep.isCtrl() ? NoData : Data);
1212 }
1213 for (SDep& PredDep : SU->Preds) {
1214 SUnit *Pred = PredDep.getSUnit();
1215 if (PredDep.isWeak() || Pred->NodeNum >= DAGSize)
1216 continue;
1217 if (Node2CurrentBlock[Pred->NodeNum] != SUID)
1218 CurrentBlocks[SUID]->addPred(CurrentBlocks[Node2CurrentBlock[Pred->NodeNum]]);
1219 }
1220 }
1221
1222 // Free root and leafs of all blocks to enable scheduling inside them.
1223 for (SIScheduleBlock *Block : CurrentBlocks)
1224 Block->finalizeUnits();
1225 LLVM_DEBUG({do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { { dbgs() << "Blocks created:\n\n"
; for (SIScheduleBlock *Block : CurrentBlocks) Block->printDebug
(true); }; } } while (false)
1226 dbgs() << "Blocks created:\n\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { { dbgs() << "Blocks created:\n\n"
; for (SIScheduleBlock *Block : CurrentBlocks) Block->printDebug
(true); }; } } while (false)
1227 for (SIScheduleBlock *Block : CurrentBlocks)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { { dbgs() << "Blocks created:\n\n"
; for (SIScheduleBlock *Block : CurrentBlocks) Block->printDebug
(true); }; } } while (false)
1228 Block->printDebug(true);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { { dbgs() << "Blocks created:\n\n"
; for (SIScheduleBlock *Block : CurrentBlocks) Block->printDebug
(true); }; } } while (false)
1229 })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { { dbgs() << "Blocks created:\n\n"
; for (SIScheduleBlock *Block : CurrentBlocks) Block->printDebug
(true); }; } } while (false)
;
1230}
1231
// Two functions taken from CodeGen/MachineScheduler.cpp
1233
1234/// Non-const version.
1235static MachineBasicBlock::iterator
1236nextIfDebug(MachineBasicBlock::iterator I,
1237 MachineBasicBlock::const_iterator End) {
1238 for (; I != End; ++I) {
1239 if (!I->isDebugInstr())
1240 break;
1241 }
1242 return I;
1243}
1244
1245void SIScheduleBlockCreator::topologicalSort() {
1246 unsigned DAGSize = CurrentBlocks.size();
1247 std::vector<int> WorkList;
1248
1249 LLVM_DEBUG(dbgs() << "Topological Sort\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Topological Sort\n"
; } } while (false)
;
1250
1251 WorkList.reserve(DAGSize);
1252 TopDownIndex2Block.resize(DAGSize);
1253 TopDownBlock2Index.resize(DAGSize);
1254 BottomUpIndex2Block.resize(DAGSize);
1255
1256 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1257 SIScheduleBlock *Block = CurrentBlocks[i];
1258 unsigned Degree = Block->getSuccs().size();
1259 TopDownBlock2Index[i] = Degree;
1260 if (Degree == 0) {
1261 WorkList.push_back(i);
1262 }
1263 }
1264
1265 int Id = DAGSize;
1266 while (!WorkList.empty()) {
1267 int i = WorkList.back();
1268 SIScheduleBlock *Block = CurrentBlocks[i];
1269 WorkList.pop_back();
1270 TopDownBlock2Index[i] = --Id;
1271 TopDownIndex2Block[Id] = i;
1272 for (SIScheduleBlock* Pred : Block->getPreds()) {
1273 if (!--TopDownBlock2Index[Pred->getID()])
1274 WorkList.push_back(Pred->getID());
1275 }
1276 }
1277
1278#ifndef NDEBUG
1279 // Check correctness of the ordering.
1280 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1281 SIScheduleBlock *Block = CurrentBlocks[i];
1282 for (SIScheduleBlock* Pred : Block->getPreds()) {
1283 assert(TopDownBlock2Index[i] > TopDownBlock2Index[Pred->getID()] &&(static_cast <bool> (TopDownBlock2Index[i] > TopDownBlock2Index
[Pred->getID()] && "Wrong Top Down topological sorting"
) ? void (0) : __assert_fail ("TopDownBlock2Index[i] > TopDownBlock2Index[Pred->getID()] && \"Wrong Top Down topological sorting\""
, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp", 1284, __extension__
__PRETTY_FUNCTION__))
1284 "Wrong Top Down topological sorting")(static_cast <bool> (TopDownBlock2Index[i] > TopDownBlock2Index
[Pred->getID()] && "Wrong Top Down topological sorting"
) ? void (0) : __assert_fail ("TopDownBlock2Index[i] > TopDownBlock2Index[Pred->getID()] && \"Wrong Top Down topological sorting\""
, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp", 1284, __extension__
__PRETTY_FUNCTION__))
;
1285 }
1286 }
1287#endif
1288
1289 BottomUpIndex2Block = std::vector<int>(TopDownIndex2Block.rbegin(),
1290 TopDownIndex2Block.rend());
1291}
1292
1293void SIScheduleBlockCreator::scheduleInsideBlocks() {
1294 unsigned DAGSize = CurrentBlocks.size();
1295
1296 LLVM_DEBUG(dbgs() << "\nScheduling Blocks\n\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "\nScheduling Blocks\n\n"
; } } while (false)
;
1297
1298 // We do schedule a valid scheduling such that a Block corresponds
1299 // to a range of instructions.
1300 LLVM_DEBUG(dbgs() << "First phase: Fast scheduling for Reg Liveness\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "First phase: Fast scheduling for Reg Liveness\n"
; } } while (false)
;
1301 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1302 SIScheduleBlock *Block = CurrentBlocks[i];
1303 Block->fastSchedule();
1304 }
1305
1306 // Note: the following code, and the part restoring previous position
1307 // is by far the most expensive operation of the Scheduler.
1308
1309 // Do not update CurrentTop.
1310 MachineBasicBlock::iterator CurrentTopFastSched = DAG->getCurrentTop();
1311 std::vector<MachineBasicBlock::iterator> PosOld;
1312 std::vector<MachineBasicBlock::iterator> PosNew;
1313 PosOld.reserve(DAG->SUnits.size());
1314 PosNew.reserve(DAG->SUnits.size());
1315
1316 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1317 int BlockIndice = TopDownIndex2Block[i];
1318 SIScheduleBlock *Block = CurrentBlocks[BlockIndice];
1319 std::vector<SUnit*> SUs = Block->getScheduledUnits();
1320
1321 for (SUnit* SU : SUs) {
1322 MachineInstr *MI = SU->getInstr();
1323 MachineBasicBlock::iterator Pos = MI;
1324 PosOld.push_back(Pos);
1325 if (&*CurrentTopFastSched == MI) {
1326 PosNew.push_back(Pos);
1327 CurrentTopFastSched = nextIfDebug(++CurrentTopFastSched,
1328 DAG->getCurrentBottom());
1329 } else {
1330 // Update the instruction stream.
1331 DAG->getBB()->splice(CurrentTopFastSched, DAG->getBB(), MI);
1332
1333 // Update LiveIntervals.
1334 // Note: Moving all instructions and calling handleMove every time
1335 // is the most cpu intensive operation of the scheduler.
1336 // It would gain a lot if there was a way to recompute the
1337 // LiveIntervals for the entire scheduling region.
1338 DAG->getLIS()->handleMove(*MI, /*UpdateFlags=*/true);
1339 PosNew.push_back(CurrentTopFastSched);
1340 }
1341 }
1342 }
1343
1344 // Now we have Block of SUs == Block of MI.
1345 // We do the final schedule for the instructions inside the block.
1346 // The property that all the SUs of the Block are grouped together as MI
1347 // is used for correct reg usage tracking.
1348 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1349 SIScheduleBlock *Block = CurrentBlocks[i];
1350 std::vector<SUnit*> SUs = Block->getScheduledUnits();
1351 Block->schedule((*SUs.begin())->getInstr(), (*SUs.rbegin())->getInstr());
1352 }
1353
1354 LLVM_DEBUG(dbgs() << "Restoring MI Pos\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Restoring MI Pos\n"
; } } while (false)
;
1355 // Restore old ordering (which prevents a LIS->handleMove bug).
1356 for (unsigned i = PosOld.size(), e = 0; i != e; --i) {
1357 MachineBasicBlock::iterator POld = PosOld[i-1];
1358 MachineBasicBlock::iterator PNew = PosNew[i-1];
1359 if (PNew != POld) {
1360 // Update the instruction stream.
1361 DAG->getBB()->splice(POld, DAG->getBB(), PNew);
1362
1363 // Update LiveIntervals.
1364 DAG->getLIS()->handleMove(*POld, /*UpdateFlags=*/true);
1365 }
1366 }
1367
1368 LLVM_DEBUG({do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { { for (SIScheduleBlock *Block : CurrentBlocks
) Block->printDebug(true); }; } } while (false)
1369 for (SIScheduleBlock *Block : CurrentBlocks)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { { for (SIScheduleBlock *Block : CurrentBlocks
) Block->printDebug(true); }; } } while (false)
1370 Block->printDebug(true);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { { for (SIScheduleBlock *Block : CurrentBlocks
) Block->printDebug(true); }; } } while (false)
1371 })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { { for (SIScheduleBlock *Block : CurrentBlocks
) Block->printDebug(true); }; } } while (false)
;
1372}
1373
1374void SIScheduleBlockCreator::fillStats() {
1375 unsigned DAGSize = CurrentBlocks.size();
1376
1377 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1378 int BlockIndice = TopDownIndex2Block[i];
1379 SIScheduleBlock *Block = CurrentBlocks[BlockIndice];
1380 if (Block->getPreds().empty())
1381 Block->Depth = 0;
1382 else {
1383 unsigned Depth = 0;
1384 for (SIScheduleBlock *Pred : Block->getPreds()) {
1385 if (Depth < Pred->Depth + Pred->getCost())
1386 Depth = Pred->Depth + Pred->getCost();
1387 }
1388 Block->Depth = Depth;
1389 }
1390 }
1391
1392 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1393 int BlockIndice = BottomUpIndex2Block[i];
1394 SIScheduleBlock *Block = CurrentBlocks[BlockIndice];
1395 if (Block->getSuccs().empty())
1396 Block->Height = 0;
1397 else {
1398 unsigned Height = 0;
1399 for (const auto &Succ : Block->getSuccs())
1400 Height = std::max(Height, Succ.first->Height + Succ.first->getCost());
1401 Block->Height = Height;
1402 }
1403 }
1404}
1405
1406// SIScheduleBlockScheduler //
1407
1408SIScheduleBlockScheduler::SIScheduleBlockScheduler(SIScheduleDAGMI *DAG,
1409 SISchedulerBlockSchedulerVariant Variant,
1410 SIScheduleBlocks BlocksStruct) :
1411 DAG(DAG), Variant(Variant), Blocks(BlocksStruct.Blocks),
1412 LastPosWaitedHighLatency(0), NumBlockScheduled(0), VregCurrentUsage(0),
1413 SregCurrentUsage(0), maxVregUsage(0), maxSregUsage(0) {
1414
1415 // Fill the usage of every output
1416 // Warning: while by construction we always have a link between two blocks
1417 // when one needs a result from the other, the number of users of an output
1418 // is not the sum of child blocks having as input the same virtual register.
1419 // Here is an example. A produces x and y. B eats x and produces x'.
1420 // C eats x' and y. The register coalescer may have attributed the same
1421 // virtual register to x and x'.
1422 // To count accurately, we do a topological sort. In case the register is
1423 // found for several parents, we increment the usage of the one with the
1424 // highest topological index.
1425 LiveOutRegsNumUsages.resize(Blocks.size());
1426 for (SIScheduleBlock *Block : Blocks) {
1427 for (unsigned Reg : Block->getInRegs()) {
1428 bool Found = false;
1429 int topoInd = -1;
1430 for (SIScheduleBlock* Pred: Block->getPreds()) {
1431 std::set<unsigned> PredOutRegs = Pred->getOutRegs();
1432 std::set<unsigned>::iterator RegPos = PredOutRegs.find(Reg);
1433
1434 if (RegPos != PredOutRegs.end()) {
1435 Found = true;
1436 if (topoInd < BlocksStruct.TopDownBlock2Index[Pred->getID()]) {
1437 topoInd = BlocksStruct.TopDownBlock2Index[Pred->getID()];
1438 }
1439 }
1440 }
1441
1442 if (!Found)
1443 continue;
1444
1445 int PredID = BlocksStruct.TopDownIndex2Block[topoInd];
1446 ++LiveOutRegsNumUsages[PredID][Reg];
1447 }
1448 }
1449
1450 LastPosHighLatencyParentScheduled.resize(Blocks.size(), 0);
1451 BlockNumPredsLeft.resize(Blocks.size());
1452 BlockNumSuccsLeft.resize(Blocks.size());
1453
1454 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
13
Assuming 'i' is equal to 'e'
14
Loop condition is false. Execution continues on line 1461
1455 SIScheduleBlock *Block = Blocks[i];
1456 BlockNumPredsLeft[i] = Block->getPreds().size();
1457 BlockNumSuccsLeft[i] = Block->getSuccs().size();
1458 }
1459
1460#ifndef NDEBUG
1461 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
15
Assuming 'i' is equal to 'e'
16
Loop condition is false. Execution continues on line 1467
1462 SIScheduleBlock *Block = Blocks[i];
1463 assert(Block->getID() == i)(static_cast <bool> (Block->getID() == i) ? void (0)
: __assert_fail ("Block->getID() == i", "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 1463, __extension__ __PRETTY_FUNCTION__))
;
1464 }
1465#endif
1466
1467 std::set<unsigned> InRegs = DAG->getInRegs();
1468 addLiveRegs(InRegs);
1469
1470 // Increase LiveOutRegsNumUsages for blocks
1471 // producing registers consumed in another
1472 // scheduling region.
1473 for (unsigned Reg : DAG->getOutRegs()) {
1474 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
1475 // Do reverse traversal
1476 int ID = BlocksStruct.TopDownIndex2Block[Blocks.size()-1-i];
1477 SIScheduleBlock *Block = Blocks[ID];
1478 const std::set<unsigned> &OutRegs = Block->getOutRegs();
1479
1480 if (OutRegs.find(Reg) == OutRegs.end())
1481 continue;
1482
1483 ++LiveOutRegsNumUsages[ID][Reg];
1484 break;
1485 }
1486 }
1487
1488 // Fill LiveRegsConsumers for regs that were already
1489 // defined before scheduling.
1490 for (SIScheduleBlock *Block : Blocks) {
1491 for (unsigned Reg : Block->getInRegs()) {
1492 bool Found = false;
1493 for (SIScheduleBlock* Pred: Block->getPreds()) {
1494 std::set<unsigned> PredOutRegs = Pred->getOutRegs();
1495 std::set<unsigned>::iterator RegPos = PredOutRegs.find(Reg);
1496
1497 if (RegPos != PredOutRegs.end()) {
1498 Found = true;
1499 break;
1500 }
1501 }
1502
1503 if (!Found)
1504 ++LiveRegsConsumers[Reg];
1505 }
1506 }
1507
1508 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
17
Assuming 'i' is equal to 'e'
18
Loop condition is false. Execution continues on line 1515
1509 SIScheduleBlock *Block = Blocks[i];
1510 if (BlockNumPredsLeft[i] == 0) {
1511 ReadyBlocks.push_back(Block);
1512 }
1513 }
1514
1515 while (SIScheduleBlock *Block = pickBlock()) {
19
Calling 'SIScheduleBlockScheduler::pickBlock'
1516 BlocksScheduled.push_back(Block);
1517 blockScheduled(Block);
1518 }
1519
1520 LLVM_DEBUG(dbgs() << "Block Order:"; for (SIScheduleBlock *Blockdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Block Order:"; for (
SIScheduleBlock *Block : BlocksScheduled) { dbgs() << ' '
<< Block->getID(); } dbgs() << '\n';; } } while
(false)
1521 : BlocksScheduled) {do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Block Order:"; for (
SIScheduleBlock *Block : BlocksScheduled) { dbgs() << ' '
<< Block->getID(); } dbgs() << '\n';; } } while
(false)
1522 dbgs() << ' ' << Block->getID();do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Block Order:"; for (
SIScheduleBlock *Block : BlocksScheduled) { dbgs() << ' '
<< Block->getID(); } dbgs() << '\n';; } } while
(false)
1523 } dbgs() << '\n';)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Block Order:"; for (
SIScheduleBlock *Block : BlocksScheduled) { dbgs() << ' '
<< Block->getID(); } dbgs() << '\n';; } } while
(false)
;
1524}
1525
// Latency-oriented comparison of TryCand against the current best Cand.
// Each SISched helper is a tie-break level; the first decisive comparison
// returns true (the helpers record the winning reason on TryCand).
bool SIScheduleBlockScheduler::tryCandidateLatency(SIBlockSchedCandidate &Cand,
                                    SIBlockSchedCandidate &TryCand) {
  // The first legal candidate always becomes the baseline.
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return true;
  }

  // Try to hide high latencies.
  if (SISched::tryLess(TryCand.LastPosHighLatParentScheduled,
                 Cand.LastPosHighLatParentScheduled, TryCand, Cand, Latency))
    return true;
  // Schedule high latencies early so you can hide them better.
  if (SISched::tryGreater(TryCand.IsHighLatency, Cand.IsHighLatency,
                          TryCand, Cand, Latency))
    return true;
  // Among high-latency blocks, prefer the one with greater Height
  // (more dependent work remains below it).
  if (TryCand.IsHighLatency && SISched::tryGreater(TryCand.Height, Cand.Height,
                                                   TryCand, Cand, Depth))
    return true;
  // Prefer blocks that unlock more high-latency successors.
  if (SISched::tryGreater(TryCand.NumHighLatencySuccessors,
                          Cand.NumHighLatencySuccessors,
                          TryCand, Cand, Successor))
    return true;
  return false;
}
1550
// Register-pressure-oriented comparison of TryCand against the current best
// Cand. Levels, in order: don't increase VGPR usage, keep successors coming,
// prefer taller blocks, then minimize the exact VGPR usage delta.
bool SIScheduleBlockScheduler::tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
                                     SIBlockSchedCandidate &TryCand) {
  // The first legal candidate always becomes the baseline.
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return true;
  }

  // Prefer candidates that do not increase VGPR usage at all.
  if (SISched::tryLess(TryCand.VGPRUsageDiff > 0, Cand.VGPRUsageDiff > 0,
                       TryCand, Cand, RegUsage))
    return true;
  // Prefer blocks that have successors (keep the ready list fed).
  if (SISched::tryGreater(TryCand.NumSuccessors > 0,
                          Cand.NumSuccessors > 0,
                          TryCand, Cand, Successor))
    return true;
  // Prefer the taller block (more dependent work below it).
  if (SISched::tryGreater(TryCand.Height, Cand.Height, TryCand, Cand, Depth))
    return true;
  // Finally, minimize the signed VGPR pressure delta.
  if (SISched::tryLess(TryCand.VGPRUsageDiff, Cand.VGPRUsageDiff,
                       TryCand, Cand, RegUsage))
    return true;
  return false;
}
1572
1573SIScheduleBlock *SIScheduleBlockScheduler::pickBlock() {
1574 SIBlockSchedCandidate Cand;
1575 std::vector<SIScheduleBlock*>::iterator Best;
1576 SIScheduleBlock *Block;
1577 if (ReadyBlocks.empty())
20
Assuming the condition is false
21
Taking false branch
1578 return nullptr;
1579
1580 DAG->fillVgprSgprCost(LiveRegs.begin(), LiveRegs.end(),
1581 VregCurrentUsage, SregCurrentUsage);
1582 if (VregCurrentUsage > maxVregUsage)
22
Assuming field 'VregCurrentUsage' is <= field 'maxVregUsage'
23
Taking false branch
1583 maxVregUsage = VregCurrentUsage;
1584 if (SregCurrentUsage > maxSregUsage)
24
Assuming field 'SregCurrentUsage' is <= field 'maxSregUsage'
1585 maxSregUsage = SregCurrentUsage;
1586 LLVM_DEBUG(dbgs() << "Picking New Blocks\n"; dbgs() << "Available: ";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking New Blocks\n"
; dbgs() << "Available: "; for (SIScheduleBlock *Block :
ReadyBlocks) dbgs() << Block->getID() << ' ';
dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs
) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) <<
' '; dbgs() << '\n'; dbgs() << "Current VGPRs: "
<< VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: "
<< SregCurrentUsage << '\n';; } } while (false)
25
Taking false branch
26
Assuming 'DebugFlag' is true
27
Assuming the condition is false
28
Taking false branch
29
Loop condition is false. Exiting loop
1587 for (SIScheduleBlock *Blockdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking New Blocks\n"
; dbgs() << "Available: "; for (SIScheduleBlock *Block :
ReadyBlocks) dbgs() << Block->getID() << ' ';
dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs
) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) <<
' '; dbgs() << '\n'; dbgs() << "Current VGPRs: "
<< VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: "
<< SregCurrentUsage << '\n';; } } while (false)
1588 : ReadyBlocks) dbgs()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking New Blocks\n"
; dbgs() << "Available: "; for (SIScheduleBlock *Block :
ReadyBlocks) dbgs() << Block->getID() << ' ';
dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs
) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) <<
' '; dbgs() << '\n'; dbgs() << "Current VGPRs: "
<< VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: "
<< SregCurrentUsage << '\n';; } } while (false)
1589 << Block->getID() << ' ';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking New Blocks\n"
; dbgs() << "Available: "; for (SIScheduleBlock *Block :
ReadyBlocks) dbgs() << Block->getID() << ' ';
dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs
) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) <<
' '; dbgs() << '\n'; dbgs() << "Current VGPRs: "
<< VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: "
<< SregCurrentUsage << '\n';; } } while (false)
1590 dbgs() << "\nCurrent Live:\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking New Blocks\n"
; dbgs() << "Available: "; for (SIScheduleBlock *Block :
ReadyBlocks) dbgs() << Block->getID() << ' ';
dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs
) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) <<
' '; dbgs() << '\n'; dbgs() << "Current VGPRs: "
<< VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: "
<< SregCurrentUsage << '\n';; } } while (false)
1591 for (unsigned Regdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking New Blocks\n"
; dbgs() << "Available: "; for (SIScheduleBlock *Block :
ReadyBlocks) dbgs() << Block->getID() << ' ';
dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs
) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) <<
' '; dbgs() << '\n'; dbgs() << "Current VGPRs: "
<< VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: "
<< SregCurrentUsage << '\n';; } } while (false)
1592 : LiveRegs) dbgs()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking New Blocks\n"
; dbgs() << "Available: "; for (SIScheduleBlock *Block :
ReadyBlocks) dbgs() << Block->getID() << ' ';
dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs
) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) <<
' '; dbgs() << '\n'; dbgs() << "Current VGPRs: "
<< VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: "
<< SregCurrentUsage << '\n';; } } while (false)
1593 << printVRegOrUnit(Reg, DAG->getTRI()) << ' ';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking New Blocks\n"
; dbgs() << "Available: "; for (SIScheduleBlock *Block :
ReadyBlocks) dbgs() << Block->getID() << ' ';
dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs
) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) <<
' '; dbgs() << '\n'; dbgs() << "Current VGPRs: "
<< VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: "
<< SregCurrentUsage << '\n';; } } while (false)
1594 dbgs() << '\n';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking New Blocks\n"
; dbgs() << "Available: "; for (SIScheduleBlock *Block :
ReadyBlocks) dbgs() << Block->getID() << ' ';
dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs
) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) <<
' '; dbgs() << '\n'; dbgs() << "Current VGPRs: "
<< VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: "
<< SregCurrentUsage << '\n';; } } while (false)
1595 dbgs() << "Current VGPRs: " << VregCurrentUsage << '\n';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking New Blocks\n"
; dbgs() << "Available: "; for (SIScheduleBlock *Block :
ReadyBlocks) dbgs() << Block->getID() << ' ';
dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs
) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) <<
' '; dbgs() << '\n'; dbgs() << "Current VGPRs: "
<< VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: "
<< SregCurrentUsage << '\n';; } } while (false)
1596 dbgs() << "Current SGPRs: " << SregCurrentUsage << '\n';)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking New Blocks\n"
; dbgs() << "Available: "; for (SIScheduleBlock *Block :
ReadyBlocks) dbgs() << Block->getID() << ' ';
dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs
) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) <<
' '; dbgs() << '\n'; dbgs() << "Current VGPRs: "
<< VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: "
<< SregCurrentUsage << '\n';; } } while (false)
;
1597
1598 Cand.Block = nullptr;
30
Null pointer value stored to 'Cand.Block'
1599 for (std::vector<SIScheduleBlock*>::iterator I = ReadyBlocks.begin(),
1600 E = ReadyBlocks.end(); I != E; ++I) {
1601 SIBlockSchedCandidate TryCand;
1602 TryCand.Block = *I;
1603 TryCand.IsHighLatency = TryCand.Block->isHighLatencyBlock();
1604 TryCand.VGPRUsageDiff =
1605 checkRegUsageImpact(TryCand.Block->getInRegs(),
1606 TryCand.Block->getOutRegs())[AMDGPU::RegisterPressureSets::VGPR_32];
1607 TryCand.NumSuccessors = TryCand.Block->getSuccs().size();
1608 TryCand.NumHighLatencySuccessors =
1609 TryCand.Block->getNumHighLatencySuccessors();
1610 TryCand.LastPosHighLatParentScheduled =
1611 (unsigned int) std::max<int> (0,
1612 LastPosHighLatencyParentScheduled[TryCand.Block->getID()] -
1613 LastPosWaitedHighLatency);
1614 TryCand.Height = TryCand.Block->Height;
1615 // Try not to increase VGPR usage too much, else we may spill.
1616 if (VregCurrentUsage > 120 ||
1617 Variant != SISchedulerBlockSchedulerVariant::BlockLatencyRegUsage) {
1618 if (!tryCandidateRegUsage(Cand, TryCand) &&
1619 Variant != SISchedulerBlockSchedulerVariant::BlockRegUsage)
1620 tryCandidateLatency(Cand, TryCand);
1621 } else {
1622 if (!tryCandidateLatency(Cand, TryCand))
1623 tryCandidateRegUsage(Cand, TryCand);
1624 }
1625 if (TryCand.Reason != NoCand) {
1626 Cand.setBest(TryCand);
1627 Best = I;
1628 LLVM_DEBUG(dbgs() << "Best Current Choice: " << Cand.Block->getID() << ' 'do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Best Current Choice: "
<< Cand.Block->getID() << ' ' << getReasonStr
(Cand.Reason) << '\n'; } } while (false)
1629 << getReasonStr(Cand.Reason) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Best Current Choice: "
<< Cand.Block->getID() << ' ' << getReasonStr
(Cand.Reason) << '\n'; } } while (false)
;
1630 }
1631 }
1632
1633 LLVM_DEBUG(dbgs() << "Picking: " << Cand.Block->getID() << '\n';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking: " <<
Cand.Block->getID() << '\n'; dbgs() << "Is a block with high latency instruction: "
<< (Cand.IsHighLatency ? "yes\n" : "no\n"); dbgs() <<
"Position of last high latency dependency: " << Cand.LastPosHighLatParentScheduled
<< '\n'; dbgs() << "VGPRUsageDiff: " << Cand
.VGPRUsageDiff << '\n'; dbgs() << '\n';; } } while
(false)
31
Loop condition is false. Execution continues on line 1633
32
Assuming 'DebugFlag' is true
33
Assuming the condition is true
34
Taking true branch
35
Called C++ object pointer is null
1634 dbgs() << "Is a block with high latency instruction: "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking: " <<
Cand.Block->getID() << '\n'; dbgs() << "Is a block with high latency instruction: "
<< (Cand.IsHighLatency ? "yes\n" : "no\n"); dbgs() <<
"Position of last high latency dependency: " << Cand.LastPosHighLatParentScheduled
<< '\n'; dbgs() << "VGPRUsageDiff: " << Cand
.VGPRUsageDiff << '\n'; dbgs() << '\n';; } } while
(false)
1635 << (Cand.IsHighLatency ? "yes\n" : "no\n");do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking: " <<
Cand.Block->getID() << '\n'; dbgs() << "Is a block with high latency instruction: "
<< (Cand.IsHighLatency ? "yes\n" : "no\n"); dbgs() <<
"Position of last high latency dependency: " << Cand.LastPosHighLatParentScheduled
<< '\n'; dbgs() << "VGPRUsageDiff: " << Cand
.VGPRUsageDiff << '\n'; dbgs() << '\n';; } } while
(false)
1636 dbgs() << "Position of last high latency dependency: "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking: " <<
Cand.Block->getID() << '\n'; dbgs() << "Is a block with high latency instruction: "
<< (Cand.IsHighLatency ? "yes\n" : "no\n"); dbgs() <<
"Position of last high latency dependency: " << Cand.LastPosHighLatParentScheduled
<< '\n'; dbgs() << "VGPRUsageDiff: " << Cand
.VGPRUsageDiff << '\n'; dbgs() << '\n';; } } while
(false)
1637 << Cand.LastPosHighLatParentScheduled << '\n';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking: " <<
Cand.Block->getID() << '\n'; dbgs() << "Is a block with high latency instruction: "
<< (Cand.IsHighLatency ? "yes\n" : "no\n"); dbgs() <<
"Position of last high latency dependency: " << Cand.LastPosHighLatParentScheduled
<< '\n'; dbgs() << "VGPRUsageDiff: " << Cand
.VGPRUsageDiff << '\n'; dbgs() << '\n';; } } while
(false)
1638 dbgs() << "VGPRUsageDiff: " << Cand.VGPRUsageDiff << '\n';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking: " <<
Cand.Block->getID() << '\n'; dbgs() << "Is a block with high latency instruction: "
<< (Cand.IsHighLatency ? "yes\n" : "no\n"); dbgs() <<
"Position of last high latency dependency: " << Cand.LastPosHighLatParentScheduled
<< '\n'; dbgs() << "VGPRUsageDiff: " << Cand
.VGPRUsageDiff << '\n'; dbgs() << '\n';; } } while
(false)
1639 dbgs() << '\n';)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking: " <<
Cand.Block->getID() << '\n'; dbgs() << "Is a block with high latency instruction: "
<< (Cand.IsHighLatency ? "yes\n" : "no\n"); dbgs() <<
"Position of last high latency dependency: " << Cand.LastPosHighLatParentScheduled
<< '\n'; dbgs() << "VGPRUsageDiff: " << Cand
.VGPRUsageDiff << '\n'; dbgs() << '\n';; } } while
(false)
;
1640
1641 Block = Cand.Block;
1642 ReadyBlocks.erase(Best);
1643 return Block;
1644}
1645
1646// Tracking of currently alive registers to determine VGPR Usage.
1647
1648void SIScheduleBlockScheduler::addLiveRegs(std::set<unsigned> &Regs) {
1649 for (Register Reg : Regs) {
1650 // For now only track virtual registers.
1651 if (!Reg.isVirtual())
1652 continue;
1653 // If not already in the live set, then add it.
1654 (void) LiveRegs.insert(Reg);
1655 }
1656}
1657
1658void SIScheduleBlockScheduler::decreaseLiveRegs(SIScheduleBlock *Block,
1659 std::set<unsigned> &Regs) {
1660 for (unsigned Reg : Regs) {
1661 // For now only track virtual registers.
1662 std::set<unsigned>::iterator Pos = LiveRegs.find(Reg);
1663 assert (Pos != LiveRegs.end() && // Reg must be live.(static_cast <bool> (Pos != LiveRegs.end() && LiveRegsConsumers
.find(Reg) != LiveRegsConsumers.end() && LiveRegsConsumers
[Reg] >= 1) ? void (0) : __assert_fail ("Pos != LiveRegs.end() && LiveRegsConsumers.find(Reg) != LiveRegsConsumers.end() && LiveRegsConsumers[Reg] >= 1"
, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp", 1665, __extension__
__PRETTY_FUNCTION__))
1664 LiveRegsConsumers.find(Reg) != LiveRegsConsumers.end() &&(static_cast <bool> (Pos != LiveRegs.end() && LiveRegsConsumers
.find(Reg) != LiveRegsConsumers.end() && LiveRegsConsumers
[Reg] >= 1) ? void (0) : __assert_fail ("Pos != LiveRegs.end() && LiveRegsConsumers.find(Reg) != LiveRegsConsumers.end() && LiveRegsConsumers[Reg] >= 1"
, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp", 1665, __extension__
__PRETTY_FUNCTION__))
1665 LiveRegsConsumers[Reg] >= 1)(static_cast <bool> (Pos != LiveRegs.end() && LiveRegsConsumers
.find(Reg) != LiveRegsConsumers.end() && LiveRegsConsumers
[Reg] >= 1) ? void (0) : __assert_fail ("Pos != LiveRegs.end() && LiveRegsConsumers.find(Reg) != LiveRegsConsumers.end() && LiveRegsConsumers[Reg] >= 1"
, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp", 1665, __extension__
__PRETTY_FUNCTION__))
;
1666 --LiveRegsConsumers[Reg];
1667 if (LiveRegsConsumers[Reg] == 0)
1668 LiveRegs.erase(Pos);
1669 }
1670}
1671
1672void SIScheduleBlockScheduler::releaseBlockSuccs(SIScheduleBlock *Parent) {
1673 for (const auto &Block : Parent->getSuccs()) {
1674 if (--BlockNumPredsLeft[Block.first->getID()] == 0)
1675 ReadyBlocks.push_back(Block.first);
1676
1677 if (Parent->isHighLatencyBlock() &&
1678 Block.second == SIScheduleBlockLinkKind::Data)
1679 LastPosHighLatencyParentScheduled[Block.first->getID()] = NumBlockScheduled;
1680 }
1681}
1682
// Update all bookkeeping after Block has been scheduled: liveness, successor
// readiness, consumer counts for the block's outputs, and high-latency
// tracking. The call order matters: inputs are consumed before outputs are
// made live, and successors are released before their consumer counts exist.
void SIScheduleBlockScheduler::blockScheduled(SIScheduleBlock *Block) {
  decreaseLiveRegs(Block, Block->getInRegs());
  addLiveRegs(Block->getOutRegs());
  releaseBlockSuccs(Block);
  // Register the number of pending consumers for each output computed in the
  // constructor (LiveOutRegsNumUsages).
  for (const auto &RegP : LiveOutRegsNumUsages[Block->getID()]) {
    // We produce this register, thus it must not be previously alive.
    assert(LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end() ||
           LiveRegsConsumers[RegP.first] == 0);
    LiveRegsConsumers[RegP.first] += RegP.second;
  }
  // Scheduling this block "waits" on its latest high-latency parent; advance
  // the global watermark if that parent was scheduled later than the current
  // one.
  if (LastPosHighLatencyParentScheduled[Block->getID()] >
        (unsigned)LastPosWaitedHighLatency)
    LastPosWaitedHighLatency =
      LastPosHighLatencyParentScheduled[Block->getID()];
  ++NumBlockScheduled;
}
1699
1700std::vector<int>
1701SIScheduleBlockScheduler::checkRegUsageImpact(std::set<unsigned> &InRegs,
1702 std::set<unsigned> &OutRegs) {
1703 std::vector<int> DiffSetPressure;
1704 DiffSetPressure.assign(DAG->getTRI()->getNumRegPressureSets(), 0);
1705
1706 for (Register Reg : InRegs) {
1707 // For now only track virtual registers.
1708 if (!Reg.isVirtual())
1709 continue;
1710 if (LiveRegsConsumers[Reg] > 1)
1711 continue;
1712 PSetIterator PSetI = DAG->getMRI()->getPressureSets(Reg);
1713 for (; PSetI.isValid(); ++PSetI) {
1714 DiffSetPressure[*PSetI] -= PSetI.getWeight();
1715 }
1716 }
1717
1718 for (Register Reg : OutRegs) {
1719 // For now only track virtual registers.
1720 if (!Reg.isVirtual())
1721 continue;
1722 PSetIterator PSetI = DAG->getMRI()->getPressureSets(Reg);
1723 for (; PSetI.isValid(); ++PSetI) {
1724 DiffSetPressure[*PSetI] += PSetI.getWeight();
1725 }
1726 }
1727
1728 return DiffSetPressure;
1729}
1730
1731// SIScheduler //
1732
1733struct SIScheduleBlockResult
1734SIScheduler::scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
1735 SISchedulerBlockSchedulerVariant ScheduleVariant) {
1736 SIScheduleBlocks Blocks = BlockCreator.getBlocks(BlockVariant);
1737 SIScheduleBlockScheduler Scheduler(DAG, ScheduleVariant, Blocks);
12
Calling constructor for 'SIScheduleBlockScheduler'
1738 std::vector<SIScheduleBlock*> ScheduledBlocks;
1739 struct SIScheduleBlockResult Res;
1740
1741 ScheduledBlocks = Scheduler.getBlocks();
1742
1743 for (SIScheduleBlock *Block : ScheduledBlocks) {
1744 std::vector<SUnit*> SUs = Block->getScheduledUnits();
1745
1746 for (SUnit* SU : SUs)
1747 Res.SUs.push_back(SU->NodeNum);
1748 }
1749
1750 Res.MaxSGPRUsage = Scheduler.getSGPRUsage();
1751 Res.MaxVGPRUsage = Scheduler.getVGPRUsage();
1752 return Res;
1753}
1754
1755// SIScheduleDAGMI //
1756
SIScheduleDAGMI::SIScheduleDAGMI(MachineSchedContext *C) :
  ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C)) {
  // Cache SI-typed views of the generic TII/TRI set up by the base class.
  // The static_casts assume this DAG is only built for the AMDGPU/SI target,
  // where TII/TRI are the SI subclasses.
  SITII = static_cast<const SIInstrInfo*>(TII);
  SITRI = static_cast<const SIRegisterInfo*>(TRI);
}
1762
// Defaulted out-of-line; NOTE(review): presumably kept in this TU to anchor
// the class (vtable/key function) — confirm against the header.
SIScheduleDAGMI::~SIScheduleDAGMI() = default;
1764
1765// Code adapted from scheduleDAG.cpp
1766// Does a topological sort over the SUs.
1767// Both TopDown and BottomUp
1768void SIScheduleDAGMI::topologicalSort() {
1769 Topo.InitDAGTopologicalSorting();
1770
1771 TopDownIndex2SU = std::vector<int>(Topo.begin(), Topo.end());
1772 BottomUpIndex2SU = std::vector<int>(Topo.rbegin(), Topo.rend());
1773}
1774
1775// Move low latencies further from their user without
1776// increasing SGPR usage (in general)
1777// This is to be replaced by a better pass that would
1778// take into account SGPR usage (based on VGPR Usage
1779// and the corresponding wavefront count), that would
1780// try to merge groups of loads if it make sense, etc
// Reorder ScheduledSUnits in place so that low-latency instructions (and the
// COPYs feeding them) are moved as early as dependencies allow, putting
// distance between each load-like instruction and its first user.
// ScheduledSUnitsInv (position of each SU) is kept consistent throughout.
void SIScheduleDAGMI::moveLowLatencies() {
  unsigned DAGSize = SUnits.size();
  // Position of the last SU that consumes a low-latency result.
  int LastLowLatencyUser = -1;
  // Position of the last low-latency SU placed.
  int LastLowLatencyPos = -1;

  for (unsigned i = 0, e = ScheduledSUnits.size(); i != e; ++i) {
    SUnit *SU = &SUnits[ScheduledSUnits[i]];
    bool IsLowLatencyUser = false;
    // Earliest legal slot: one past the latest-placed predecessor.
    unsigned MinPos = 0;

    for (SDep& PredDep : SU->Preds) {
      SUnit *Pred = PredDep.getSUnit();
      if (SITII->isLowLatencyInstruction(*Pred->getInstr())) {
        IsLowLatencyUser = true;
      }
      // NodeNum >= DAGSize: SU outside this region (e.g. a boundary node) —
      // it has no slot in ScheduledSUnitsInv. TODO confirm.
      if (Pred->NodeNum >= DAGSize)
        continue;
      unsigned PredPos = ScheduledSUnitsInv[Pred->NodeNum];
      if (PredPos >= MinPos)
        MinPos = PredPos + 1;
    }

    if (SITII->isLowLatencyInstruction(*SU->getInstr())) {
      // Place this low-latency SU as early as legal, but after any previously
      // placed low-latency SU and after the last low-latency user.
      unsigned BestPos = LastLowLatencyUser + 1;
      if ((int)BestPos <= LastLowLatencyPos)
        BestPos = LastLowLatencyPos + 1;
      if (BestPos < MinPos)
        BestPos = MinPos;
      if (BestPos < i) {
        // Rotate [BestPos, i] one slot down to open the target position.
        for (unsigned u = i; u > BestPos; --u) {
          ++ScheduledSUnitsInv[ScheduledSUnits[u-1]];
          ScheduledSUnits[u] = ScheduledSUnits[u-1];
        }
        ScheduledSUnits[BestPos] = SU->NodeNum;
        ScheduledSUnitsInv[SU->NodeNum] = BestPos;
      }
      LastLowLatencyPos = BestPos;
      if (IsLowLatencyUser)
        LastLowLatencyUser = BestPos;
    } else if (IsLowLatencyUser) {
      LastLowLatencyUser = i;
    // Moves COPY instructions on which depends
    // the low latency instructions too.
    } else if (SU->getInstr()->getOpcode() == AMDGPU::COPY) {
      bool CopyForLowLat = false;
      for (SDep& SuccDep : SU->Succs) {
        SUnit *Succ = SuccDep.getSUnit();
        if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
          continue;
        if (SITII->isLowLatencyInstruction(*Succ->getInstr())) {
          CopyForLowLat = true;
        }
      }
      if (!CopyForLowLat)
        continue;
      if (MinPos < i) {
        // Move the COPY up to its earliest legal slot so the low-latency
        // consumer it feeds can also be hoisted.
        for (unsigned u = i; u > MinPos; --u) {
          ++ScheduledSUnitsInv[ScheduledSUnits[u-1]];
          ScheduledSUnits[u] = ScheduledSUnits[u-1];
        }
        ScheduledSUnits[MinPos] = SU->NodeNum;
        ScheduledSUnitsInv[SU->NodeNum] = MinPos;
      }
    }
  }
}
1847
1848void SIScheduleDAGMI::restoreSULinksLeft() {
1849 for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
1850 SUnits[i].isScheduled = false;
1851 SUnits[i].WeakPredsLeft = SUnitsLinksBackup[i].WeakPredsLeft;
1852 SUnits[i].NumPredsLeft = SUnitsLinksBackup[i].NumPredsLeft;
1853 SUnits[i].WeakSuccsLeft = SUnitsLinksBackup[i].WeakSuccsLeft;
1854 SUnits[i].NumSuccsLeft = SUnitsLinksBackup[i].NumSuccsLeft;
1855 }
1856}
1857
1858// Return the Vgpr and Sgpr usage corresponding to some virtual registers.
1859template<typename _Iterator> void
1860SIScheduleDAGMI::fillVgprSgprCost(_Iterator First, _Iterator End,
1861 unsigned &VgprUsage, unsigned &SgprUsage) {
1862 VgprUsage = 0;
1863 SgprUsage = 0;
1864 for (_Iterator RegI = First; RegI != End; ++RegI) {
1865 Register Reg = *RegI;
1866 // For now only track virtual registers
1867 if (!Reg.isVirtual())
1868 continue;
1869 PSetIterator PSetI = MRI.getPressureSets(Reg);
1870 for (; PSetI.isValid(); ++PSetI) {
1871 if (*PSetI == AMDGPU::RegisterPressureSets::VGPR_32)
1872 VgprUsage += PSetI.getWeight();
1873 else if (*PSetI == AMDGPU::RegisterPressureSets::SReg_32)
1874 SgprUsage += PSetI.getWeight();
1875 }
1876 }
1877}
1878
1879void SIScheduleDAGMI::schedule()
1880{
1881 SmallVector<SUnit*, 8> TopRoots, BotRoots;
1882 SIScheduleBlockResult Best, Temp;
1883 LLVM_DEBUG(dbgs() << "Preparing Scheduling\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Preparing Scheduling\n"
; } } while (false)
;
1
Assuming 'DebugFlag' is false
2
Loop condition is false. Exiting loop
1884
1885 buildDAGWithRegPressure();
1886 postProcessDAG();
1887
1888 LLVM_DEBUG(dump())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dump(); } } while (false)
;
3
Assuming 'DebugFlag' is false
4
Loop condition is false. Exiting loop
1889 if (PrintDAGs)
5
Assuming the condition is false
6
Taking false branch
1890 dump();
1891 if (ViewMISchedDAGs)
7
Assuming the condition is false
8
Taking false branch
1892 viewGraph();
1893
1894 topologicalSort();
1895 findRootsAndBiasEdges(TopRoots, BotRoots);
1896 // We reuse several ScheduleDAGMI and ScheduleDAGMILive
1897 // functions, but to make them happy we must initialize
1898 // the default Scheduler implementation (even if we do not
1899 // run it)
1900 SchedImpl->initialize(this);
1901 initQueues(TopRoots, BotRoots);
1902
1903 // Fill some stats to help scheduling.
1904
1905 SUnitsLinksBackup = SUnits;
1906 IsLowLatencySU.clear();
1907 LowLatencyOffset.clear();
1908 IsHighLatencySU.clear();
1909
1910 IsLowLatencySU.resize(SUnits.size(), 0);
1911 LowLatencyOffset.resize(SUnits.size(), 0);
1912 IsHighLatencySU.resize(SUnits.size(), 0);
1913
1914 for (unsigned i = 0, e = (unsigned)SUnits.size(); i != e; ++i) {
9
Assuming 'i' is equal to 'e'
10
Loop condition is false. Execution continues on line 1928
1915 SUnit *SU = &SUnits[i];
1916 const MachineOperand *BaseLatOp;
1917 int64_t OffLatReg;
1918 if (SITII->isLowLatencyInstruction(*SU->getInstr())) {
1919 IsLowLatencySU[i] = 1;
1920 bool OffsetIsScalable;
1921 if (SITII->getMemOperandWithOffset(*SU->getInstr(), BaseLatOp, OffLatReg,
1922 OffsetIsScalable, TRI))
1923 LowLatencyOffset[i] = OffLatReg;
1924 } else if (SITII->isHighLatencyDef(SU->getInstr()->getOpcode()))
1925 IsHighLatencySU[i] = 1;
1926 }
1927
1928 SIScheduler Scheduler(this);
1929 Best = Scheduler.scheduleVariant(SISchedulerBlockCreatorVariant::LatenciesAlone,
11
Calling 'SIScheduler::scheduleVariant'
1930 SISchedulerBlockSchedulerVariant::BlockLatencyRegUsage);
1931
1932 // if VGPR usage is extremely high, try other good performing variants
1933 // which could lead to lower VGPR usage
1934 if (Best.MaxVGPRUsage > 180) {
1935 static const std::pair<SISchedulerBlockCreatorVariant,
1936 SISchedulerBlockSchedulerVariant>
1937 Variants[] = {
1938 { LatenciesAlone, BlockRegUsageLatency },
1939// { LatenciesAlone, BlockRegUsage },
1940 { LatenciesGrouped, BlockLatencyRegUsage },
1941// { LatenciesGrouped, BlockRegUsageLatency },
1942// { LatenciesGrouped, BlockRegUsage },
1943 { LatenciesAlonePlusConsecutive, BlockLatencyRegUsage },
1944// { LatenciesAlonePlusConsecutive, BlockRegUsageLatency },
1945// { LatenciesAlonePlusConsecutive, BlockRegUsage }
1946 };
1947 for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
1948 Temp = Scheduler.scheduleVariant(v.first, v.second);
1949 if (Temp.MaxVGPRUsage < Best.MaxVGPRUsage)
1950 Best = Temp;
1951 }
1952 }
1953 // if VGPR usage is still extremely high, we may spill. Try other variants
1954 // which are less performing, but that could lead to lower VGPR usage.
1955 if (Best.MaxVGPRUsage > 200) {
1956 static const std::pair<SISchedulerBlockCreatorVariant,
1957 SISchedulerBlockSchedulerVariant>
1958 Variants[] = {
1959// { LatenciesAlone, BlockRegUsageLatency },
1960 { LatenciesAlone, BlockRegUsage },
1961// { LatenciesGrouped, BlockLatencyRegUsage },
1962 { LatenciesGrouped, BlockRegUsageLatency },
1963 { LatenciesGrouped, BlockRegUsage },
1964// { LatenciesAlonePlusConsecutive, BlockLatencyRegUsage },
1965 { LatenciesAlonePlusConsecutive, BlockRegUsageLatency },
1966 { LatenciesAlonePlusConsecutive, BlockRegUsage }
1967 };
1968 for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
1969 Temp = Scheduler.scheduleVariant(v.first, v.second);
1970 if (Temp.MaxVGPRUsage < Best.MaxVGPRUsage)
1971 Best = Temp;
1972 }
1973 }
1974
1975 ScheduledSUnits = Best.SUs;
1976 ScheduledSUnitsInv.resize(SUnits.size());
1977
1978 for (unsigned i = 0, e = (unsigned)SUnits.size(); i != e; ++i) {
1979 ScheduledSUnitsInv[ScheduledSUnits[i]] = i;
1980 }
1981
1982 moveLowLatencies();
1983
1984 // Tell the outside world about the result of the scheduling.
1985
1986 assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker")(static_cast <bool> (TopRPTracker.getPos() == RegionBegin
&& "bad initial Top tracker") ? void (0) : __assert_fail
("TopRPTracker.getPos() == RegionBegin && \"bad initial Top tracker\""
, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp", 1986, __extension__
__PRETTY_FUNCTION__))
;
1987 TopRPTracker.setPos(CurrentTop);
1988
1989 for (unsigned I : ScheduledSUnits) {
1990 SUnit *SU = &SUnits[I];
1991
1992 scheduleMI(SU, true);
1993
1994 LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Scheduling SU(" <<
SU->NodeNum << ") " << *SU->getInstr(); } }
while (false)
1995 << *SU->getInstr())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Scheduling SU(" <<
SU->NodeNum << ") " << *SU->getInstr(); } }
while (false)
;
1996 }
1997
1998 assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.")(static_cast <bool> (CurrentTop == CurrentBottom &&
"Nonempty unscheduled zone.") ? void (0) : __assert_fail ("CurrentTop == CurrentBottom && \"Nonempty unscheduled zone.\""
, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp", 1998, __extension__
__PRETTY_FUNCTION__))
;
1999
2000 placeDebugValues();
2001
2002 LLVM_DEBUG({do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { { dbgs() << "*** Final schedule for "
<< printMBBReference(*begin()->getParent()) <<
" ***\n"; dumpSchedule(); dbgs() << '\n'; }; } } while
(false)
2003 dbgs() << "*** Final schedule for "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { { dbgs() << "*** Final schedule for "
<< printMBBReference(*begin()->getParent()) <<
" ***\n"; dumpSchedule(); dbgs() << '\n'; }; } } while
(false)
2004 << printMBBReference(*begin()->getParent()) << " ***\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { { dbgs() << "*** Final schedule for "
<< printMBBReference(*begin()->getParent()) <<
" ***\n"; dumpSchedule(); dbgs() << '\n'; }; } } while
(false)
2005 dumpSchedule();do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { { dbgs() << "*** Final schedule for "
<< printMBBReference(*begin()->getParent()) <<
" ***\n"; dumpSchedule(); dbgs() << '\n'; }; } } while
(false)
2006 dbgs() << '\n';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { { dbgs() << "*** Final schedule for "
<< printMBBReference(*begin()->getParent()) <<
" ***\n"; dumpSchedule(); dbgs() << '\n'; }; } } while
(false)
2007 })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { { dbgs() << "*** Final schedule for "
<< printMBBReference(*begin()->getParent()) <<
" ***\n"; dumpSchedule(); dbgs() << '\n'; }; } } while
(false)
;
2008}