Bug Summary

File:build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
Warning:line 1643, column 3
Called C++ object pointer is null

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name SIMachineScheduler.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-15/lib/clang/15.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/Target/AMDGPU -I /build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/lib/Target/AMDGPU -I include -I /build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-15/lib/clang/15.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/= -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/= -O3 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/= -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-04-20-140412-16051-1 -x c++ /build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
1//===-- SIMachineScheduler.cpp - SI Scheduler Interface -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// SI Machine Scheduler interface
11//
12//===----------------------------------------------------------------------===//
13
14#include "SIMachineScheduler.h"
15#include "SIInstrInfo.h"
16#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
17#include "llvm/CodeGen/LiveIntervals.h"
18#include "llvm/CodeGen/MachineRegisterInfo.h"
19
20using namespace llvm;
21
22#define DEBUG_TYPE"machine-scheduler" "machine-scheduler"
23
24// This scheduler implements a different scheduling algorithm than
25// GenericScheduler.
26//
27// There are several specific architecture behaviours that can't be modelled
28// for GenericScheduler:
29// . When accessing the result of an SGPR load instruction, you have to wait
30// for all the SGPR load instructions before your current instruction to
31// have finished.
32// . When accessing the result of an VGPR load instruction, you have to wait
33// for all the VGPR load instructions previous to the VGPR load instruction
34// you are interested in to finish.
35// . The less the register pressure, the best load latencies are hidden
36//
37// Moreover some specifities (like the fact a lot of instructions in the shader
38// have few dependencies) makes the generic scheduler have some unpredictable
39// behaviours. For example when register pressure becomes high, it can either
40// manage to prevent register pressure from going too high, or it can
41// increase register pressure even more than if it hadn't taken register
42// pressure into account.
43//
44// Also some other bad behaviours are generated, like loading at the beginning
45// of the shader a constant in VGPR you won't need until the end of the shader.
46//
47// The scheduling problem for SI can distinguish three main parts:
48// . Hiding high latencies (texture sampling, etc)
49// . Hiding low latencies (SGPR constant loading, etc)
50// . Keeping register usage low for better latency hiding and general
51// performance
52//
53// Some other things can also affect performance, but are hard to predict
54// (cache usage, the fact the HW can issue several instructions from different
55// wavefronts if different types, etc)
56//
57// This scheduler tries to solve the scheduling problem by dividing it into
58// simpler sub-problems. It divides the instructions into blocks, schedules
59// locally inside the blocks where it takes care of low latencies, and then
60// chooses the order of the blocks by taking care of high latencies.
61// Dividing the instructions into blocks helps control keeping register
62// usage low.
63//
64// First the instructions are put into blocks.
65// We want the blocks help control register usage and hide high latencies
66// later. To help control register usage, we typically want all local
67// computations, when for example you create a result that can be consumed
68// right away, to be contained in a block. Block inputs and outputs would
69// typically be important results that are needed in several locations of
70// the shader. Since we do want blocks to help hide high latencies, we want
71// the instructions inside the block to have a minimal set of dependencies
72// on high latencies. It will make it easy to pick blocks to hide specific
73// high latencies.
74// The block creation algorithm is divided into several steps, and several
75// variants can be tried during the scheduling process.
76//
77// Second the order of the instructions inside the blocks is chosen.
78// At that step we do take into account only register usage and hiding
79// low latency instructions
80//
81// Third the block order is chosen, there we try to hide high latencies
82// and keep register usage low.
83//
84// After the third step, a pass is done to improve the hiding of low
85// latencies.
86//
87// Actually when talking about 'low latency' or 'high latency' it includes
88// both the latency to get the cache (or global mem) data go to the register,
89// and the bandwidth limitations.
90// Increasing the number of active wavefronts helps hide the former, but it
91// doesn't solve the latter, thus why even if wavefront count is high, we have
92// to try have as many instructions hiding high latencies as possible.
93// The OpenCL doc says for example latency of 400 cycles for a global mem
94// access, which is hidden by 10 instructions if the wavefront count is 10.
95
96// Some figures taken from AMD docs:
97// Both texture and constant L1 caches are 4-way associative with 64 bytes
98// lines.
99// Constant cache is shared with 4 CUs.
100// For texture sampling, the address generation unit receives 4 texture
101// addresses per cycle, thus we could expect texture sampling latency to be
102// equivalent to 4 instructions in the very best case (a VGPR is 64 work items,
103// instructions in a wavefront group are executed every 4 cycles),
104// or 16 instructions if the other wavefronts associated to the 3 other VALUs
105// of the CU do texture sampling too. (Don't take these figures too seriously,
106// as I'm not 100% sure of the computation)
107// Data exports should get similar latency.
108// For constant loading, the cache is shader with 4 CUs.
109// The doc says "a throughput of 16B/cycle for each of the 4 Compute Unit"
110// I guess if the other CU don't read the cache, it can go up to 64B/cycle.
111// It means a simple s_buffer_load should take one instruction to hide, as
112// well as a s_buffer_loadx2 and potentially a s_buffer_loadx8 if on the same
113// cache line.
114//
115// As of today the driver doesn't preload the constants in cache, thus the
116// first loads get extra latency. The doc says global memory access can be
117// 300-600 cycles. We do not specially take that into account when scheduling
118// As we expect the driver to be able to preload the constants soon.
119
120// common code //
121
122#ifndef NDEBUG
123
124static const char *getReasonStr(SIScheduleCandReason Reason) {
125 switch (Reason) {
126 case NoCand: return "NOCAND";
127 case RegUsage: return "REGUSAGE";
128 case Latency: return "LATENCY";
129 case Successor: return "SUCCESSOR";
130 case Depth: return "DEPTH";
131 case NodeOrder: return "ORDER";
132 }
133 llvm_unreachable("Unknown reason!")::llvm::llvm_unreachable_internal("Unknown reason!", "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 133)
;
134}
135
136#endif
137
138namespace llvm {
139namespace SISched {
140static bool tryLess(int TryVal, int CandVal,
141 SISchedulerCandidate &TryCand,
142 SISchedulerCandidate &Cand,
143 SIScheduleCandReason Reason) {
144 if (TryVal < CandVal) {
145 TryCand.Reason = Reason;
146 return true;
147 }
148 if (TryVal > CandVal) {
149 if (Cand.Reason > Reason)
150 Cand.Reason = Reason;
151 return true;
152 }
153 Cand.setRepeat(Reason);
154 return false;
155}
156
157static bool tryGreater(int TryVal, int CandVal,
158 SISchedulerCandidate &TryCand,
159 SISchedulerCandidate &Cand,
160 SIScheduleCandReason Reason) {
161 if (TryVal > CandVal) {
162 TryCand.Reason = Reason;
163 return true;
164 }
165 if (TryVal < CandVal) {
166 if (Cand.Reason > Reason)
167 Cand.Reason = Reason;
168 return true;
169 }
170 Cand.setRepeat(Reason);
171 return false;
172}
173} // end namespace SISched
174} // end namespace llvm
175
176// SIScheduleBlock //
177
178void SIScheduleBlock::addUnit(SUnit *SU) {
179 NodeNum2Index[SU->NodeNum] = SUnits.size();
180 SUnits.push_back(SU);
181}
182
183#ifndef NDEBUG
184void SIScheduleBlock::traceCandidate(const SISchedCandidate &Cand) {
185
186 dbgs() << " SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
187 dbgs() << '\n';
188}
189#endif
190
191void SIScheduleBlock::tryCandidateTopDown(SISchedCandidate &Cand,
192 SISchedCandidate &TryCand) {
193 // Initialize the candidate if needed.
194 if (!Cand.isValid()) {
195 TryCand.Reason = NodeOrder;
196 return;
197 }
198
199 if (Cand.SGPRUsage > 60 &&
200 SISched::tryLess(TryCand.SGPRUsage, Cand.SGPRUsage,
201 TryCand, Cand, RegUsage))
202 return;
203
204 // Schedule low latency instructions as top as possible.
205 // Order of priority is:
206 // . Low latency instructions which do not depend on other low latency
207 // instructions we haven't waited for
208 // . Other instructions which do not depend on low latency instructions
209 // we haven't waited for
210 // . Low latencies
211 // . All other instructions
212 // Goal is to get: low latency instructions - independent instructions
213 // - (eventually some more low latency instructions)
214 // - instructions that depend on the first low latency instructions.
215 // If in the block there is a lot of constant loads, the SGPR usage
216 // could go quite high, thus above the arbitrary limit of 60 will encourage
217 // use the already loaded constants (in order to release some SGPRs) before
218 // loading more.
219 if (SISched::tryLess(TryCand.HasLowLatencyNonWaitedParent,
220 Cand.HasLowLatencyNonWaitedParent,
221 TryCand, Cand, SIScheduleCandReason::Depth))
222 return;
223
224 if (SISched::tryGreater(TryCand.IsLowLatency, Cand.IsLowLatency,
225 TryCand, Cand, SIScheduleCandReason::Depth))
226 return;
227
228 if (TryCand.IsLowLatency &&
229 SISched::tryLess(TryCand.LowLatencyOffset, Cand.LowLatencyOffset,
230 TryCand, Cand, SIScheduleCandReason::Depth))
231 return;
232
233 if (SISched::tryLess(TryCand.VGPRUsage, Cand.VGPRUsage,
234 TryCand, Cand, RegUsage))
235 return;
236
237 // Fall through to original instruction order.
238 if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
239 TryCand.Reason = NodeOrder;
240 }
241}
242
243SUnit* SIScheduleBlock::pickNode() {
244 SISchedCandidate TopCand;
245
246 for (SUnit* SU : TopReadySUs) {
247 SISchedCandidate TryCand;
248 std::vector<unsigned> pressure;
249 std::vector<unsigned> MaxPressure;
250 // Predict register usage after this instruction.
251 TryCand.SU = SU;
252 TopRPTracker.getDownwardPressure(SU->getInstr(), pressure, MaxPressure);
253 TryCand.SGPRUsage = pressure[AMDGPU::RegisterPressureSets::SReg_32];
254 TryCand.VGPRUsage = pressure[AMDGPU::RegisterPressureSets::VGPR_32];
255 TryCand.IsLowLatency = DAG->IsLowLatencySU[SU->NodeNum];
256 TryCand.LowLatencyOffset = DAG->LowLatencyOffset[SU->NodeNum];
257 TryCand.HasLowLatencyNonWaitedParent =
258 HasLowLatencyNonWaitedParent[NodeNum2Index[SU->NodeNum]];
259 tryCandidateTopDown(TopCand, TryCand);
260 if (TryCand.Reason != NoCand)
261 TopCand.setBest(TryCand);
262 }
263
264 return TopCand.SU;
265}
266
267
268// Schedule something valid.
269void SIScheduleBlock::fastSchedule() {
270 TopReadySUs.clear();
271 if (Scheduled)
272 undoSchedule();
273
274 for (SUnit* SU : SUnits) {
275 if (!SU->NumPredsLeft)
276 TopReadySUs.push_back(SU);
277 }
278
279 while (!TopReadySUs.empty()) {
280 SUnit *SU = TopReadySUs[0];
281 ScheduledSUnits.push_back(SU);
282 nodeScheduled(SU);
283 }
284
285 Scheduled = true;
286}
287
288// Returns if the register was set between first and last.
289static bool isDefBetween(unsigned Reg,
290 SlotIndex First, SlotIndex Last,
291 const MachineRegisterInfo *MRI,
292 const LiveIntervals *LIS) {
293 for (MachineRegisterInfo::def_instr_iterator
294 UI = MRI->def_instr_begin(Reg),
295 UE = MRI->def_instr_end(); UI != UE; ++UI) {
296 const MachineInstr* MI = &*UI;
297 if (MI->isDebugValue())
298 continue;
299 SlotIndex InstSlot = LIS->getInstructionIndex(*MI).getRegSlot();
300 if (InstSlot >= First && InstSlot <= Last)
301 return true;
302 }
303 return false;
304}
305
306void SIScheduleBlock::initRegPressure(MachineBasicBlock::iterator BeginBlock,
307 MachineBasicBlock::iterator EndBlock) {
308 IntervalPressure Pressure, BotPressure;
309 RegPressureTracker RPTracker(Pressure), BotRPTracker(BotPressure);
310 LiveIntervals *LIS = DAG->getLIS();
311 MachineRegisterInfo *MRI = DAG->getMRI();
312 DAG->initRPTracker(TopRPTracker);
313 DAG->initRPTracker(BotRPTracker);
314 DAG->initRPTracker(RPTracker);
315
316 // Goes though all SU. RPTracker captures what had to be alive for the SUs
317 // to execute, and what is still alive at the end.
318 for (SUnit* SU : ScheduledSUnits) {
319 RPTracker.setPos(SU->getInstr());
320 RPTracker.advance();
321 }
322
323 // Close the RPTracker to finalize live ins/outs.
324 RPTracker.closeRegion();
325
326 // Initialize the live ins and live outs.
327 TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
328 BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
329
330 // Do not Track Physical Registers, because it messes up.
331 for (const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) {
332 if (Register::isVirtualRegister(RegMaskPair.RegUnit))
333 LiveInRegs.insert(RegMaskPair.RegUnit);
334 }
335 LiveOutRegs.clear();
336 // There is several possibilities to distinguish:
337 // 1) Reg is not input to any instruction in the block, but is output of one
338 // 2) 1) + read in the block and not needed after it
339 // 3) 1) + read in the block but needed in another block
340 // 4) Reg is input of an instruction but another block will read it too
341 // 5) Reg is input of an instruction and then rewritten in the block.
342 // result is not read in the block (implies used in another block)
343 // 6) Reg is input of an instruction and then rewritten in the block.
344 // result is read in the block and not needed in another block
345 // 7) Reg is input of an instruction and then rewritten in the block.
346 // result is read in the block but also needed in another block
347 // LiveInRegs will contains all the regs in situation 4, 5, 6, 7
348 // We want LiveOutRegs to contain only Regs whose content will be read after
349 // in another block, and whose content was written in the current block,
350 // that is we want it to get 1, 3, 5, 7
351 // Since we made the MIs of a block to be packed all together before
352 // scheduling, then the LiveIntervals were correct, and the RPTracker was
353 // able to correctly handle 5 vs 6, 2 vs 3.
354 // (Note: This is not sufficient for RPTracker to not do mistakes for case 4)
355 // The RPTracker's LiveOutRegs has 1, 3, (some correct or incorrect)4, 5, 7
356 // Comparing to LiveInRegs is not sufficient to differentiate 4 vs 5, 7
357 // The use of findDefBetween removes the case 4.
358 for (const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) {
359 Register Reg = RegMaskPair.RegUnit;
360 if (Reg.isVirtual() &&
361 isDefBetween(Reg, LIS->getInstructionIndex(*BeginBlock).getRegSlot(),
362 LIS->getInstructionIndex(*EndBlock).getRegSlot(), MRI,
363 LIS)) {
364 LiveOutRegs.insert(Reg);
365 }
366 }
367
368 // Pressure = sum_alive_registers register size
369 // Internally llvm will represent some registers as big 128 bits registers
370 // for example, but they actually correspond to 4 actual 32 bits registers.
371 // Thus Pressure is not equal to num_alive_registers * constant.
372 LiveInPressure = TopPressure.MaxSetPressure;
373 LiveOutPressure = BotPressure.MaxSetPressure;
374
375 // Prepares TopRPTracker for top down scheduling.
376 TopRPTracker.closeTop();
377}
378
379void SIScheduleBlock::schedule(MachineBasicBlock::iterator BeginBlock,
380 MachineBasicBlock::iterator EndBlock) {
381 if (!Scheduled)
382 fastSchedule();
383
384 // PreScheduling phase to set LiveIn and LiveOut.
385 initRegPressure(BeginBlock, EndBlock);
386 undoSchedule();
387
388 // Schedule for real now.
389
390 TopReadySUs.clear();
391
392 for (SUnit* SU : SUnits) {
393 if (!SU->NumPredsLeft)
394 TopReadySUs.push_back(SU);
395 }
396
397 while (!TopReadySUs.empty()) {
398 SUnit *SU = pickNode();
399 ScheduledSUnits.push_back(SU);
400 TopRPTracker.setPos(SU->getInstr());
401 TopRPTracker.advance();
402 nodeScheduled(SU);
403 }
404
405 // TODO: compute InternalAdditionalPressure.
406 InternalAdditionalPressure.resize(TopPressure.MaxSetPressure.size());
407
408 // Check everything is right.
409#ifndef NDEBUG
410 assert(SUnits.size() == ScheduledSUnits.size() &&(static_cast <bool> (SUnits.size() == ScheduledSUnits.size
() && TopReadySUs.empty()) ? void (0) : __assert_fail
("SUnits.size() == ScheduledSUnits.size() && TopReadySUs.empty()"
, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp", 411, __extension__
__PRETTY_FUNCTION__))
411 TopReadySUs.empty())(static_cast <bool> (SUnits.size() == ScheduledSUnits.size
() && TopReadySUs.empty()) ? void (0) : __assert_fail
("SUnits.size() == ScheduledSUnits.size() && TopReadySUs.empty()"
, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp", 411, __extension__
__PRETTY_FUNCTION__))
;
412 for (SUnit* SU : SUnits) {
413 assert(SU->isScheduled &&(static_cast <bool> (SU->isScheduled && SU->
NumPredsLeft == 0) ? void (0) : __assert_fail ("SU->isScheduled && SU->NumPredsLeft == 0"
, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp", 414, __extension__
__PRETTY_FUNCTION__))
414 SU->NumPredsLeft == 0)(static_cast <bool> (SU->isScheduled && SU->
NumPredsLeft == 0) ? void (0) : __assert_fail ("SU->isScheduled && SU->NumPredsLeft == 0"
, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp", 414, __extension__
__PRETTY_FUNCTION__))
;
415 }
416#endif
417
418 Scheduled = true;
419}
420
421void SIScheduleBlock::undoSchedule() {
422 for (SUnit* SU : SUnits) {
423 SU->isScheduled = false;
424 for (SDep& Succ : SU->Succs) {
425 if (BC->isSUInBlock(Succ.getSUnit(), ID))
426 undoReleaseSucc(SU, &Succ);
427 }
428 }
429 HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);
430 ScheduledSUnits.clear();
431 Scheduled = false;
432}
433
434void SIScheduleBlock::undoReleaseSucc(SUnit *SU, SDep *SuccEdge) {
435 SUnit *SuccSU = SuccEdge->getSUnit();
436
437 if (SuccEdge->isWeak()) {
438 ++SuccSU->WeakPredsLeft;
439 return;
440 }
441 ++SuccSU->NumPredsLeft;
442}
443
444void SIScheduleBlock::releaseSucc(SUnit *SU, SDep *SuccEdge) {
445 SUnit *SuccSU = SuccEdge->getSUnit();
446
447 if (SuccEdge->isWeak()) {
448 --SuccSU->WeakPredsLeft;
449 return;
450 }
451#ifndef NDEBUG
452 if (SuccSU->NumPredsLeft == 0) {
453 dbgs() << "*** Scheduling failed! ***\n";
454 DAG->dumpNode(*SuccSU);
455 dbgs() << " has been released too many times!\n";
456 llvm_unreachable(nullptr)::llvm::llvm_unreachable_internal(nullptr, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 456)
;
457 }
458#endif
459
460 --SuccSU->NumPredsLeft;
461}
462
463/// Release Successors of the SU that are in the block or not.
464void SIScheduleBlock::releaseSuccessors(SUnit *SU, bool InOrOutBlock) {
465 for (SDep& Succ : SU->Succs) {
466 SUnit *SuccSU = Succ.getSUnit();
467
468 if (SuccSU->NodeNum >= DAG->SUnits.size())
469 continue;
470
471 if (BC->isSUInBlock(SuccSU, ID) != InOrOutBlock)
472 continue;
473
474 releaseSucc(SU, &Succ);
475 if (SuccSU->NumPredsLeft == 0 && InOrOutBlock)
476 TopReadySUs.push_back(SuccSU);
477 }
478}
479
480void SIScheduleBlock::nodeScheduled(SUnit *SU) {
481 // Is in TopReadySUs
482 assert (!SU->NumPredsLeft)(static_cast <bool> (!SU->NumPredsLeft) ? void (0) :
__assert_fail ("!SU->NumPredsLeft", "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 482, __extension__ __PRETTY_FUNCTION__))
;
483 std::vector<SUnit *>::iterator I = llvm::find(TopReadySUs, SU);
484 if (I == TopReadySUs.end()) {
485 dbgs() << "Data Structure Bug in SI Scheduler\n";
486 llvm_unreachable(nullptr)::llvm::llvm_unreachable_internal(nullptr, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 486)
;
487 }
488 TopReadySUs.erase(I);
489
490 releaseSuccessors(SU, true);
491 // Scheduling this node will trigger a wait,
492 // thus propagate to other instructions that they do not need to wait either.
493 if (HasLowLatencyNonWaitedParent[NodeNum2Index[SU->NodeNum]])
494 HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);
495
496 if (DAG->IsLowLatencySU[SU->NodeNum]) {
497 for (SDep& Succ : SU->Succs) {
498 std::map<unsigned, unsigned>::iterator I =
499 NodeNum2Index.find(Succ.getSUnit()->NodeNum);
500 if (I != NodeNum2Index.end())
501 HasLowLatencyNonWaitedParent[I->second] = 1;
502 }
503 }
504 SU->isScheduled = true;
505}
506
507void SIScheduleBlock::finalizeUnits() {
508 // We remove links from outside blocks to enable scheduling inside the block.
509 for (SUnit* SU : SUnits) {
510 releaseSuccessors(SU, false);
511 if (DAG->IsHighLatencySU[SU->NodeNum])
512 HighLatencyBlock = true;
513 }
514 HasLowLatencyNonWaitedParent.resize(SUnits.size(), 0);
515}
516
517// we maintain ascending order of IDs
518void SIScheduleBlock::addPred(SIScheduleBlock *Pred) {
519 unsigned PredID = Pred->getID();
520
521 // Check if not already predecessor.
522 for (SIScheduleBlock* P : Preds) {
523 if (PredID == P->getID())
524 return;
525 }
526 Preds.push_back(Pred);
527
528 assert(none_of(Succs,(static_cast <bool> (none_of(Succs, [=](std::pair<SIScheduleBlock
*, SIScheduleBlockLinkKind> S) { return PredID == S.first->
getID(); }) && "Loop in the Block Graph!") ? void (0)
: __assert_fail ("none_of(Succs, [=](std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S) { return PredID == S.first->getID(); }) && \"Loop in the Block Graph!\""
, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp", 533, __extension__
__PRETTY_FUNCTION__))
529 [=](std::pair<SIScheduleBlock*,(static_cast <bool> (none_of(Succs, [=](std::pair<SIScheduleBlock
*, SIScheduleBlockLinkKind> S) { return PredID == S.first->
getID(); }) && "Loop in the Block Graph!") ? void (0)
: __assert_fail ("none_of(Succs, [=](std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S) { return PredID == S.first->getID(); }) && \"Loop in the Block Graph!\""
, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp", 533, __extension__
__PRETTY_FUNCTION__))
530 SIScheduleBlockLinkKind> S) {(static_cast <bool> (none_of(Succs, [=](std::pair<SIScheduleBlock
*, SIScheduleBlockLinkKind> S) { return PredID == S.first->
getID(); }) && "Loop in the Block Graph!") ? void (0)
: __assert_fail ("none_of(Succs, [=](std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S) { return PredID == S.first->getID(); }) && \"Loop in the Block Graph!\""
, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp", 533, __extension__
__PRETTY_FUNCTION__))
531 return PredID == S.first->getID();(static_cast <bool> (none_of(Succs, [=](std::pair<SIScheduleBlock
*, SIScheduleBlockLinkKind> S) { return PredID == S.first->
getID(); }) && "Loop in the Block Graph!") ? void (0)
: __assert_fail ("none_of(Succs, [=](std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S) { return PredID == S.first->getID(); }) && \"Loop in the Block Graph!\""
, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp", 533, __extension__
__PRETTY_FUNCTION__))
532 }) &&(static_cast <bool> (none_of(Succs, [=](std::pair<SIScheduleBlock
*, SIScheduleBlockLinkKind> S) { return PredID == S.first->
getID(); }) && "Loop in the Block Graph!") ? void (0)
: __assert_fail ("none_of(Succs, [=](std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S) { return PredID == S.first->getID(); }) && \"Loop in the Block Graph!\""
, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp", 533, __extension__
__PRETTY_FUNCTION__))
533 "Loop in the Block Graph!")(static_cast <bool> (none_of(Succs, [=](std::pair<SIScheduleBlock
*, SIScheduleBlockLinkKind> S) { return PredID == S.first->
getID(); }) && "Loop in the Block Graph!") ? void (0)
: __assert_fail ("none_of(Succs, [=](std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S) { return PredID == S.first->getID(); }) && \"Loop in the Block Graph!\""
, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp", 533, __extension__
__PRETTY_FUNCTION__))
;
534}
535
536void SIScheduleBlock::addSucc(SIScheduleBlock *Succ,
537 SIScheduleBlockLinkKind Kind) {
538 unsigned SuccID = Succ->getID();
539
540 // Check if not already predecessor.
541 for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> &S : Succs) {
542 if (SuccID == S.first->getID()) {
543 if (S.second == SIScheduleBlockLinkKind::NoData &&
544 Kind == SIScheduleBlockLinkKind::Data)
545 S.second = Kind;
546 return;
547 }
548 }
549 if (Succ->isHighLatencyBlock())
550 ++NumHighLatencySuccessors;
551 Succs.push_back(std::make_pair(Succ, Kind));
552
553 assert(none_of(Preds,(static_cast <bool> (none_of(Preds, [=](SIScheduleBlock
*P) { return SuccID == P->getID(); }) && "Loop in the Block Graph!"
) ? void (0) : __assert_fail ("none_of(Preds, [=](SIScheduleBlock *P) { return SuccID == P->getID(); }) && \"Loop in the Block Graph!\""
, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp", 555, __extension__
__PRETTY_FUNCTION__))
554 [=](SIScheduleBlock *P) { return SuccID == P->getID(); }) &&(static_cast <bool> (none_of(Preds, [=](SIScheduleBlock
*P) { return SuccID == P->getID(); }) && "Loop in the Block Graph!"
) ? void (0) : __assert_fail ("none_of(Preds, [=](SIScheduleBlock *P) { return SuccID == P->getID(); }) && \"Loop in the Block Graph!\""
, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp", 555, __extension__
__PRETTY_FUNCTION__))
555 "Loop in the Block Graph!")(static_cast <bool> (none_of(Preds, [=](SIScheduleBlock
*P) { return SuccID == P->getID(); }) && "Loop in the Block Graph!"
) ? void (0) : __assert_fail ("none_of(Preds, [=](SIScheduleBlock *P) { return SuccID == P->getID(); }) && \"Loop in the Block Graph!\""
, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp", 555, __extension__
__PRETTY_FUNCTION__))
;
556}
557
558#ifndef NDEBUG
559void SIScheduleBlock::printDebug(bool full) {
560 dbgs() << "Block (" << ID << ")\n";
561 if (!full)
562 return;
563
564 dbgs() << "\nContains High Latency Instruction: "
565 << HighLatencyBlock << '\n';
566 dbgs() << "\nDepends On:\n";
567 for (SIScheduleBlock* P : Preds) {
568 P->printDebug(false);
569 }
570
571 dbgs() << "\nSuccessors:\n";
572 for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S : Succs) {
573 if (S.second == SIScheduleBlockLinkKind::Data)
574 dbgs() << "(Data Dep) ";
575 S.first->printDebug(false);
576 }
577
578 if (Scheduled) {
579 dbgs() << "LiveInPressure "
580 << LiveInPressure[AMDGPU::RegisterPressureSets::SReg_32] << ' '
581 << LiveInPressure[AMDGPU::RegisterPressureSets::VGPR_32] << '\n';
582 dbgs() << "LiveOutPressure "
583 << LiveOutPressure[AMDGPU::RegisterPressureSets::SReg_32] << ' '
584 << LiveOutPressure[AMDGPU::RegisterPressureSets::VGPR_32] << "\n\n";
585 dbgs() << "LiveIns:\n";
586 for (unsigned Reg : LiveInRegs)
587 dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' ';
588
589 dbgs() << "\nLiveOuts:\n";
590 for (unsigned Reg : LiveOutRegs)
591 dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' ';
592 }
593
594 dbgs() << "\nInstructions:\n";
595 for (const SUnit* SU : SUnits)
596 DAG->dumpNode(*SU);
597
598 dbgs() << "///////////////////////\n";
599}
600#endif
601
602// SIScheduleBlockCreator //
603
604SIScheduleBlockCreator::SIScheduleBlockCreator(SIScheduleDAGMI *DAG)
605 : DAG(DAG) {}
606
607SIScheduleBlocks
608SIScheduleBlockCreator::getBlocks(SISchedulerBlockCreatorVariant BlockVariant) {
609 std::map<SISchedulerBlockCreatorVariant, SIScheduleBlocks>::iterator B =
610 Blocks.find(BlockVariant);
611 if (B == Blocks.end()) {
612 SIScheduleBlocks Res;
613 createBlocksForVariant(BlockVariant);
614 topologicalSort();
615 scheduleInsideBlocks();
616 fillStats();
617 Res.Blocks = CurrentBlocks;
618 Res.TopDownIndex2Block = TopDownIndex2Block;
619 Res.TopDownBlock2Index = TopDownBlock2Index;
620 Blocks[BlockVariant] = Res;
621 return Res;
622 } else {
623 return B->second;
624 }
625}
626
627bool SIScheduleBlockCreator::isSUInBlock(SUnit *SU, unsigned ID) {
628 if (SU->NodeNum >= DAG->SUnits.size())
629 return false;
630 return CurrentBlocks[Node2CurrentBlock[SU->NodeNum]]->getID() == ID;
631}
632
633void SIScheduleBlockCreator::colorHighLatenciesAlone() {
634 unsigned DAGSize = DAG->SUnits.size();
635
636 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
637 SUnit *SU = &DAG->SUnits[i];
638 if (DAG->IsHighLatencySU[SU->NodeNum]) {
639 CurrentColoring[SU->NodeNum] = NextReservedID++;
640 }
641 }
642}
643
644static bool
645hasDataDependencyPred(const SUnit &SU, const SUnit &FromSU) {
646 for (const auto &PredDep : SU.Preds) {
647 if (PredDep.getSUnit() == &FromSU &&
648 PredDep.getKind() == llvm::SDep::Data)
649 return true;
650 }
651 return false;
652}
653
654void SIScheduleBlockCreator::colorHighLatenciesGroups() {
655 unsigned DAGSize = DAG->SUnits.size();
656 unsigned NumHighLatencies = 0;
657 unsigned GroupSize;
658 int Color = NextReservedID;
659 unsigned Count = 0;
660 std::set<unsigned> FormingGroup;
661
662 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
663 SUnit *SU = &DAG->SUnits[i];
664 if (DAG->IsHighLatencySU[SU->NodeNum])
665 ++NumHighLatencies;
666 }
667
668 if (NumHighLatencies == 0)
669 return;
670
671 if (NumHighLatencies <= 6)
672 GroupSize = 2;
673 else if (NumHighLatencies <= 12)
674 GroupSize = 3;
675 else
676 GroupSize = 4;
677
678 for (unsigned SUNum : DAG->TopDownIndex2SU) {
679 const SUnit &SU = DAG->SUnits[SUNum];
680 if (DAG->IsHighLatencySU[SU.NodeNum]) {
681 unsigned CompatibleGroup = true;
682 int ProposedColor = Color;
683 std::vector<int> AdditionalElements;
684
685 // We don't want to put in the same block
686 // two high latency instructions that depend
687 // on each other.
688 // One way would be to check canAddEdge
689 // in both directions, but that currently is not
690 // enough because there the high latency order is
691 // enforced (via links).
692 // Instead, look at the dependencies between the
693 // high latency instructions and deduce if it is
694 // a data dependency or not.
695 for (unsigned j : FormingGroup) {
696 bool HasSubGraph;
697 std::vector<int> SubGraph;
698 // By construction (topological order), if SU and
699 // DAG->SUnits[j] are linked, DAG->SUnits[j] is necessary
700 // in the parent graph of SU.
701#ifndef NDEBUG
702 SubGraph = DAG->GetTopo()->GetSubGraph(SU, DAG->SUnits[j],
703 HasSubGraph);
704 assert(!HasSubGraph)(static_cast <bool> (!HasSubGraph) ? void (0) : __assert_fail
("!HasSubGraph", "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 704, __extension__ __PRETTY_FUNCTION__))
;
705#endif
706 SubGraph = DAG->GetTopo()->GetSubGraph(DAG->SUnits[j], SU,
707 HasSubGraph);
708 if (!HasSubGraph)
709 continue; // No dependencies between each other
710 else if (SubGraph.size() > 5) {
711 // Too many elements would be required to be added to the block.
712 CompatibleGroup = false;
713 break;
714 }
715 else {
716 // Check the type of dependency
717 for (unsigned k : SubGraph) {
718 // If in the path to join the two instructions,
719 // there is another high latency instruction,
720 // or instructions colored for another block
721 // abort the merge.
722 if (DAG->IsHighLatencySU[k] ||
723 (CurrentColoring[k] != ProposedColor &&
724 CurrentColoring[k] != 0)) {
725 CompatibleGroup = false;
726 break;
727 }
728 // If one of the SU in the subgraph depends on the result of SU j,
729 // there'll be a data dependency.
730 if (hasDataDependencyPred(DAG->SUnits[k], DAG->SUnits[j])) {
731 CompatibleGroup = false;
732 break;
733 }
734 }
735 if (!CompatibleGroup)
736 break;
737 // Same check for the SU
738 if (hasDataDependencyPred(SU, DAG->SUnits[j])) {
739 CompatibleGroup = false;
740 break;
741 }
742 // Add all the required instructions to the block
743 // These cannot live in another block (because they
744 // depend (order dependency) on one of the
745 // instruction in the block, and are required for the
746 // high latency instruction we add.
747 llvm::append_range(AdditionalElements, SubGraph);
748 }
749 }
750 if (CompatibleGroup) {
751 FormingGroup.insert(SU.NodeNum);
752 for (unsigned j : AdditionalElements)
753 CurrentColoring[j] = ProposedColor;
754 CurrentColoring[SU.NodeNum] = ProposedColor;
755 ++Count;
756 }
757 // Found one incompatible instruction,
758 // or has filled a big enough group.
759 // -> start a new one.
760 if (!CompatibleGroup) {
761 FormingGroup.clear();
762 Color = ++NextReservedID;
763 ProposedColor = Color;
764 FormingGroup.insert(SU.NodeNum);
765 CurrentColoring[SU.NodeNum] = ProposedColor;
766 Count = 0;
767 } else if (Count == GroupSize) {
768 FormingGroup.clear();
769 Color = ++NextReservedID;
770 ProposedColor = Color;
771 Count = 0;
772 }
773 }
774 }
775}
776
777void SIScheduleBlockCreator::colorComputeReservedDependencies() {
778 unsigned DAGSize = DAG->SUnits.size();
779 std::map<std::set<unsigned>, unsigned> ColorCombinations;
780
781 CurrentTopDownReservedDependencyColoring.clear();
782 CurrentBottomUpReservedDependencyColoring.clear();
783
784 CurrentTopDownReservedDependencyColoring.resize(DAGSize, 0);
785 CurrentBottomUpReservedDependencyColoring.resize(DAGSize, 0);
786
787 // Traverse TopDown, and give different colors to SUs depending
788 // on which combination of High Latencies they depend on.
789
790 for (unsigned SUNum : DAG->TopDownIndex2SU) {
791 SUnit *SU = &DAG->SUnits[SUNum];
792 std::set<unsigned> SUColors;
793
794 // Already given.
795 if (CurrentColoring[SU->NodeNum]) {
796 CurrentTopDownReservedDependencyColoring[SU->NodeNum] =
797 CurrentColoring[SU->NodeNum];
798 continue;
799 }
800
801 for (SDep& PredDep : SU->Preds) {
802 SUnit *Pred = PredDep.getSUnit();
803 if (PredDep.isWeak() || Pred->NodeNum >= DAGSize)
804 continue;
805 if (CurrentTopDownReservedDependencyColoring[Pred->NodeNum] > 0)
806 SUColors.insert(CurrentTopDownReservedDependencyColoring[Pred->NodeNum]);
807 }
808 // Color 0 by default.
809 if (SUColors.empty())
810 continue;
811 // Same color than parents.
812 if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
813 CurrentTopDownReservedDependencyColoring[SU->NodeNum] =
814 *SUColors.begin();
815 else {
816 std::map<std::set<unsigned>, unsigned>::iterator Pos =
817 ColorCombinations.find(SUColors);
818 if (Pos != ColorCombinations.end()) {
819 CurrentTopDownReservedDependencyColoring[SU->NodeNum] = Pos->second;
820 } else {
821 CurrentTopDownReservedDependencyColoring[SU->NodeNum] =
822 NextNonReservedID;
823 ColorCombinations[SUColors] = NextNonReservedID++;
824 }
825 }
826 }
827
828 ColorCombinations.clear();
829
830 // Same as before, but BottomUp.
831
832 for (unsigned SUNum : DAG->BottomUpIndex2SU) {
833 SUnit *SU = &DAG->SUnits[SUNum];
834 std::set<unsigned> SUColors;
835
836 // Already given.
837 if (CurrentColoring[SU->NodeNum]) {
838 CurrentBottomUpReservedDependencyColoring[SU->NodeNum] =
839 CurrentColoring[SU->NodeNum];
840 continue;
841 }
842
843 for (SDep& SuccDep : SU->Succs) {
844 SUnit *Succ = SuccDep.getSUnit();
845 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
846 continue;
847 if (CurrentBottomUpReservedDependencyColoring[Succ->NodeNum] > 0)
848 SUColors.insert(CurrentBottomUpReservedDependencyColoring[Succ->NodeNum]);
849 }
850 // Keep color 0.
851 if (SUColors.empty())
852 continue;
853 // Same color than parents.
854 if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
855 CurrentBottomUpReservedDependencyColoring[SU->NodeNum] =
856 *SUColors.begin();
857 else {
858 std::map<std::set<unsigned>, unsigned>::iterator Pos =
859 ColorCombinations.find(SUColors);
860 if (Pos != ColorCombinations.end()) {
861 CurrentBottomUpReservedDependencyColoring[SU->NodeNum] = Pos->second;
862 } else {
863 CurrentBottomUpReservedDependencyColoring[SU->NodeNum] =
864 NextNonReservedID;
865 ColorCombinations[SUColors] = NextNonReservedID++;
866 }
867 }
868 }
869}
870
871void SIScheduleBlockCreator::colorAccordingToReservedDependencies() {
872 std::map<std::pair<unsigned, unsigned>, unsigned> ColorCombinations;
873
874 // Every combination of colors given by the top down
875 // and bottom up Reserved node dependency
876
877 for (const SUnit &SU : DAG->SUnits) {
878 std::pair<unsigned, unsigned> SUColors;
879
880 // High latency instructions: already given.
881 if (CurrentColoring[SU.NodeNum])
882 continue;
883
884 SUColors.first = CurrentTopDownReservedDependencyColoring[SU.NodeNum];
885 SUColors.second = CurrentBottomUpReservedDependencyColoring[SU.NodeNum];
886
887 std::map<std::pair<unsigned, unsigned>, unsigned>::iterator Pos =
888 ColorCombinations.find(SUColors);
889 if (Pos != ColorCombinations.end()) {
890 CurrentColoring[SU.NodeNum] = Pos->second;
891 } else {
892 CurrentColoring[SU.NodeNum] = NextNonReservedID;
893 ColorCombinations[SUColors] = NextNonReservedID++;
894 }
895 }
896}
897
898void SIScheduleBlockCreator::colorEndsAccordingToDependencies() {
899 unsigned DAGSize = DAG->SUnits.size();
900 std::vector<int> PendingColoring = CurrentColoring;
901
902 assert(DAGSize >= 1 &&(static_cast <bool> (DAGSize >= 1 && CurrentBottomUpReservedDependencyColoring
.size() == DAGSize && CurrentTopDownReservedDependencyColoring
.size() == DAGSize) ? void (0) : __assert_fail ("DAGSize >= 1 && CurrentBottomUpReservedDependencyColoring.size() == DAGSize && CurrentTopDownReservedDependencyColoring.size() == DAGSize"
, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp", 904, __extension__
__PRETTY_FUNCTION__))
903 CurrentBottomUpReservedDependencyColoring.size() == DAGSize &&(static_cast <bool> (DAGSize >= 1 && CurrentBottomUpReservedDependencyColoring
.size() == DAGSize && CurrentTopDownReservedDependencyColoring
.size() == DAGSize) ? void (0) : __assert_fail ("DAGSize >= 1 && CurrentBottomUpReservedDependencyColoring.size() == DAGSize && CurrentTopDownReservedDependencyColoring.size() == DAGSize"
, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp", 904, __extension__
__PRETTY_FUNCTION__))
904 CurrentTopDownReservedDependencyColoring.size() == DAGSize)(static_cast <bool> (DAGSize >= 1 && CurrentBottomUpReservedDependencyColoring
.size() == DAGSize && CurrentTopDownReservedDependencyColoring
.size() == DAGSize) ? void (0) : __assert_fail ("DAGSize >= 1 && CurrentBottomUpReservedDependencyColoring.size() == DAGSize && CurrentTopDownReservedDependencyColoring.size() == DAGSize"
, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp", 904, __extension__
__PRETTY_FUNCTION__))
;
905 // If there is no reserved block at all, do nothing. We don't want
906 // everything in one block.
907 if (*std::max_element(CurrentBottomUpReservedDependencyColoring.begin(),
908 CurrentBottomUpReservedDependencyColoring.end()) == 0 &&
909 *std::max_element(CurrentTopDownReservedDependencyColoring.begin(),
910 CurrentTopDownReservedDependencyColoring.end()) == 0)
911 return;
912
913 for (unsigned SUNum : DAG->BottomUpIndex2SU) {
914 SUnit *SU = &DAG->SUnits[SUNum];
915 std::set<unsigned> SUColors;
916 std::set<unsigned> SUColorsPending;
917
918 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
919 continue;
920
921 if (CurrentBottomUpReservedDependencyColoring[SU->NodeNum] > 0 ||
922 CurrentTopDownReservedDependencyColoring[SU->NodeNum] > 0)
923 continue;
924
925 for (SDep& SuccDep : SU->Succs) {
926 SUnit *Succ = SuccDep.getSUnit();
927 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
928 continue;
929 if (CurrentBottomUpReservedDependencyColoring[Succ->NodeNum] > 0 ||
930 CurrentTopDownReservedDependencyColoring[Succ->NodeNum] > 0)
931 SUColors.insert(CurrentColoring[Succ->NodeNum]);
932 SUColorsPending.insert(PendingColoring[Succ->NodeNum]);
933 }
934 // If there is only one child/parent block, and that block
935 // is not among the ones we are removing in this path, then
936 // merge the instruction to that block
937 if (SUColors.size() == 1 && SUColorsPending.size() == 1)
938 PendingColoring[SU->NodeNum] = *SUColors.begin();
939 else // TODO: Attribute new colors depending on color
940 // combination of children.
941 PendingColoring[SU->NodeNum] = NextNonReservedID++;
942 }
943 CurrentColoring = PendingColoring;
944}
945
946
947void SIScheduleBlockCreator::colorForceConsecutiveOrderInGroup() {
948 unsigned DAGSize = DAG->SUnits.size();
949 unsigned PreviousColor;
950 std::set<unsigned> SeenColors;
951
952 if (DAGSize <= 1)
953 return;
954
955 PreviousColor = CurrentColoring[0];
956
957 for (unsigned i = 1, e = DAGSize; i != e; ++i) {
958 SUnit *SU = &DAG->SUnits[i];
959 unsigned CurrentColor = CurrentColoring[i];
960 unsigned PreviousColorSave = PreviousColor;
961 assert(i == SU->NodeNum)(static_cast <bool> (i == SU->NodeNum) ? void (0) : __assert_fail
("i == SU->NodeNum", "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 961, __extension__ __PRETTY_FUNCTION__))
;
962
963 if (CurrentColor != PreviousColor)
964 SeenColors.insert(PreviousColor);
965 PreviousColor = CurrentColor;
966
967 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
968 continue;
969
970 if (SeenColors.find(CurrentColor) == SeenColors.end())
971 continue;
972
973 if (PreviousColorSave != CurrentColor)
974 CurrentColoring[i] = NextNonReservedID++;
975 else
976 CurrentColoring[i] = CurrentColoring[i-1];
977 }
978}
979
980void SIScheduleBlockCreator::colorMergeConstantLoadsNextGroup() {
981 unsigned DAGSize = DAG->SUnits.size();
982
983 for (unsigned SUNum : DAG->BottomUpIndex2SU) {
984 SUnit *SU = &DAG->SUnits[SUNum];
985 std::set<unsigned> SUColors;
986
987 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
988 continue;
989
990 // No predecessor: Vgpr constant loading.
991 // Low latency instructions usually have a predecessor (the address)
992 if (SU->Preds.size() > 0 && !DAG->IsLowLatencySU[SU->NodeNum])
993 continue;
994
995 for (SDep& SuccDep : SU->Succs) {
996 SUnit *Succ = SuccDep.getSUnit();
997 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
998 continue;
999 SUColors.insert(CurrentColoring[Succ->NodeNum]);
1000 }
1001 if (SUColors.size() == 1)
1002 CurrentColoring[SU->NodeNum] = *SUColors.begin();
1003 }
1004}
1005
1006void SIScheduleBlockCreator::colorMergeIfPossibleNextGroup() {
1007 unsigned DAGSize = DAG->SUnits.size();
1008
1009 for (unsigned SUNum : DAG->BottomUpIndex2SU) {
1010 SUnit *SU = &DAG->SUnits[SUNum];
1011 std::set<unsigned> SUColors;
1012
1013 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
1014 continue;
1015
1016 for (SDep& SuccDep : SU->Succs) {
1017 SUnit *Succ = SuccDep.getSUnit();
1018 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
1019 continue;
1020 SUColors.insert(CurrentColoring[Succ->NodeNum]);
1021 }
1022 if (SUColors.size() == 1)
1023 CurrentColoring[SU->NodeNum] = *SUColors.begin();
1024 }
1025}
1026
1027void SIScheduleBlockCreator::colorMergeIfPossibleNextGroupOnlyForReserved() {
1028 unsigned DAGSize = DAG->SUnits.size();
1029
1030 for (unsigned SUNum : DAG->BottomUpIndex2SU) {
1031 SUnit *SU = &DAG->SUnits[SUNum];
1032 std::set<unsigned> SUColors;
1033
1034 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
1035 continue;
1036
1037 for (SDep& SuccDep : SU->Succs) {
1038 SUnit *Succ = SuccDep.getSUnit();
1039 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
1040 continue;
1041 SUColors.insert(CurrentColoring[Succ->NodeNum]);
1042 }
1043 if (SUColors.size() == 1 && *SUColors.begin() <= DAGSize)
1044 CurrentColoring[SU->NodeNum] = *SUColors.begin();
1045 }
1046}
1047
1048void SIScheduleBlockCreator::colorMergeIfPossibleSmallGroupsToNextGroup() {
1049 unsigned DAGSize = DAG->SUnits.size();
1050 std::map<unsigned, unsigned> ColorCount;
1051
1052 for (unsigned SUNum : DAG->BottomUpIndex2SU) {
1053 SUnit *SU = &DAG->SUnits[SUNum];
1054 unsigned color = CurrentColoring[SU->NodeNum];
1055 ++ColorCount[color];
1056 }
1057
1058 for (unsigned SUNum : DAG->BottomUpIndex2SU) {
1059 SUnit *SU = &DAG->SUnits[SUNum];
1060 unsigned color = CurrentColoring[SU->NodeNum];
1061 std::set<unsigned> SUColors;
1062
1063 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
1064 continue;
1065
1066 if (ColorCount[color] > 1)
1067 continue;
1068
1069 for (SDep& SuccDep : SU->Succs) {
1070 SUnit *Succ = SuccDep.getSUnit();
1071 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
1072 continue;
1073 SUColors.insert(CurrentColoring[Succ->NodeNum]);
1074 }
1075 if (SUColors.size() == 1 && *SUColors.begin() != color) {
1076 --ColorCount[color];
1077 CurrentColoring[SU->NodeNum] = *SUColors.begin();
1078 ++ColorCount[*SUColors.begin()];
1079 }
1080 }
1081}
1082
1083void SIScheduleBlockCreator::cutHugeBlocks() {
1084 // TODO
1085}
1086
1087void SIScheduleBlockCreator::regroupNoUserInstructions() {
1088 unsigned DAGSize = DAG->SUnits.size();
1089 int GroupID = NextNonReservedID++;
1090
1091 for (unsigned SUNum : DAG->BottomUpIndex2SU) {
1092 SUnit *SU = &DAG->SUnits[SUNum];
1093 bool hasSuccessor = false;
1094
1095 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
1096 continue;
1097
1098 for (SDep& SuccDep : SU->Succs) {
1099 SUnit *Succ = SuccDep.getSUnit();
1100 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
1101 continue;
1102 hasSuccessor = true;
1103 }
1104 if (!hasSuccessor)
1105 CurrentColoring[SU->NodeNum] = GroupID;
1106 }
1107}
1108
1109void SIScheduleBlockCreator::colorExports() {
1110 unsigned ExportColor = NextNonReservedID++;
1111 SmallVector<unsigned, 8> ExpGroup;
1112
1113 // Put all exports together in a block.
1114 // The block will naturally end up being scheduled last,
1115 // thus putting exports at the end of the schedule, which
1116 // is better for performance.
1117 // However we must ensure, for safety, the exports can be put
1118 // together in the same block without any other instruction.
1119 // This could happen, for example, when scheduling after regalloc
1120 // if reloading a spilled register from memory using the same
1121 // register than used in a previous export.
1122 // If that happens, do not regroup the exports.
1123 for (unsigned SUNum : DAG->TopDownIndex2SU) {
1124 const SUnit &SU = DAG->SUnits[SUNum];
1125 if (SIInstrInfo::isEXP(*SU.getInstr())) {
1126 // Check the EXP can be added to the group safely,
1127 // ie without needing any other instruction.
1128 // The EXP is allowed to depend on other EXP
1129 // (they will be in the same group).
1130 for (unsigned j : ExpGroup) {
1131 bool HasSubGraph;
1132 std::vector<int> SubGraph;
1133 // By construction (topological order), if SU and
1134 // DAG->SUnits[j] are linked, DAG->SUnits[j] is necessary
1135 // in the parent graph of SU.
1136#ifndef NDEBUG
1137 SubGraph = DAG->GetTopo()->GetSubGraph(SU, DAG->SUnits[j],
1138 HasSubGraph);
1139 assert(!HasSubGraph)(static_cast <bool> (!HasSubGraph) ? void (0) : __assert_fail
("!HasSubGraph", "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 1139, __extension__ __PRETTY_FUNCTION__))
;
1140#endif
1141 SubGraph = DAG->GetTopo()->GetSubGraph(DAG->SUnits[j], SU,
1142 HasSubGraph);
1143 if (!HasSubGraph)
1144 continue; // No dependencies between each other
1145
1146 // SubGraph contains all the instructions required
1147 // between EXP SUnits[j] and EXP SU.
1148 for (unsigned k : SubGraph) {
1149 if (!SIInstrInfo::isEXP(*DAG->SUnits[k].getInstr()))
1150 // Other instructions than EXP would be required in the group.
1151 // Abort the grouping.
1152 return;
1153 }
1154 }
1155
1156 ExpGroup.push_back(SUNum);
1157 }
1158 }
1159
1160 // The group can be formed. Give the color.
1161 for (unsigned j : ExpGroup)
1162 CurrentColoring[j] = ExportColor;
1163}
1164
1165void SIScheduleBlockCreator::createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant) {
1166 unsigned DAGSize = DAG->SUnits.size();
1167 std::map<unsigned,unsigned> RealID;
1168
1169 CurrentBlocks.clear();
1170 CurrentColoring.clear();
1171 CurrentColoring.resize(DAGSize, 0);
1172 Node2CurrentBlock.clear();
1173
1174 // Restore links previous scheduling variant has overridden.
1175 DAG->restoreSULinksLeft();
1176
1177 NextReservedID = 1;
1178 NextNonReservedID = DAGSize + 1;
1179
1180 LLVM_DEBUG(dbgs() << "Coloring the graph\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Coloring the graph\n"
; } } while (false)
;
1181
1182 if (BlockVariant == SISchedulerBlockCreatorVariant::LatenciesGrouped)
1183 colorHighLatenciesGroups();
1184 else
1185 colorHighLatenciesAlone();
1186 colorComputeReservedDependencies();
1187 colorAccordingToReservedDependencies();
1188 colorEndsAccordingToDependencies();
1189 if (BlockVariant == SISchedulerBlockCreatorVariant::LatenciesAlonePlusConsecutive)
1190 colorForceConsecutiveOrderInGroup();
1191 regroupNoUserInstructions();
1192 colorMergeConstantLoadsNextGroup();
1193 colorMergeIfPossibleNextGroupOnlyForReserved();
1194 colorExports();
1195
1196 // Put SUs of same color into same block
1197 Node2CurrentBlock.resize(DAGSize, -1);
1198 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1199 SUnit *SU = &DAG->SUnits[i];
1200 unsigned Color = CurrentColoring[SU->NodeNum];
1201 if (RealID.find(Color) == RealID.end()) {
1202 int ID = CurrentBlocks.size();
1203 BlockPtrs.push_back(std::make_unique<SIScheduleBlock>(DAG, this, ID));
1204 CurrentBlocks.push_back(BlockPtrs.rbegin()->get());
1205 RealID[Color] = ID;
1206 }
1207 CurrentBlocks[RealID[Color]]->addUnit(SU);
1208 Node2CurrentBlock[SU->NodeNum] = RealID[Color];
1209 }
1210
1211 // Build dependencies between blocks.
1212 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1213 SUnit *SU = &DAG->SUnits[i];
1214 int SUID = Node2CurrentBlock[i];
1215 for (SDep& SuccDep : SU->Succs) {
1216 SUnit *Succ = SuccDep.getSUnit();
1217 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
1218 continue;
1219 if (Node2CurrentBlock[Succ->NodeNum] != SUID)
1220 CurrentBlocks[SUID]->addSucc(CurrentBlocks[Node2CurrentBlock[Succ->NodeNum]],
1221 SuccDep.isCtrl() ? NoData : Data);
1222 }
1223 for (SDep& PredDep : SU->Preds) {
1224 SUnit *Pred = PredDep.getSUnit();
1225 if (PredDep.isWeak() || Pred->NodeNum >= DAGSize)
1226 continue;
1227 if (Node2CurrentBlock[Pred->NodeNum] != SUID)
1228 CurrentBlocks[SUID]->addPred(CurrentBlocks[Node2CurrentBlock[Pred->NodeNum]]);
1229 }
1230 }
1231
1232 // Free root and leafs of all blocks to enable scheduling inside them.
1233 for (SIScheduleBlock *Block : CurrentBlocks)
1234 Block->finalizeUnits();
1235 LLVM_DEBUG({do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { { dbgs() << "Blocks created:\n\n"
; for (SIScheduleBlock *Block : CurrentBlocks) Block->printDebug
(true); }; } } while (false)
1236 dbgs() << "Blocks created:\n\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { { dbgs() << "Blocks created:\n\n"
; for (SIScheduleBlock *Block : CurrentBlocks) Block->printDebug
(true); }; } } while (false)
1237 for (SIScheduleBlock *Block : CurrentBlocks)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { { dbgs() << "Blocks created:\n\n"
; for (SIScheduleBlock *Block : CurrentBlocks) Block->printDebug
(true); }; } } while (false)
1238 Block->printDebug(true);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { { dbgs() << "Blocks created:\n\n"
; for (SIScheduleBlock *Block : CurrentBlocks) Block->printDebug
(true); }; } } while (false)
1239 })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { { dbgs() << "Blocks created:\n\n"
; for (SIScheduleBlock *Block : CurrentBlocks) Block->printDebug
(true); }; } } while (false)
;
1240}
1241
1242// Two functions taken from Codegen/MachineScheduler.cpp
1243
1244/// Non-const version.
1245static MachineBasicBlock::iterator
1246nextIfDebug(MachineBasicBlock::iterator I,
1247 MachineBasicBlock::const_iterator End) {
1248 for (; I != End; ++I) {
1249 if (!I->isDebugInstr())
1250 break;
1251 }
1252 return I;
1253}
1254
1255void SIScheduleBlockCreator::topologicalSort() {
1256 unsigned DAGSize = CurrentBlocks.size();
1257 std::vector<int> WorkList;
1258
1259 LLVM_DEBUG(dbgs() << "Topological Sort\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Topological Sort\n"
; } } while (false)
;
1260
1261 WorkList.reserve(DAGSize);
1262 TopDownIndex2Block.resize(DAGSize);
1263 TopDownBlock2Index.resize(DAGSize);
1264 BottomUpIndex2Block.resize(DAGSize);
1265
1266 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1267 SIScheduleBlock *Block = CurrentBlocks[i];
1268 unsigned Degree = Block->getSuccs().size();
1269 TopDownBlock2Index[i] = Degree;
1270 if (Degree == 0) {
1271 WorkList.push_back(i);
1272 }
1273 }
1274
1275 int Id = DAGSize;
1276 while (!WorkList.empty()) {
1277 int i = WorkList.back();
1278 SIScheduleBlock *Block = CurrentBlocks[i];
1279 WorkList.pop_back();
1280 TopDownBlock2Index[i] = --Id;
1281 TopDownIndex2Block[Id] = i;
1282 for (SIScheduleBlock* Pred : Block->getPreds()) {
1283 if (!--TopDownBlock2Index[Pred->getID()])
1284 WorkList.push_back(Pred->getID());
1285 }
1286 }
1287
1288#ifndef NDEBUG
1289 // Check correctness of the ordering.
1290 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1291 SIScheduleBlock *Block = CurrentBlocks[i];
1292 for (SIScheduleBlock* Pred : Block->getPreds()) {
1293 assert(TopDownBlock2Index[i] > TopDownBlock2Index[Pred->getID()] &&(static_cast <bool> (TopDownBlock2Index[i] > TopDownBlock2Index
[Pred->getID()] && "Wrong Top Down topological sorting"
) ? void (0) : __assert_fail ("TopDownBlock2Index[i] > TopDownBlock2Index[Pred->getID()] && \"Wrong Top Down topological sorting\""
, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp", 1294, __extension__
__PRETTY_FUNCTION__))
1294 "Wrong Top Down topological sorting")(static_cast <bool> (TopDownBlock2Index[i] > TopDownBlock2Index
[Pred->getID()] && "Wrong Top Down topological sorting"
) ? void (0) : __assert_fail ("TopDownBlock2Index[i] > TopDownBlock2Index[Pred->getID()] && \"Wrong Top Down topological sorting\""
, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp", 1294, __extension__
__PRETTY_FUNCTION__))
;
1295 }
1296 }
1297#endif
1298
1299 BottomUpIndex2Block = std::vector<int>(TopDownIndex2Block.rbegin(),
1300 TopDownIndex2Block.rend());
1301}
1302
1303void SIScheduleBlockCreator::scheduleInsideBlocks() {
1304 unsigned DAGSize = CurrentBlocks.size();
1305
1306 LLVM_DEBUG(dbgs() << "\nScheduling Blocks\n\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "\nScheduling Blocks\n\n"
; } } while (false)
;
1307
1308 // We do schedule a valid scheduling such that a Block corresponds
1309 // to a range of instructions.
1310 LLVM_DEBUG(dbgs() << "First phase: Fast scheduling for Reg Liveness\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "First phase: Fast scheduling for Reg Liveness\n"
; } } while (false)
;
1311 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1312 SIScheduleBlock *Block = CurrentBlocks[i];
1313 Block->fastSchedule();
1314 }
1315
1316 // Note: the following code, and the part restoring previous position
1317 // is by far the most expensive operation of the Scheduler.
1318
1319 // Do not update CurrentTop.
1320 MachineBasicBlock::iterator CurrentTopFastSched = DAG->getCurrentTop();
1321 std::vector<MachineBasicBlock::iterator> PosOld;
1322 std::vector<MachineBasicBlock::iterator> PosNew;
1323 PosOld.reserve(DAG->SUnits.size());
1324 PosNew.reserve(DAG->SUnits.size());
1325
1326 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1327 int BlockIndice = TopDownIndex2Block[i];
1328 SIScheduleBlock *Block = CurrentBlocks[BlockIndice];
1329 std::vector<SUnit*> SUs = Block->getScheduledUnits();
1330
1331 for (SUnit* SU : SUs) {
1332 MachineInstr *MI = SU->getInstr();
1333 MachineBasicBlock::iterator Pos = MI;
1334 PosOld.push_back(Pos);
1335 if (&*CurrentTopFastSched == MI) {
1336 PosNew.push_back(Pos);
1337 CurrentTopFastSched = nextIfDebug(++CurrentTopFastSched,
1338 DAG->getCurrentBottom());
1339 } else {
1340 // Update the instruction stream.
1341 DAG->getBB()->splice(CurrentTopFastSched, DAG->getBB(), MI);
1342
1343 // Update LiveIntervals.
1344 // Note: Moving all instructions and calling handleMove every time
1345 // is the most cpu intensive operation of the scheduler.
1346 // It would gain a lot if there was a way to recompute the
1347 // LiveIntervals for the entire scheduling region.
1348 DAG->getLIS()->handleMove(*MI, /*UpdateFlags=*/true);
1349 PosNew.push_back(CurrentTopFastSched);
1350 }
1351 }
1352 }
1353
1354 // Now we have Block of SUs == Block of MI.
1355 // We do the final schedule for the instructions inside the block.
1356 // The property that all the SUs of the Block are grouped together as MI
1357 // is used for correct reg usage tracking.
1358 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1359 SIScheduleBlock *Block = CurrentBlocks[i];
1360 std::vector<SUnit*> SUs = Block->getScheduledUnits();
1361 Block->schedule((*SUs.begin())->getInstr(), (*SUs.rbegin())->getInstr());
1362 }
1363
1364 LLVM_DEBUG(dbgs() << "Restoring MI Pos\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Restoring MI Pos\n"
; } } while (false)
;
1365 // Restore old ordering (which prevents a LIS->handleMove bug).
1366 for (unsigned i = PosOld.size(), e = 0; i != e; --i) {
1367 MachineBasicBlock::iterator POld = PosOld[i-1];
1368 MachineBasicBlock::iterator PNew = PosNew[i-1];
1369 if (PNew != POld) {
1370 // Update the instruction stream.
1371 DAG->getBB()->splice(POld, DAG->getBB(), PNew);
1372
1373 // Update LiveIntervals.
1374 DAG->getLIS()->handleMove(*POld, /*UpdateFlags=*/true);
1375 }
1376 }
1377
1378 LLVM_DEBUG({do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { { for (SIScheduleBlock *Block : CurrentBlocks
) Block->printDebug(true); }; } } while (false)
1379 for (SIScheduleBlock *Block : CurrentBlocks)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { { for (SIScheduleBlock *Block : CurrentBlocks
) Block->printDebug(true); }; } } while (false)
1380 Block->printDebug(true);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { { for (SIScheduleBlock *Block : CurrentBlocks
) Block->printDebug(true); }; } } while (false)
1381 })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { { for (SIScheduleBlock *Block : CurrentBlocks
) Block->printDebug(true); }; } } while (false)
;
1382}
1383
1384void SIScheduleBlockCreator::fillStats() {
1385 unsigned DAGSize = CurrentBlocks.size();
1386
1387 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1388 int BlockIndice = TopDownIndex2Block[i];
1389 SIScheduleBlock *Block = CurrentBlocks[BlockIndice];
1390 if (Block->getPreds().empty())
1391 Block->Depth = 0;
1392 else {
1393 unsigned Depth = 0;
1394 for (SIScheduleBlock *Pred : Block->getPreds()) {
1395 if (Depth < Pred->Depth + Pred->getCost())
1396 Depth = Pred->Depth + Pred->getCost();
1397 }
1398 Block->Depth = Depth;
1399 }
1400 }
1401
1402 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1403 int BlockIndice = BottomUpIndex2Block[i];
1404 SIScheduleBlock *Block = CurrentBlocks[BlockIndice];
1405 if (Block->getSuccs().empty())
1406 Block->Height = 0;
1407 else {
1408 unsigned Height = 0;
1409 for (const auto &Succ : Block->getSuccs())
1410 Height = std::max(Height, Succ.first->Height + Succ.first->getCost());
1411 Block->Height = Height;
1412 }
1413 }
1414}
1415
1416// SIScheduleBlockScheduler //
1417
1418SIScheduleBlockScheduler::SIScheduleBlockScheduler(SIScheduleDAGMI *DAG,
1419 SISchedulerBlockSchedulerVariant Variant,
1420 SIScheduleBlocks BlocksStruct) :
1421 DAG(DAG), Variant(Variant), Blocks(BlocksStruct.Blocks),
1422 LastPosWaitedHighLatency(0), NumBlockScheduled(0), VregCurrentUsage(0),
1423 SregCurrentUsage(0), maxVregUsage(0), maxSregUsage(0) {
1424
1425 // Fill the usage of every output
1426 // Warning: while by construction we always have a link between two blocks
1427 // when one needs a result from the other, the number of users of an output
1428 // is not the sum of child blocks having as input the same virtual register.
1429 // Here is an example. A produces x and y. B eats x and produces x'.
1430 // C eats x' and y. The register coalescer may have attributed the same
1431 // virtual register to x and x'.
1432 // To count accurately, we do a topological sort. In case the register is
1433 // found for several parents, we increment the usage of the one with the
1434 // highest topological index.
1435 LiveOutRegsNumUsages.resize(Blocks.size());
1436 for (SIScheduleBlock *Block : Blocks) {
1437 for (unsigned Reg : Block->getInRegs()) {
1438 bool Found = false;
1439 int topoInd = -1;
1440 for (SIScheduleBlock* Pred: Block->getPreds()) {
1441 std::set<unsigned> PredOutRegs = Pred->getOutRegs();
1442 std::set<unsigned>::iterator RegPos = PredOutRegs.find(Reg);
1443
1444 if (RegPos != PredOutRegs.end()) {
1445 Found = true;
1446 if (topoInd < BlocksStruct.TopDownBlock2Index[Pred->getID()]) {
1447 topoInd = BlocksStruct.TopDownBlock2Index[Pred->getID()];
1448 }
1449 }
1450 }
1451
1452 if (!Found)
1453 continue;
1454
1455 int PredID = BlocksStruct.TopDownIndex2Block[topoInd];
1456 ++LiveOutRegsNumUsages[PredID][Reg];
1457 }
1458 }
1459
1460 LastPosHighLatencyParentScheduled.resize(Blocks.size(), 0);
1461 BlockNumPredsLeft.resize(Blocks.size());
1462 BlockNumSuccsLeft.resize(Blocks.size());
1463
1464 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
9
Assuming 'i' is equal to 'e'
10
Loop condition is false. Execution continues on line 1471
1465 SIScheduleBlock *Block = Blocks[i];
1466 BlockNumPredsLeft[i] = Block->getPreds().size();
1467 BlockNumSuccsLeft[i] = Block->getSuccs().size();
1468 }
1469
1470#ifndef NDEBUG
1471 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
11
Assuming 'i' is equal to 'e'
12
Loop condition is false. Execution continues on line 1477
1472 SIScheduleBlock *Block = Blocks[i];
1473 assert(Block->getID() == i)(static_cast <bool> (Block->getID() == i) ? void (0)
: __assert_fail ("Block->getID() == i", "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 1473, __extension__ __PRETTY_FUNCTION__))
;
1474 }
1475#endif
1476
1477 std::set<unsigned> InRegs = DAG->getInRegs();
1478 addLiveRegs(InRegs);
1479
1480 // Increase LiveOutRegsNumUsages for blocks
1481 // producing registers consumed in another
1482 // scheduling region.
1483 for (unsigned Reg : DAG->getOutRegs()) {
1484 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
1485 // Do reverse traversal
1486 int ID = BlocksStruct.TopDownIndex2Block[Blocks.size()-1-i];
1487 SIScheduleBlock *Block = Blocks[ID];
1488 const std::set<unsigned> &OutRegs = Block->getOutRegs();
1489
1490 if (OutRegs.find(Reg) == OutRegs.end())
1491 continue;
1492
1493 ++LiveOutRegsNumUsages[ID][Reg];
1494 break;
1495 }
1496 }
1497
1498 // Fill LiveRegsConsumers for regs that were already
1499 // defined before scheduling.
1500 for (SIScheduleBlock *Block : Blocks) {
1501 for (unsigned Reg : Block->getInRegs()) {
1502 bool Found = false;
1503 for (SIScheduleBlock* Pred: Block->getPreds()) {
1504 std::set<unsigned> PredOutRegs = Pred->getOutRegs();
1505 std::set<unsigned>::iterator RegPos = PredOutRegs.find(Reg);
1506
1507 if (RegPos != PredOutRegs.end()) {
1508 Found = true;
1509 break;
1510 }
1511 }
1512
1513 if (!Found)
1514 ++LiveRegsConsumers[Reg];
1515 }
1516 }
1517
1518 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
13
Assuming 'i' is equal to 'e'
14
Loop condition is false. Execution continues on line 1525
1519 SIScheduleBlock *Block = Blocks[i];
1520 if (BlockNumPredsLeft[i] == 0) {
1521 ReadyBlocks.push_back(Block);
1522 }
1523 }
1524
1525 while (SIScheduleBlock *Block = pickBlock()) {
15
Calling 'SIScheduleBlockScheduler::pickBlock'
1526 BlocksScheduled.push_back(Block);
1527 blockScheduled(Block);
1528 }
1529
1530 LLVM_DEBUG(dbgs() << "Block Order:"; for (SIScheduleBlock *Blockdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Block Order:"; for (
SIScheduleBlock *Block : BlocksScheduled) { dbgs() << ' '
<< Block->getID(); } dbgs() << '\n';; } } while
(false)
1531 : BlocksScheduled) {do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Block Order:"; for (
SIScheduleBlock *Block : BlocksScheduled) { dbgs() << ' '
<< Block->getID(); } dbgs() << '\n';; } } while
(false)
1532 dbgs() << ' ' << Block->getID();do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Block Order:"; for (
SIScheduleBlock *Block : BlocksScheduled) { dbgs() << ' '
<< Block->getID(); } dbgs() << '\n';; } } while
(false)
1533 } dbgs() << '\n';)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Block Order:"; for (
SIScheduleBlock *Block : BlocksScheduled) { dbgs() << ' '
<< Block->getID(); } dbgs() << '\n';; } } while
(false)
;
1534}
1535
1536bool SIScheduleBlockScheduler::tryCandidateLatency(SIBlockSchedCandidate &Cand,
1537 SIBlockSchedCandidate &TryCand) {
1538 if (!Cand.isValid()) {
1539 TryCand.Reason = NodeOrder;
1540 return true;
1541 }
1542
1543 // Try to hide high latencies.
1544 if (SISched::tryLess(TryCand.LastPosHighLatParentScheduled,
1545 Cand.LastPosHighLatParentScheduled, TryCand, Cand, Latency))
1546 return true;
1547 // Schedule high latencies early so you can hide them better.
1548 if (SISched::tryGreater(TryCand.IsHighLatency, Cand.IsHighLatency,
1549 TryCand, Cand, Latency))
1550 return true;
1551 if (TryCand.IsHighLatency && SISched::tryGreater(TryCand.Height, Cand.Height,
1552 TryCand, Cand, Depth))
1553 return true;
1554 if (SISched::tryGreater(TryCand.NumHighLatencySuccessors,
1555 Cand.NumHighLatencySuccessors,
1556 TryCand, Cand, Successor))
1557 return true;
1558 return false;
1559}
1560
1561bool SIScheduleBlockScheduler::tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
1562 SIBlockSchedCandidate &TryCand) {
1563 if (!Cand.isValid()) {
1564 TryCand.Reason = NodeOrder;
1565 return true;
1566 }
1567
1568 if (SISched::tryLess(TryCand.VGPRUsageDiff > 0, Cand.VGPRUsageDiff > 0,
1569 TryCand, Cand, RegUsage))
1570 return true;
1571 if (SISched::tryGreater(TryCand.NumSuccessors > 0,
1572 Cand.NumSuccessors > 0,
1573 TryCand, Cand, Successor))
1574 return true;
1575 if (SISched::tryGreater(TryCand.Height, Cand.Height, TryCand, Cand, Depth))
1576 return true;
1577 if (SISched::tryLess(TryCand.VGPRUsageDiff, Cand.VGPRUsageDiff,
1578 TryCand, Cand, RegUsage))
1579 return true;
1580 return false;
1581}
1582
1583SIScheduleBlock *SIScheduleBlockScheduler::pickBlock() {
1584 SIBlockSchedCandidate Cand;
1585 std::vector<SIScheduleBlock*>::iterator Best;
1586 SIScheduleBlock *Block;
1587 if (ReadyBlocks.empty())
16
Assuming the condition is false
17
Taking false branch
1588 return nullptr;
1589
1590 DAG->fillVgprSgprCost(LiveRegs.begin(), LiveRegs.end(),
1591 VregCurrentUsage, SregCurrentUsage);
1592 if (VregCurrentUsage > maxVregUsage)
18
Assuming field 'VregCurrentUsage' is <= field 'maxVregUsage'
19
Taking false branch
1593 maxVregUsage = VregCurrentUsage;
1594 if (SregCurrentUsage > maxSregUsage)
20
Assuming field 'SregCurrentUsage' is <= field 'maxSregUsage'
1595 maxSregUsage = SregCurrentUsage;
1596 LLVM_DEBUG(dbgs() << "Picking New Blocks\n"; dbgs() << "Available: ";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking New Blocks\n"
; dbgs() << "Available: "; for (SIScheduleBlock *Block :
ReadyBlocks) dbgs() << Block->getID() << ' ';
dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs
) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) <<
' '; dbgs() << '\n'; dbgs() << "Current VGPRs: "
<< VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: "
<< SregCurrentUsage << '\n';; } } while (false)
21
Taking false branch
22
Assuming 'DebugFlag' is true
23
Assuming the condition is false
24
Taking false branch
25
Loop condition is false. Exiting loop
1597 for (SIScheduleBlock *Blockdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking New Blocks\n"
; dbgs() << "Available: "; for (SIScheduleBlock *Block :
ReadyBlocks) dbgs() << Block->getID() << ' ';
dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs
) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) <<
' '; dbgs() << '\n'; dbgs() << "Current VGPRs: "
<< VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: "
<< SregCurrentUsage << '\n';; } } while (false)
1598 : ReadyBlocks) dbgs()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking New Blocks\n"
; dbgs() << "Available: "; for (SIScheduleBlock *Block :
ReadyBlocks) dbgs() << Block->getID() << ' ';
dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs
) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) <<
' '; dbgs() << '\n'; dbgs() << "Current VGPRs: "
<< VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: "
<< SregCurrentUsage << '\n';; } } while (false)
1599 << Block->getID() << ' ';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking New Blocks\n"
; dbgs() << "Available: "; for (SIScheduleBlock *Block :
ReadyBlocks) dbgs() << Block->getID() << ' ';
dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs
) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) <<
' '; dbgs() << '\n'; dbgs() << "Current VGPRs: "
<< VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: "
<< SregCurrentUsage << '\n';; } } while (false)
1600 dbgs() << "\nCurrent Live:\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking New Blocks\n"
; dbgs() << "Available: "; for (SIScheduleBlock *Block :
ReadyBlocks) dbgs() << Block->getID() << ' ';
dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs
) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) <<
' '; dbgs() << '\n'; dbgs() << "Current VGPRs: "
<< VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: "
<< SregCurrentUsage << '\n';; } } while (false)
1601 for (unsigned Regdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking New Blocks\n"
; dbgs() << "Available: "; for (SIScheduleBlock *Block :
ReadyBlocks) dbgs() << Block->getID() << ' ';
dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs
) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) <<
' '; dbgs() << '\n'; dbgs() << "Current VGPRs: "
<< VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: "
<< SregCurrentUsage << '\n';; } } while (false)
1602 : LiveRegs) dbgs()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking New Blocks\n"
; dbgs() << "Available: "; for (SIScheduleBlock *Block :
ReadyBlocks) dbgs() << Block->getID() << ' ';
dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs
) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) <<
' '; dbgs() << '\n'; dbgs() << "Current VGPRs: "
<< VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: "
<< SregCurrentUsage << '\n';; } } while (false)
1603 << printVRegOrUnit(Reg, DAG->getTRI()) << ' ';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking New Blocks\n"
; dbgs() << "Available: "; for (SIScheduleBlock *Block :
ReadyBlocks) dbgs() << Block->getID() << ' ';
dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs
) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) <<
' '; dbgs() << '\n'; dbgs() << "Current VGPRs: "
<< VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: "
<< SregCurrentUsage << '\n';; } } while (false)
1604 dbgs() << '\n';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking New Blocks\n"
; dbgs() << "Available: "; for (SIScheduleBlock *Block :
ReadyBlocks) dbgs() << Block->getID() << ' ';
dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs
) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) <<
' '; dbgs() << '\n'; dbgs() << "Current VGPRs: "
<< VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: "
<< SregCurrentUsage << '\n';; } } while (false)
1605 dbgs() << "Current VGPRs: " << VregCurrentUsage << '\n';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking New Blocks\n"
; dbgs() << "Available: "; for (SIScheduleBlock *Block :
ReadyBlocks) dbgs() << Block->getID() << ' ';
dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs
) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) <<
' '; dbgs() << '\n'; dbgs() << "Current VGPRs: "
<< VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: "
<< SregCurrentUsage << '\n';; } } while (false)
1606 dbgs() << "Current SGPRs: " << SregCurrentUsage << '\n';)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking New Blocks\n"
; dbgs() << "Available: "; for (SIScheduleBlock *Block :
ReadyBlocks) dbgs() << Block->getID() << ' ';
dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs
) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) <<
' '; dbgs() << '\n'; dbgs() << "Current VGPRs: "
<< VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: "
<< SregCurrentUsage << '\n';; } } while (false)
;
1607
1608 Cand.Block = nullptr;
26
Null pointer value stored to 'Cand.Block'
1609 for (std::vector<SIScheduleBlock*>::iterator I = ReadyBlocks.begin(),
1610 E = ReadyBlocks.end(); I != E; ++I) {
1611 SIBlockSchedCandidate TryCand;
1612 TryCand.Block = *I;
1613 TryCand.IsHighLatency = TryCand.Block->isHighLatencyBlock();
1614 TryCand.VGPRUsageDiff =
1615 checkRegUsageImpact(TryCand.Block->getInRegs(),
1616 TryCand.Block->getOutRegs())[AMDGPU::RegisterPressureSets::VGPR_32];
1617 TryCand.NumSuccessors = TryCand.Block->getSuccs().size();
1618 TryCand.NumHighLatencySuccessors =
1619 TryCand.Block->getNumHighLatencySuccessors();
1620 TryCand.LastPosHighLatParentScheduled =
1621 (unsigned int) std::max<int> (0,
1622 LastPosHighLatencyParentScheduled[TryCand.Block->getID()] -
1623 LastPosWaitedHighLatency);
1624 TryCand.Height = TryCand.Block->Height;
1625 // Try not to increase VGPR usage too much, else we may spill.
1626 if (VregCurrentUsage > 120 ||
1627 Variant != SISchedulerBlockSchedulerVariant::BlockLatencyRegUsage) {
1628 if (!tryCandidateRegUsage(Cand, TryCand) &&
1629 Variant != SISchedulerBlockSchedulerVariant::BlockRegUsage)
1630 tryCandidateLatency(Cand, TryCand);
1631 } else {
1632 if (!tryCandidateLatency(Cand, TryCand))
1633 tryCandidateRegUsage(Cand, TryCand);
1634 }
1635 if (TryCand.Reason != NoCand) {
1636 Cand.setBest(TryCand);
1637 Best = I;
1638 LLVM_DEBUG(dbgs() << "Best Current Choice: " << Cand.Block->getID() << ' 'do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Best Current Choice: "
<< Cand.Block->getID() << ' ' << getReasonStr
(Cand.Reason) << '\n'; } } while (false)
1639 << getReasonStr(Cand.Reason) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Best Current Choice: "
<< Cand.Block->getID() << ' ' << getReasonStr
(Cand.Reason) << '\n'; } } while (false)
;
1640 }
1641 }
1642
1643 LLVM_DEBUG(dbgs() << "Picking: " << Cand.Block->getID() << '\n';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking: " <<
Cand.Block->getID() << '\n'; dbgs() << "Is a block with high latency instruction: "
<< (Cand.IsHighLatency ? "yes\n" : "no\n"); dbgs() <<
"Position of last high latency dependency: " << Cand.LastPosHighLatParentScheduled
<< '\n'; dbgs() << "VGPRUsageDiff: " << Cand
.VGPRUsageDiff << '\n'; dbgs() << '\n';; } } while
(false)
27
Loop condition is false. Execution continues on line 1643
28
Assuming 'DebugFlag' is true
29
Assuming the condition is true
30
Taking true branch
31
Called C++ object pointer is null
1644 dbgs() << "Is a block with high latency instruction: "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking: " <<
Cand.Block->getID() << '\n'; dbgs() << "Is a block with high latency instruction: "
<< (Cand.IsHighLatency ? "yes\n" : "no\n"); dbgs() <<
"Position of last high latency dependency: " << Cand.LastPosHighLatParentScheduled
<< '\n'; dbgs() << "VGPRUsageDiff: " << Cand
.VGPRUsageDiff << '\n'; dbgs() << '\n';; } } while
(false)
1645 << (Cand.IsHighLatency ? "yes\n" : "no\n");do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking: " <<
Cand.Block->getID() << '\n'; dbgs() << "Is a block with high latency instruction: "
<< (Cand.IsHighLatency ? "yes\n" : "no\n"); dbgs() <<
"Position of last high latency dependency: " << Cand.LastPosHighLatParentScheduled
<< '\n'; dbgs() << "VGPRUsageDiff: " << Cand
.VGPRUsageDiff << '\n'; dbgs() << '\n';; } } while
(false)
1646 dbgs() << "Position of last high latency dependency: "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking: " <<
Cand.Block->getID() << '\n'; dbgs() << "Is a block with high latency instruction: "
<< (Cand.IsHighLatency ? "yes\n" : "no\n"); dbgs() <<
"Position of last high latency dependency: " << Cand.LastPosHighLatParentScheduled
<< '\n'; dbgs() << "VGPRUsageDiff: " << Cand
.VGPRUsageDiff << '\n'; dbgs() << '\n';; } } while
(false)
1647 << Cand.LastPosHighLatParentScheduled << '\n';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking: " <<
Cand.Block->getID() << '\n'; dbgs() << "Is a block with high latency instruction: "
<< (Cand.IsHighLatency ? "yes\n" : "no\n"); dbgs() <<
"Position of last high latency dependency: " << Cand.LastPosHighLatParentScheduled
<< '\n'; dbgs() << "VGPRUsageDiff: " << Cand
.VGPRUsageDiff << '\n'; dbgs() << '\n';; } } while
(false)
1648 dbgs() << "VGPRUsageDiff: " << Cand.VGPRUsageDiff << '\n';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking: " <<
Cand.Block->getID() << '\n'; dbgs() << "Is a block with high latency instruction: "
<< (Cand.IsHighLatency ? "yes\n" : "no\n"); dbgs() <<
"Position of last high latency dependency: " << Cand.LastPosHighLatParentScheduled
<< '\n'; dbgs() << "VGPRUsageDiff: " << Cand
.VGPRUsageDiff << '\n'; dbgs() << '\n';; } } while
(false)
1649 dbgs() << '\n';)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking: " <<
Cand.Block->getID() << '\n'; dbgs() << "Is a block with high latency instruction: "
<< (Cand.IsHighLatency ? "yes\n" : "no\n"); dbgs() <<
"Position of last high latency dependency: " << Cand.LastPosHighLatParentScheduled
<< '\n'; dbgs() << "VGPRUsageDiff: " << Cand
.VGPRUsageDiff << '\n'; dbgs() << '\n';; } } while
(false)
;
1650
1651 Block = Cand.Block;
1652 ReadyBlocks.erase(Best);
1653 return Block;
1654}
1655
1656// Tracking of currently alive registers to determine VGPR Usage.
1657
1658void SIScheduleBlockScheduler::addLiveRegs(std::set<unsigned> &Regs) {
1659 for (Register Reg : Regs) {
1660 // For now only track virtual registers.
1661 if (!Reg.isVirtual())
1662 continue;
1663 // If not already in the live set, then add it.
1664 (void) LiveRegs.insert(Reg);
1665 }
1666}
1667
1668void SIScheduleBlockScheduler::decreaseLiveRegs(SIScheduleBlock *Block,
1669 std::set<unsigned> &Regs) {
1670 for (unsigned Reg : Regs) {
1671 // For now only track virtual registers.
1672 std::set<unsigned>::iterator Pos = LiveRegs.find(Reg);
1673 assert (Pos != LiveRegs.end() && // Reg must be live.(static_cast <bool> (Pos != LiveRegs.end() && LiveRegsConsumers
.find(Reg) != LiveRegsConsumers.end() && LiveRegsConsumers
[Reg] >= 1) ? void (0) : __assert_fail ("Pos != LiveRegs.end() && LiveRegsConsumers.find(Reg) != LiveRegsConsumers.end() && LiveRegsConsumers[Reg] >= 1"
, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp", 1675, __extension__
__PRETTY_FUNCTION__))
1674 LiveRegsConsumers.find(Reg) != LiveRegsConsumers.end() &&(static_cast <bool> (Pos != LiveRegs.end() && LiveRegsConsumers
.find(Reg) != LiveRegsConsumers.end() && LiveRegsConsumers
[Reg] >= 1) ? void (0) : __assert_fail ("Pos != LiveRegs.end() && LiveRegsConsumers.find(Reg) != LiveRegsConsumers.end() && LiveRegsConsumers[Reg] >= 1"
, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp", 1675, __extension__
__PRETTY_FUNCTION__))
1675 LiveRegsConsumers[Reg] >= 1)(static_cast <bool> (Pos != LiveRegs.end() && LiveRegsConsumers
.find(Reg) != LiveRegsConsumers.end() && LiveRegsConsumers
[Reg] >= 1) ? void (0) : __assert_fail ("Pos != LiveRegs.end() && LiveRegsConsumers.find(Reg) != LiveRegsConsumers.end() && LiveRegsConsumers[Reg] >= 1"
, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp", 1675, __extension__
__PRETTY_FUNCTION__))
;
1676 --LiveRegsConsumers[Reg];
1677 if (LiveRegsConsumers[Reg] == 0)
1678 LiveRegs.erase(Pos);
1679 }
1680}
1681
1682void SIScheduleBlockScheduler::releaseBlockSuccs(SIScheduleBlock *Parent) {
1683 for (const auto &Block : Parent->getSuccs()) {
1684 if (--BlockNumPredsLeft[Block.first->getID()] == 0)
1685 ReadyBlocks.push_back(Block.first);
1686
1687 if (Parent->isHighLatencyBlock() &&
1688 Block.second == SIScheduleBlockLinkKind::Data)
1689 LastPosHighLatencyParentScheduled[Block.first->getID()] = NumBlockScheduled;
1690 }
1691}
1692
1693void SIScheduleBlockScheduler::blockScheduled(SIScheduleBlock *Block) {
1694 decreaseLiveRegs(Block, Block->getInRegs());
1695 addLiveRegs(Block->getOutRegs());
1696 releaseBlockSuccs(Block);
1697 for (const auto &RegP : LiveOutRegsNumUsages[Block->getID()]) {
1698 // We produce this register, thus it must not be previously alive.
1699 assert(LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end() ||(static_cast <bool> (LiveRegsConsumers.find(RegP.first)
== LiveRegsConsumers.end() || LiveRegsConsumers[RegP.first] ==
0) ? void (0) : __assert_fail ("LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end() || LiveRegsConsumers[RegP.first] == 0"
, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp", 1700, __extension__
__PRETTY_FUNCTION__))
1700 LiveRegsConsumers[RegP.first] == 0)(static_cast <bool> (LiveRegsConsumers.find(RegP.first)
== LiveRegsConsumers.end() || LiveRegsConsumers[RegP.first] ==
0) ? void (0) : __assert_fail ("LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end() || LiveRegsConsumers[RegP.first] == 0"
, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp", 1700, __extension__
__PRETTY_FUNCTION__))
;
1701 LiveRegsConsumers[RegP.first] += RegP.second;
1702 }
1703 if (LastPosHighLatencyParentScheduled[Block->getID()] >
1704 (unsigned)LastPosWaitedHighLatency)
1705 LastPosWaitedHighLatency =
1706 LastPosHighLatencyParentScheduled[Block->getID()];
1707 ++NumBlockScheduled;
1708}
1709
1710std::vector<int>
1711SIScheduleBlockScheduler::checkRegUsageImpact(std::set<unsigned> &InRegs,
1712 std::set<unsigned> &OutRegs) {
1713 std::vector<int> DiffSetPressure;
1714 DiffSetPressure.assign(DAG->getTRI()->getNumRegPressureSets(), 0);
1715
1716 for (Register Reg : InRegs) {
1717 // For now only track virtual registers.
1718 if (!Reg.isVirtual())
1719 continue;
1720 if (LiveRegsConsumers[Reg] > 1)
1721 continue;
1722 PSetIterator PSetI = DAG->getMRI()->getPressureSets(Reg);
1723 for (; PSetI.isValid(); ++PSetI) {
1724 DiffSetPressure[*PSetI] -= PSetI.getWeight();
1725 }
1726 }
1727
1728 for (Register Reg : OutRegs) {
1729 // For now only track virtual registers.
1730 if (!Reg.isVirtual())
1731 continue;
1732 PSetIterator PSetI = DAG->getMRI()->getPressureSets(Reg);
1733 for (; PSetI.isValid(); ++PSetI) {
1734 DiffSetPressure[*PSetI] += PSetI.getWeight();
1735 }
1736 }
1737
1738 return DiffSetPressure;
1739}
1740
1741// SIScheduler //
1742
1743struct SIScheduleBlockResult
1744SIScheduler::scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
1745 SISchedulerBlockSchedulerVariant ScheduleVariant) {
1746 SIScheduleBlocks Blocks = BlockCreator.getBlocks(BlockVariant);
1747 SIScheduleBlockScheduler Scheduler(DAG, ScheduleVariant, Blocks);
8
Calling constructor for 'SIScheduleBlockScheduler'
1748 std::vector<SIScheduleBlock*> ScheduledBlocks;
1749 struct SIScheduleBlockResult Res;
1750
1751 ScheduledBlocks = Scheduler.getBlocks();
1752
1753 for (SIScheduleBlock *Block : ScheduledBlocks) {
1754 std::vector<SUnit*> SUs = Block->getScheduledUnits();
1755
1756 for (SUnit* SU : SUs)
1757 Res.SUs.push_back(SU->NodeNum);
1758 }
1759
1760 Res.MaxSGPRUsage = Scheduler.getSGPRUsage();
1761 Res.MaxVGPRUsage = Scheduler.getVGPRUsage();
1762 return Res;
1763}
1764
1765// SIScheduleDAGMI //
1766
1767SIScheduleDAGMI::SIScheduleDAGMI(MachineSchedContext *C) :
1768 ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C)) {
1769 SITII = static_cast<const SIInstrInfo*>(TII);
1770 SITRI = static_cast<const SIRegisterInfo*>(TRI);
1771}
1772
1773SIScheduleDAGMI::~SIScheduleDAGMI() = default;
1774
1775// Code adapted from scheduleDAG.cpp
1776// Does a topological sort over the SUs.
1777// Both TopDown and BottomUp
1778void SIScheduleDAGMI::topologicalSort() {
1779 Topo.InitDAGTopologicalSorting();
1780
1781 TopDownIndex2SU = std::vector<int>(Topo.begin(), Topo.end());
1782 BottomUpIndex2SU = std::vector<int>(Topo.rbegin(), Topo.rend());
1783}
1784
1785// Move low latencies further from their user without
1786// increasing SGPR usage (in general)
1787// This is to be replaced by a better pass that would
1788// take into account SGPR usage (based on VGPR Usage
1789// and the corresponding wavefront count), that would
1790// try to merge groups of loads if it make sense, etc
1791void SIScheduleDAGMI::moveLowLatencies() {
1792 unsigned DAGSize = SUnits.size();
1793 int LastLowLatencyUser = -1;
1794 int LastLowLatencyPos = -1;
1795
1796 for (unsigned i = 0, e = ScheduledSUnits.size(); i != e; ++i) {
1797 SUnit *SU = &SUnits[ScheduledSUnits[i]];
1798 bool IsLowLatencyUser = false;
1799 unsigned MinPos = 0;
1800
1801 for (SDep& PredDep : SU->Preds) {
1802 SUnit *Pred = PredDep.getSUnit();
1803 if (SITII->isLowLatencyInstruction(*Pred->getInstr())) {
1804 IsLowLatencyUser = true;
1805 }
1806 if (Pred->NodeNum >= DAGSize)
1807 continue;
1808 unsigned PredPos = ScheduledSUnitsInv[Pred->NodeNum];
1809 if (PredPos >= MinPos)
1810 MinPos = PredPos + 1;
1811 }
1812
1813 if (SITII->isLowLatencyInstruction(*SU->getInstr())) {
1814 unsigned BestPos = LastLowLatencyUser + 1;
1815 if ((int)BestPos <= LastLowLatencyPos)
1816 BestPos = LastLowLatencyPos + 1;
1817 if (BestPos < MinPos)
1818 BestPos = MinPos;
1819 if (BestPos < i) {
1820 for (unsigned u = i; u > BestPos; --u) {
1821 ++ScheduledSUnitsInv[ScheduledSUnits[u-1]];
1822 ScheduledSUnits[u] = ScheduledSUnits[u-1];
1823 }
1824 ScheduledSUnits[BestPos] = SU->NodeNum;
1825 ScheduledSUnitsInv[SU->NodeNum] = BestPos;
1826 }
1827 LastLowLatencyPos = BestPos;
1828 if (IsLowLatencyUser)
1829 LastLowLatencyUser = BestPos;
1830 } else if (IsLowLatencyUser) {
1831 LastLowLatencyUser = i;
1832 // Moves COPY instructions on which depends
1833 // the low latency instructions too.
1834 } else if (SU->getInstr()->getOpcode() == AMDGPU::COPY) {
1835 bool CopyForLowLat = false;
1836 for (SDep& SuccDep : SU->Succs) {
1837 SUnit *Succ = SuccDep.getSUnit();
1838 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
1839 continue;
1840 if (SITII->isLowLatencyInstruction(*Succ->getInstr())) {
1841 CopyForLowLat = true;
1842 }
1843 }
1844 if (!CopyForLowLat)
1845 continue;
1846 if (MinPos < i) {
1847 for (unsigned u = i; u > MinPos; --u) {
1848 ++ScheduledSUnitsInv[ScheduledSUnits[u-1]];
1849 ScheduledSUnits[u] = ScheduledSUnits[u-1];
1850 }
1851 ScheduledSUnits[MinPos] = SU->NodeNum;
1852 ScheduledSUnitsInv[SU->NodeNum] = MinPos;
1853 }
1854 }
1855 }
1856}
1857
1858void SIScheduleDAGMI::restoreSULinksLeft() {
1859 for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
1860 SUnits[i].isScheduled = false;
1861 SUnits[i].WeakPredsLeft = SUnitsLinksBackup[i].WeakPredsLeft;
1862 SUnits[i].NumPredsLeft = SUnitsLinksBackup[i].NumPredsLeft;
1863 SUnits[i].WeakSuccsLeft = SUnitsLinksBackup[i].WeakSuccsLeft;
1864 SUnits[i].NumSuccsLeft = SUnitsLinksBackup[i].NumSuccsLeft;
1865 }
1866}
1867
1868// Return the Vgpr and Sgpr usage corresponding to some virtual registers.
1869template<typename _Iterator> void
1870SIScheduleDAGMI::fillVgprSgprCost(_Iterator First, _Iterator End,
1871 unsigned &VgprUsage, unsigned &SgprUsage) {
1872 VgprUsage = 0;
1873 SgprUsage = 0;
1874 for (_Iterator RegI = First; RegI != End; ++RegI) {
1875 Register Reg = *RegI;
1876 // For now only track virtual registers
1877 if (!Reg.isVirtual())
1878 continue;
1879 PSetIterator PSetI = MRI.getPressureSets(Reg);
1880 for (; PSetI.isValid(); ++PSetI) {
1881 if (*PSetI == AMDGPU::RegisterPressureSets::VGPR_32)
1882 VgprUsage += PSetI.getWeight();
1883 else if (*PSetI == AMDGPU::RegisterPressureSets::SReg_32)
1884 SgprUsage += PSetI.getWeight();
1885 }
1886 }
1887}
1888
1889void SIScheduleDAGMI::schedule()
1890{
1891 SmallVector<SUnit*, 8> TopRoots, BotRoots;
1892 SIScheduleBlockResult Best, Temp;
1893 LLVM_DEBUG(dbgs() << "Preparing Scheduling\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Preparing Scheduling\n"
; } } while (false)
;
1
Assuming 'DebugFlag' is false
2
Loop condition is false. Exiting loop
1894
1895 buildDAGWithRegPressure();
1896 LLVM_DEBUG(dump())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dump(); } } while (false)
;
3
Assuming 'DebugFlag' is false
4
Loop condition is false. Exiting loop
1897
1898 topologicalSort();
1899 findRootsAndBiasEdges(TopRoots, BotRoots);
1900 // We reuse several ScheduleDAGMI and ScheduleDAGMILive
1901 // functions, but to make them happy we must initialize
1902 // the default Scheduler implementation (even if we do not
1903 // run it)
1904 SchedImpl->initialize(this);
1905 initQueues(TopRoots, BotRoots);
1906
1907 // Fill some stats to help scheduling.
1908
1909 SUnitsLinksBackup = SUnits;
1910 IsLowLatencySU.clear();
1911 LowLatencyOffset.clear();
1912 IsHighLatencySU.clear();
1913
1914 IsLowLatencySU.resize(SUnits.size(), 0);
1915 LowLatencyOffset.resize(SUnits.size(), 0);
1916 IsHighLatencySU.resize(SUnits.size(), 0);
1917
1918 for (unsigned i = 0, e = (unsigned)SUnits.size(); i != e; ++i) {
5
Assuming 'i' is equal to 'e'
6
Loop condition is false. Execution continues on line 1932
1919 SUnit *SU = &SUnits[i];
1920 const MachineOperand *BaseLatOp;
1921 int64_t OffLatReg;
1922 if (SITII->isLowLatencyInstruction(*SU->getInstr())) {
1923 IsLowLatencySU[i] = 1;
1924 bool OffsetIsScalable;
1925 if (SITII->getMemOperandWithOffset(*SU->getInstr(), BaseLatOp, OffLatReg,
1926 OffsetIsScalable, TRI))
1927 LowLatencyOffset[i] = OffLatReg;
1928 } else if (SITII->isHighLatencyDef(SU->getInstr()->getOpcode()))
1929 IsHighLatencySU[i] = 1;
1930 }
1931
1932 SIScheduler Scheduler(this);
1933 Best = Scheduler.scheduleVariant(SISchedulerBlockCreatorVariant::LatenciesAlone,
7
Calling 'SIScheduler::scheduleVariant'
1934 SISchedulerBlockSchedulerVariant::BlockLatencyRegUsage);
1935
1936 // if VGPR usage is extremely high, try other good performing variants
1937 // which could lead to lower VGPR usage
1938 if (Best.MaxVGPRUsage > 180) {
1939 static const std::pair<SISchedulerBlockCreatorVariant,
1940 SISchedulerBlockSchedulerVariant>
1941 Variants[] = {
1942 { LatenciesAlone, BlockRegUsageLatency },
1943// { LatenciesAlone, BlockRegUsage },
1944 { LatenciesGrouped, BlockLatencyRegUsage },
1945// { LatenciesGrouped, BlockRegUsageLatency },
1946// { LatenciesGrouped, BlockRegUsage },
1947 { LatenciesAlonePlusConsecutive, BlockLatencyRegUsage },
1948// { LatenciesAlonePlusConsecutive, BlockRegUsageLatency },
1949// { LatenciesAlonePlusConsecutive, BlockRegUsage }
1950 };
1951 for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
1952 Temp = Scheduler.scheduleVariant(v.first, v.second);
1953 if (Temp.MaxVGPRUsage < Best.MaxVGPRUsage)
1954 Best = Temp;
1955 }
1956 }
1957 // if VGPR usage is still extremely high, we may spill. Try other variants
1958 // which are less performing, but that could lead to lower VGPR usage.
1959 if (Best.MaxVGPRUsage > 200) {
1960 static const std::pair<SISchedulerBlockCreatorVariant,
1961 SISchedulerBlockSchedulerVariant>
1962 Variants[] = {
1963// { LatenciesAlone, BlockRegUsageLatency },
1964 { LatenciesAlone, BlockRegUsage },
1965// { LatenciesGrouped, BlockLatencyRegUsage },
1966 { LatenciesGrouped, BlockRegUsageLatency },
1967 { LatenciesGrouped, BlockRegUsage },
1968// { LatenciesAlonePlusConsecutive, BlockLatencyRegUsage },
1969 { LatenciesAlonePlusConsecutive, BlockRegUsageLatency },
1970 { LatenciesAlonePlusConsecutive, BlockRegUsage }
1971 };
1972 for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
1973 Temp = Scheduler.scheduleVariant(v.first, v.second);
1974 if (Temp.MaxVGPRUsage < Best.MaxVGPRUsage)
1975 Best = Temp;
1976 }
1977 }
1978
1979 ScheduledSUnits = Best.SUs;
1980 ScheduledSUnitsInv.resize(SUnits.size());
1981
1982 for (unsigned i = 0, e = (unsigned)SUnits.size(); i != e; ++i) {
1983 ScheduledSUnitsInv[ScheduledSUnits[i]] = i;
1984 }
1985
1986 moveLowLatencies();
1987
1988 // Tell the outside world about the result of the scheduling.
1989
1990 assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker")(static_cast <bool> (TopRPTracker.getPos() == RegionBegin
&& "bad initial Top tracker") ? void (0) : __assert_fail
("TopRPTracker.getPos() == RegionBegin && \"bad initial Top tracker\""
, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp", 1990, __extension__
__PRETTY_FUNCTION__))
;
1991 TopRPTracker.setPos(CurrentTop);
1992
1993 for (unsigned I : ScheduledSUnits) {
1994 SUnit *SU = &SUnits[I];
1995
1996 scheduleMI(SU, true);
1997
1998 LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Scheduling SU(" <<
SU->NodeNum << ") " << *SU->getInstr(); } }
while (false)
1999 << *SU->getInstr())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Scheduling SU(" <<
SU->NodeNum << ") " << *SU->getInstr(); } }
while (false)
;
2000 }
2001
2002 assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.")(static_cast <bool> (CurrentTop == CurrentBottom &&
"Nonempty unscheduled zone.") ? void (0) : __assert_fail ("CurrentTop == CurrentBottom && \"Nonempty unscheduled zone.\""
, "llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp", 2002, __extension__
__PRETTY_FUNCTION__))
;
2003
2004 placeDebugValues();
2005
2006 LLVM_DEBUG({do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { { dbgs() << "*** Final schedule for "
<< printMBBReference(*begin()->getParent()) <<
" ***\n"; dumpSchedule(); dbgs() << '\n'; }; } } while
(false)
2007 dbgs() << "*** Final schedule for "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { { dbgs() << "*** Final schedule for "
<< printMBBReference(*begin()->getParent()) <<
" ***\n"; dumpSchedule(); dbgs() << '\n'; }; } } while
(false)
2008 << printMBBReference(*begin()->getParent()) << " ***\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { { dbgs() << "*** Final schedule for "
<< printMBBReference(*begin()->getParent()) <<
" ***\n"; dumpSchedule(); dbgs() << '\n'; }; } } while
(false)
2009 dumpSchedule();do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { { dbgs() << "*** Final schedule for "
<< printMBBReference(*begin()->getParent()) <<
" ***\n"; dumpSchedule(); dbgs() << '\n'; }; } } while
(false)
2010 dbgs() << '\n';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { { dbgs() << "*** Final schedule for "
<< printMBBReference(*begin()->getParent()) <<
" ***\n"; dumpSchedule(); dbgs() << '\n'; }; } } while
(false)
2011 })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { { dbgs() << "*** Final schedule for "
<< printMBBReference(*begin()->getParent()) <<
" ***\n"; dumpSchedule(); dbgs() << '\n'; }; } } while
(false)
;
2012}