Bug Summary

File: lib/Target/AMDGPU/SIMachineScheduler.cpp
Warning: line 1679, column 3
Called C++ object pointer is null

Annotated Source Code

clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name SIMachineScheduler.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-eagerly-assume -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -mrelocation-model pic -pic-level 2 -mthread-model posix -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -momit-leaf-frame-pointer -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-7/lib/clang/7.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-7~svn329677/build-llvm/lib/Target/AMDGPU -I /build/llvm-toolchain-snapshot-7~svn329677/lib/Target/AMDGPU -I /build/llvm-toolchain-snapshot-7~svn329677/build-llvm/include -I /build/llvm-toolchain-snapshot-7~svn329677/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.3.0/../../../../include/c++/7.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.3.0/../../../../include/x86_64-linux-gnu/c++/7.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.3.0/../../../../include/x86_64-linux-gnu/c++/7.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.3.0/../../../../include/c++/7.3.0/backward -internal-isystem /usr/include/clang/7.0.0/include/ -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-7/lib/clang/7.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++11 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-7~svn329677/build-llvm/lib/Target/AMDGPU -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-checker optin.performance.Padding -analyzer-output=html -analyzer-config stable-report-filename=true -o /tmp/scan-build-2018-04-11-031539-24776-1 -x c++ /build/llvm-toolchain-snapshot-7~svn329677/lib/Target/AMDGPU/SIMachineScheduler.cpp
1//===-- SIMachineScheduler.cpp - SI Scheduler Interface -------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief SI Machine Scheduler interface
12//
13//===----------------------------------------------------------------------===//
14
15#include "SIMachineScheduler.h"
16#include "AMDGPU.h"
17#include "SIInstrInfo.h"
18#include "SIRegisterInfo.h"
19#include "llvm/ADT/STLExtras.h"
20#include "llvm/ADT/SmallVector.h"
21#include "llvm/CodeGen/LiveInterval.h"
22#include "llvm/CodeGen/LiveIntervals.h"
23#include "llvm/CodeGen/MachineInstr.h"
24#include "llvm/CodeGen/MachineRegisterInfo.h"
25#include "llvm/CodeGen/MachineScheduler.h"
26#include "llvm/CodeGen/RegisterPressure.h"
27#include "llvm/CodeGen/SlotIndexes.h"
28#include "llvm/CodeGen/TargetRegisterInfo.h"
29#include "llvm/Support/Debug.h"
30#include "llvm/Support/ErrorHandling.h"
31#include "llvm/Support/raw_ostream.h"
32#include <algorithm>
33#include <cassert>
34#include <map>
35#include <set>
36#include <utility>
37#include <vector>
38
39using namespace llvm;
40
41#define DEBUG_TYPE "machine-scheduler"
42
43// This scheduler implements a different scheduling algorithm than
44// GenericScheduler.
45//
46// There are several specific architecture behaviours that can't be modelled
47// for GenericScheduler:
48// . When accessing the result of an SGPR load instruction, you have to wait
49// for all the SGPR load instructions before your current instruction to
50// have finished.
51// . When accessing the result of a VGPR load instruction, you have to wait
52// for all the VGPR load instructions previous to the VGPR load instruction
53// you are interested in to finish.
54// . The lower the register pressure, the better load latencies are hidden
55//
56// Moreover some specificities (like the fact that a lot of instructions in the shader
57// have few dependencies) make the generic scheduler have some unpredictable
58// behaviours. For example when register pressure becomes high, it can either
59// manage to prevent register pressure from going too high, or it can
60// increase register pressure even more than if it hadn't taken register
61// pressure into account.
62//
63// Also some other bad behaviours are generated, like loading at the beginning
64// of the shader a constant in a VGPR that you won't need until the end of the shader.
65//
66// The scheduling problem for SI can be divided into three main parts:
67// . Hiding high latencies (texture sampling, etc)
68// . Hiding low latencies (SGPR constant loading, etc)
69// . Keeping register usage low for better latency hiding and general
70// performance
71//
72// Some other things can also affect performance, but are hard to predict
73// (cache usage, the fact that the HW can issue several instructions from different
74// wavefronts if they are of different types, etc.)
75//
76// This scheduler tries to solve the scheduling problem by dividing it into
77// simpler sub-problems. It divides the instructions into blocks, schedules
78// locally inside the blocks where it takes care of low latencies, and then
79// chooses the order of the blocks by taking care of high latencies.
80// Dividing the instructions into blocks helps keep register
81// usage low.
82//
83// First the instructions are put into blocks.
84// We want the blocks to help control register usage and hide high latencies
85// later. To help control register usage, we typically want all local
86// computations, when for example you create a result that can be consumed
87// right away, to be contained in a block. Block inputs and outputs would
88// typically be important results that are needed in several locations of
89// the shader. Since we do want blocks to help hide high latencies, we want
90// the instructions inside the block to have a minimal set of dependencies
91// on high latencies. It will make it easy to pick blocks to hide specific
92// high latencies.
93// The block creation algorithm is divided into several steps, and several
94// variants can be tried during the scheduling process.
95//
96// Second the order of the instructions inside the blocks is chosen.
97// At that step we take into account only register usage and hiding
98// low latency instructions.
99//
100// Third the block order is chosen; there we try to hide high latencies
101// and keep register usage low.
102//
103// After the third step, a pass is done to improve the hiding of low
104// latencies.
105//
106// Actually when talking about 'low latency' or 'high latency' it includes
107// both the latency to get the cache (or global mem) data to the register,
108// and the bandwidth limitations.
109// Increasing the number of active wavefronts helps hide the former, but it
110// doesn't solve the latter, which is why even if the wavefront count is high, we have
111// to try to have as many instructions hiding high latencies as possible.
112// The OpenCL doc gives, for example, a latency of 400 cycles for a global mem access,
113// which is hidden by 10 instructions if the wavefront count is 10 (see the worked example after these notes).
114
115// Some figures taken from AMD docs:
116// Both texture and constant L1 caches are 4-way associative with 64-byte
117// lines.
118// Constant cache is shared with 4 CUs.
119// For texture sampling, the address generation unit receives 4 texture
120// addresses per cycle, thus we could expect texture sampling latency to be
121// equivalent to 4 instructions in the very best case (a VGPR is 64 work items,
122// instructions in a wavefront group are executed every 4 cycles),
123// or 16 instructions if the other wavefronts associated to the 3 other VALUs
124// of the CU do texture sampling too. (Don't take these figures too seriously,
125// as I'm not 100% sure of the computation)
126// Data exports should get similar latency.
127// For constant loading, the cache is shared among 4 CUs.
128// The doc says "a throughput of 16B/cycle for each of the 4 Compute Unit"
129// I guess if the other CUs don't read the cache, it can go up to 64B/cycle.
130// It means a simple s_buffer_load should take one instruction to hide, as
131// well as a s_buffer_loadx2 and potentially a s_buffer_loadx8 if on the same
132// cache line.
133//
134// As of today the driver doesn't preload the constants in cache, thus the
135// first loads get extra latency. The doc says global memory access can be
136// 300-600 cycles. We do not specially take that into account when scheduling,
137// as we expect the driver to be able to preload the constants soon.
138
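// Editor's worked example (not part of the original file), using only the
// figures quoted above (400-cycle global memory latency, one issue per
// wavefront every 4 cycles, wavefronts scheduled round-robin on a SIMD):
//
//   constexpr unsigned MemLatencyCycles   = 400; // OpenCL doc figure above
//   constexpr unsigned IssueCadenceCycles = 4;   // per-wavefront issue cadence
//   constexpr unsigned ResidentWavefronts = 10;
//   constexpr unsigned InstrsToHide =
//       MemLatencyCycles / (IssueCadenceCycles * ResidentWavefronts); // == 10
//
// With 10 resident wavefronts a given wavefront issues roughly every 40
// cycles, so about 10 independent instructions cover a 400-cycle access,
// matching the "hidden by 10 instructions" remark above.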
139// common code //
140
141#ifndef NDEBUG
142
143static const char *getReasonStr(SIScheduleCandReason Reason) {
144 switch (Reason) {
145 case NoCand: return "NOCAND";
146 case RegUsage: return "REGUSAGE";
147 case Latency: return "LATENCY";
148 case Successor: return "SUCCESSOR";
149 case Depth: return "DEPTH";
150 case NodeOrder: return "ORDER";
151 }
152 llvm_unreachable("Unknown reason!");
153}
154
155#endif
156
157static bool tryLess(int TryVal, int CandVal,
158 SISchedulerCandidate &TryCand,
159 SISchedulerCandidate &Cand,
160 SIScheduleCandReason Reason) {
161 if (TryVal < CandVal) {
162 TryCand.Reason = Reason;
163 return true;
164 }
165 if (TryVal > CandVal) {
166 if (Cand.Reason > Reason)
167 Cand.Reason = Reason;
168 return true;
169 }
170 Cand.setRepeat(Reason);
171 return false;
172}
173
174static bool tryGreater(int TryVal, int CandVal,
175 SISchedulerCandidate &TryCand,
176 SISchedulerCandidate &Cand,
177 SIScheduleCandReason Reason) {
178 if (TryVal > CandVal) {
179 TryCand.Reason = Reason;
180 return true;
181 }
182 if (TryVal < CandVal) {
183 if (Cand.Reason > Reason)
184 Cand.Reason = Reason;
185 return true;
186 }
187 Cand.setRepeat(Reason);
188 return false;
189}
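// Editor's illustrative sketch (not part of the original file): how the two
// helpers above are meant to be chained in a candidate-comparison routine.
// Each call returns true as soon as the current criterion discriminates the
// two candidates; TryCand.Reason is set only when TryCand is strictly better,
// which is what the caller tests afterwards (see tryCandidateTopDown and
// pickNode below). Field names follow SISchedCandidate; the routine itself
// is hypothetical.
//
//   // inside a hypothetical tryCandidate(Cand, TryCand):
//   if (tryLess(TryCand.VGPRUsage, Cand.VGPRUsage, TryCand, Cand, RegUsage))
//     return;                          // decided on register usage
//   if (tryGreater(TryCand.IsLowLatency, Cand.IsLowLatency,
//                  TryCand, Cand, SIScheduleCandReason::Depth))
//     return;                          // decided on latency class
//   TryCand.Reason = NodeOrder;        // tie on all criteria: keep source order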
190
191// SIScheduleBlock //
192
193void SIScheduleBlock::addUnit(SUnit *SU) {
194 NodeNum2Index[SU->NodeNum] = SUnits.size();
195 SUnits.push_back(SU);
196}
197
198#ifndef NDEBUG
199void SIScheduleBlock::traceCandidate(const SISchedCandidate &Cand) {
200
201 dbgs() << " SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
202 dbgs() << '\n';
203}
204#endif
205
206void SIScheduleBlock::tryCandidateTopDown(SISchedCandidate &Cand,
207 SISchedCandidate &TryCand) {
208 // Initialize the candidate if needed.
209 if (!Cand.isValid()) {
210 TryCand.Reason = NodeOrder;
211 return;
212 }
213
214 if (Cand.SGPRUsage > 60 &&
215 tryLess(TryCand.SGPRUsage, Cand.SGPRUsage, TryCand, Cand, RegUsage))
216 return;
217
218 // Schedule low latency instructions as close to the top as possible.
219 // Order of priority is:
220 // . Low latency instructions which do not depend on other low latency
221 // instructions we haven't waited for
222 // . Other instructions which do not depend on low latency instructions
223 // we haven't waited for
224 // . Low latencies
225 // . All other instructions
226 // Goal is to get: low latency instructions - independent instructions
227 // - (possibly some more low latency instructions)
228 // - instructions that depend on the first low latency instructions.
229 // If the block contains a lot of constant loads, the SGPR usage
230 // could go quite high; going above the arbitrary limit of 60 will thus
231 // encourage using the already loaded constants (in order to release some
232 // SGPRs) before loading more.
233 if (tryLess(TryCand.HasLowLatencyNonWaitedParent,
234 Cand.HasLowLatencyNonWaitedParent,
235 TryCand, Cand, SIScheduleCandReason::Depth))
236 return;
237
238 if (tryGreater(TryCand.IsLowLatency, Cand.IsLowLatency,
239 TryCand, Cand, SIScheduleCandReason::Depth))
240 return;
241
242 if (TryCand.IsLowLatency &&
243 tryLess(TryCand.LowLatencyOffset, Cand.LowLatencyOffset,
244 TryCand, Cand, SIScheduleCandReason::Depth))
245 return;
246
247 if (tryLess(TryCand.VGPRUsage, Cand.VGPRUsage, TryCand, Cand, RegUsage))
248 return;
249
250 // Fall through to original instruction order.
251 if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
252 TryCand.Reason = NodeOrder;
253 }
254}
255
256SUnit* SIScheduleBlock::pickNode() {
257 SISchedCandidate TopCand;
258
259 for (SUnit* SU : TopReadySUs) {
260 SISchedCandidate TryCand;
261 std::vector<unsigned> pressure;
262 std::vector<unsigned> MaxPressure;
263 // Predict register usage after this instruction.
264 TryCand.SU = SU;
265 TopRPTracker.getDownwardPressure(SU->getInstr(), pressure, MaxPressure);
266 TryCand.SGPRUsage = pressure[DAG->getSGPRSetID()];
267 TryCand.VGPRUsage = pressure[DAG->getVGPRSetID()];
268 TryCand.IsLowLatency = DAG->IsLowLatencySU[SU->NodeNum];
269 TryCand.LowLatencyOffset = DAG->LowLatencyOffset[SU->NodeNum];
270 TryCand.HasLowLatencyNonWaitedParent =
271 HasLowLatencyNonWaitedParent[NodeNum2Index[SU->NodeNum]];
272 tryCandidateTopDown(TopCand, TryCand);
273 if (TryCand.Reason != NoCand)
274 TopCand.setBest(TryCand);
275 }
276
277 return TopCand.SU;
278}
279
280
281// Schedule something valid.
282void SIScheduleBlock::fastSchedule() {
283 TopReadySUs.clear();
284 if (Scheduled)
285 undoSchedule();
286
287 for (SUnit* SU : SUnits) {
288 if (!SU->NumPredsLeft)
289 TopReadySUs.push_back(SU);
290 }
291
292 while (!TopReadySUs.empty()) {
293 SUnit *SU = TopReadySUs[0];
294 ScheduledSUnits.push_back(SU);
295 nodeScheduled(SU);
296 }
297
298 Scheduled = true;
299}
300
301// Returns whether the register was set between First and Last.
302static bool isDefBetween(unsigned Reg,
303 SlotIndex First, SlotIndex Last,
304 const MachineRegisterInfo *MRI,
305 const LiveIntervals *LIS) {
306 for (MachineRegisterInfo::def_instr_iterator
307 UI = MRI->def_instr_begin(Reg),
308 UE = MRI->def_instr_end(); UI != UE; ++UI) {
309 const MachineInstr* MI = &*UI;
310 if (MI->isDebugValue())
311 continue;
312 SlotIndex InstSlot = LIS->getInstructionIndex(*MI).getRegSlot();
313 if (InstSlot >= First && InstSlot <= Last)
314 return true;
315 }
316 return false;
317}
318
319void SIScheduleBlock::initRegPressure(MachineBasicBlock::iterator BeginBlock,
320 MachineBasicBlock::iterator EndBlock) {
321 IntervalPressure Pressure, BotPressure;
322 RegPressureTracker RPTracker(Pressure), BotRPTracker(BotPressure);
323 LiveIntervals *LIS = DAG->getLIS();
324 MachineRegisterInfo *MRI = DAG->getMRI();
325 DAG->initRPTracker(TopRPTracker);
326 DAG->initRPTracker(BotRPTracker);
327 DAG->initRPTracker(RPTracker);
328
329 // Goes through all SUs. RPTracker captures what had to be alive for the SUs
330 // to execute, and what is still alive at the end.
331 for (SUnit* SU : ScheduledSUnits) {
332 RPTracker.setPos(SU->getInstr());
333 RPTracker.advance();
334 }
335
336 // Close the RPTracker to finalize live ins/outs.
337 RPTracker.closeRegion();
338
339 // Initialize the live ins and live outs.
340 TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
341 BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
342
343 // Do not track physical registers, because it messes things up.
344 for (const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) {
345 if (TargetRegisterInfo::isVirtualRegister(RegMaskPair.RegUnit))
346 LiveInRegs.insert(RegMaskPair.RegUnit);
347 }
348 LiveOutRegs.clear();
349 // There are several possibilities to distinguish:
350 // 1) Reg is not input to any instruction in the block, but is output of one
351 // 2) 1) + read in the block and not needed after it
352 // 3) 1) + read in the block but needed in another block
353 // 4) Reg is input of an instruction but another block will read it too
354 // 5) Reg is input of an instruction and then rewritten in the block.
355 // result is not read in the block (implies used in another block)
356 // 6) Reg is input of an instruction and then rewritten in the block.
357 // result is read in the block and not needed in another block
358 // 7) Reg is input of an instruction and then rewritten in the block.
359 // result is read in the block but also needed in another block
360 // LiveInRegs will contain all the regs in situations 4, 5, 6, 7.
361 // We want LiveOutRegs to contain only Regs whose content will be read afterwards
362 // in another block, and whose content was written in the current block,
363 // that is we want it to get 1, 3, 5, 7.
364 // Since we made the MIs of a block be packed all together before
365 // scheduling, the LiveIntervals were correct, and the RPTracker was
366 // able to correctly handle 5 vs 6, and 2 vs 3.
367 // (Note: This is not sufficient for the RPTracker not to make mistakes for case 4.)
368 // The RPTracker's LiveOutRegs has 1, 3, (some correct or incorrect) 4, 5, 7.
369 // Comparing to LiveInRegs is not sufficient to differentiate 4 vs 5, 7.
370 // The use of isDefBetween removes case 4.
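// Editor's example (illustrative, not part of the original file): a VGPR
// defined in this block and read only by a later block is case 1 and ends up
// in LiveOutRegs; a VGPR that is only read here (defined elsewhere) but also
// needed by a later block is case 4, and is filtered out below because
// isDefBetween finds no def of it inside [BeginBlock, EndBlock].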
371 for (const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) {
372 unsigned Reg = RegMaskPair.RegUnit;
373 if (TargetRegisterInfo::isVirtualRegister(Reg) &&
374 isDefBetween(Reg, LIS->getInstructionIndex(*BeginBlock).getRegSlot(),
375 LIS->getInstructionIndex(*EndBlock).getRegSlot(), MRI,
376 LIS)) {
377 LiveOutRegs.insert(Reg);
378 }
379 }
380
381 // Pressure = sum_alive_registers register size
382 // Internally llvm will represent some registers as big 128-bit registers,
383 // for example, but they actually correspond to 4 actual 32-bit registers.
384 // Thus Pressure is not equal to num_alive_registers * constant.
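// Editor's example (assumption based on the comment above): a single live
// virtual register of a 128-bit class contributes 4 to its set pressure,
// the same as four live 32-bit registers of that class would.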
385 LiveInPressure = TopPressure.MaxSetPressure;
386 LiveOutPressure = BotPressure.MaxSetPressure;
387
388 // Prepares TopRPTracker for top down scheduling.
389 TopRPTracker.closeTop();
390}
391
392void SIScheduleBlock::schedule(MachineBasicBlock::iterator BeginBlock,
393 MachineBasicBlock::iterator EndBlock) {
394 if (!Scheduled)
395 fastSchedule();
396
397 // PreScheduling phase to set LiveIn and LiveOut.
398 initRegPressure(BeginBlock, EndBlock);
399 undoSchedule();
400
401 // Schedule for real now.
402
403 TopReadySUs.clear();
404
405 for (SUnit* SU : SUnits) {
406 if (!SU->NumPredsLeft)
407 TopReadySUs.push_back(SU);
408 }
409
410 while (!TopReadySUs.empty()) {
411 SUnit *SU = pickNode();
412 ScheduledSUnits.push_back(SU);
413 TopRPTracker.setPos(SU->getInstr());
414 TopRPTracker.advance();
415 nodeScheduled(SU);
416 }
417
418 // TODO: compute InternalAdditionnalPressure.
419 InternalAdditionnalPressure.resize(TopPressure.MaxSetPressure.size());
420
421 // Check everything is right.
422#ifndef NDEBUG
423 assert(SUnits.size() == ScheduledSUnits.size() &&
424 TopReadySUs.empty());
425 for (SUnit* SU : SUnits) {
426 assert(SU->isScheduled &&
427 SU->NumPredsLeft == 0);
428 }
429#endif
430
431 Scheduled = true;
432}
433
434void SIScheduleBlock::undoSchedule() {
435 for (SUnit* SU : SUnits) {
436 SU->isScheduled = false;
437 for (SDep& Succ : SU->Succs) {
438 if (BC->isSUInBlock(Succ.getSUnit(), ID))
439 undoReleaseSucc(SU, &Succ);
440 }
441 }
442 HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);
443 ScheduledSUnits.clear();
444 Scheduled = false;
445}
446
447void SIScheduleBlock::undoReleaseSucc(SUnit *SU, SDep *SuccEdge) {
448 SUnit *SuccSU = SuccEdge->getSUnit();
449
450 if (SuccEdge->isWeak()) {
451 ++SuccSU->WeakPredsLeft;
452 return;
453 }
454 ++SuccSU->NumPredsLeft;
455}
456
457void SIScheduleBlock::releaseSucc(SUnit *SU, SDep *SuccEdge) {
458 SUnit *SuccSU = SuccEdge->getSUnit();
459
460 if (SuccEdge->isWeak()) {
461 --SuccSU->WeakPredsLeft;
462 return;
463 }
464#ifndef NDEBUG
465 if (SuccSU->NumPredsLeft == 0) {
466 dbgs() << "*** Scheduling failed! ***\n";
467 SuccSU->dump(DAG);
468 dbgs() << " has been released too many times!\n";
469 llvm_unreachable(nullptr);
470 }
471#endif
472
473 --SuccSU->NumPredsLeft;
474}
475
476/// Release the successors of SU that are inside the block (InOrOutBlock = true) or outside it (InOrOutBlock = false).
477void SIScheduleBlock::releaseSuccessors(SUnit *SU, bool InOrOutBlock) {
478 for (SDep& Succ : SU->Succs) {
479 SUnit *SuccSU = Succ.getSUnit();
480
481 if (SuccSU->NodeNum >= DAG->SUnits.size())
482 continue;
483
484 if (BC->isSUInBlock(SuccSU, ID) != InOrOutBlock)
485 continue;
486
487 releaseSucc(SU, &Succ);
488 if (SuccSU->NumPredsLeft == 0 && InOrOutBlock)
489 TopReadySUs.push_back(SuccSU);
490 }
491}
492
493void SIScheduleBlock::nodeScheduled(SUnit *SU) {
494 // Is in TopReadySUs
495 assert (!SU->NumPredsLeft);
496 std::vector<SUnit *>::iterator I = llvm::find(TopReadySUs, SU);
497 if (I == TopReadySUs.end()) {
498 dbgs() << "Data Structure Bug in SI Scheduler\n";
499 llvm_unreachable(nullptr);
500 }
501 TopReadySUs.erase(I);
502
503 releaseSuccessors(SU, true);
504 // Scheduling this node will trigger a wait,
505 // thus propagate to the other instructions that they do not need to wait either.
506 if (HasLowLatencyNonWaitedParent[NodeNum2Index[SU->NodeNum]])
507 HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);
508
509 if (DAG->IsLowLatencySU[SU->NodeNum]) {
510 for (SDep& Succ : SU->Succs) {
511 std::map<unsigned, unsigned>::iterator I =
512 NodeNum2Index.find(Succ.getSUnit()->NodeNum);
513 if (I != NodeNum2Index.end())
514 HasLowLatencyNonWaitedParent[I->second] = 1;
515 }
516 }
517 SU->isScheduled = true;
518}
519
520void SIScheduleBlock::finalizeUnits() {
521 // We remove links from outside blocks to enable scheduling inside the block.
522 for (SUnit* SU : SUnits) {
523 releaseSuccessors(SU, false);
524 if (DAG->IsHighLatencySU[SU->NodeNum])
525 HighLatencyBlock = true;
526 }
527 HasLowLatencyNonWaitedParent.resize(SUnits.size(), 0);
528}
529
530// we maintain ascending order of IDs
531void SIScheduleBlock::addPred(SIScheduleBlock *Pred) {
532 unsigned PredID = Pred->getID();
533
534 // Check if not already predecessor.
535 for (SIScheduleBlock* P : Preds) {
536 if (PredID == P->getID())
537 return;
538 }
539 Preds.push_back(Pred);
540
541 assert(none_of(Succs,
542 [=](std::pair<SIScheduleBlock*,
543 SIScheduleBlockLinkKind> S) {
544 return PredID == S.first->getID();
545 }) &&
546 "Loop in the Block Graph!");
547}
548
549void SIScheduleBlock::addSucc(SIScheduleBlock *Succ,
550 SIScheduleBlockLinkKind Kind) {
551 unsigned SuccID = Succ->getID();
552
553 // Check if not already a successor.
554 for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> &S : Succs) {
555 if (SuccID == S.first->getID()) {
556 if (S.second == SIScheduleBlockLinkKind::NoData &&
557 Kind == SIScheduleBlockLinkKind::Data)
558 S.second = Kind;
559 return;
560 }
561 }
562 if (Succ->isHighLatencyBlock())
563 ++NumHighLatencySuccessors;
564 Succs.push_back(std::make_pair(Succ, Kind));
565
566 assert(none_of(Preds,
567 [=](SIScheduleBlock *P) { return SuccID == P->getID(); }) &&
568 "Loop in the Block Graph!");
569}
570
571#ifndef NDEBUG
572void SIScheduleBlock::printDebug(bool full) {
573 dbgs() << "Block (" << ID << ")\n";
574 if (!full)
575 return;
576
577 dbgs() << "\nContains High Latency Instruction: "
578 << HighLatencyBlock << '\n';
579 dbgs() << "\nDepends On:\n";
580 for (SIScheduleBlock* P : Preds) {
581 P->printDebug(false);
582 }
583
584 dbgs() << "\nSuccessors:\n";
585 for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S : Succs) {
586 if (S.second == SIScheduleBlockLinkKind::Data)
587 dbgs() << "(Data Dep) ";
588 S.first->printDebug(false);
589 }
590
591 if (Scheduled) {
592 dbgs() << "LiveInPressure " << LiveInPressure[DAG->getSGPRSetID()] << ' '
593 << LiveInPressure[DAG->getVGPRSetID()] << '\n';
594 dbgs() << "LiveOutPressure " << LiveOutPressure[DAG->getSGPRSetID()] << ' '
595 << LiveOutPressure[DAG->getVGPRSetID()] << "\n\n";
596 dbgs() << "LiveIns:\n";
597 for (unsigned Reg : LiveInRegs)
598 dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' ';
599
600 dbgs() << "\nLiveOuts:\n";
601 for (unsigned Reg : LiveOutRegs)
602 dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' ';
603 }
604
605 dbgs() << "\nInstructions:\n";
606 if (!Scheduled) {
607 for (SUnit* SU : SUnits) {
608 SU->dump(DAG);
609 }
610 } else {
611 for (SUnit* SU : SUnits) {
612 SU->dump(DAG);
613 }
614 }
615
616 dbgs() << "///////////////////////\n";
617}
618#endif
619
620// SIScheduleBlockCreator //
621
622SIScheduleBlockCreator::SIScheduleBlockCreator(SIScheduleDAGMI *DAG) :
623DAG(DAG) {
624}
625
626SIScheduleBlockCreator::~SIScheduleBlockCreator() = default;
627
628SIScheduleBlocks
629SIScheduleBlockCreator::getBlocks(SISchedulerBlockCreatorVariant BlockVariant) {
630 std::map<SISchedulerBlockCreatorVariant, SIScheduleBlocks>::iterator B =
631 Blocks.find(BlockVariant);
632 if (B == Blocks.end()) {
633 SIScheduleBlocks Res;
634 createBlocksForVariant(BlockVariant);
635 topologicalSort();
636 scheduleInsideBlocks();
637 fillStats();
638 Res.Blocks = CurrentBlocks;
639 Res.TopDownIndex2Block = TopDownIndex2Block;
640 Res.TopDownBlock2Index = TopDownBlock2Index;
641 Blocks[BlockVariant] = Res;
642 return Res;
643 } else {
644 return B->second;
645 }
646}
647
648bool SIScheduleBlockCreator::isSUInBlock(SUnit *SU, unsigned ID) {
649 if (SU->NodeNum >= DAG->SUnits.size())
650 return false;
651 return CurrentBlocks[Node2CurrentBlock[SU->NodeNum]]->getID() == ID;
652}
653
654void SIScheduleBlockCreator::colorHighLatenciesAlone() {
655 unsigned DAGSize = DAG->SUnits.size();
656
657 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
658 SUnit *SU = &DAG->SUnits[i];
659 if (DAG->IsHighLatencySU[SU->NodeNum]) {
660 CurrentColoring[SU->NodeNum] = NextReservedID++;
661 }
662 }
663}
664
665static bool
666hasDataDependencyPred(const SUnit &SU, const SUnit &FromSU) {
667 for (const auto &PredDep : SU.Preds) {
668 if (PredDep.getSUnit() == &FromSU &&
669 PredDep.getKind() == llvm::SDep::Data)
670 return true;
671 }
672 return false;
673}
674
675void SIScheduleBlockCreator::colorHighLatenciesGroups() {
676 unsigned DAGSize = DAG->SUnits.size();
677 unsigned NumHighLatencies = 0;
678 unsigned GroupSize;
679 int Color = NextReservedID;
680 unsigned Count = 0;
681 std::set<unsigned> FormingGroup;
682
683 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
684 SUnit *SU = &DAG->SUnits[i];
685 if (DAG->IsHighLatencySU[SU->NodeNum])
686 ++NumHighLatencies;
687 }
688
689 if (NumHighLatencies == 0)
690 return;
691
692 if (NumHighLatencies <= 6)
693 GroupSize = 2;
694 else if (NumHighLatencies <= 12)
695 GroupSize = 3;
696 else
697 GroupSize = 4;
698
699 for (unsigned SUNum : DAG->TopDownIndex2SU) {
700 const SUnit &SU = DAG->SUnits[SUNum];
701 if (DAG->IsHighLatencySU[SU.NodeNum]) {
702 bool CompatibleGroup = true;
703 int ProposedColor = Color;
704 std::vector<int> AdditionalElements;
705
706 // We don't want to put in the same block
707 // two high latency instructions that depend
708 // on each other.
709 // One way would be to check canAddEdge
710 // in both directions, but that currently is not
711 // enough because the high latency order is
712 // already enforced there (via links).
713 // Instead, look at the dependencies between the
714 // high latency instructions and deduce if it is
715 // a data dependency or not.
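// Editor's example (hypothetical SUs, not part of the original file): if a
// high latency sample SU_a produces a VGPR read by another high latency
// sample SU_b, the hasDataDependencyPred checks below detect the data edge,
// the group is marked incompatible, and SU_b starts a new block, keeping it
// available to hide SU_a's latency.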
716 for (unsigned j : FormingGroup) {
717 bool HasSubGraph;
718 std::vector<int> SubGraph;
719 // By construction (topological order), if SU and
720 // DAG->SUnits[j] are linked, DAG->SUnits[j] is necessarily
721 // in the parent graph of SU.
722#ifndef NDEBUG
723 SubGraph = DAG->GetTopo()->GetSubGraph(SU, DAG->SUnits[j],
724 HasSubGraph);
725 assert(!HasSubGraph);
726#endif
727 SubGraph = DAG->GetTopo()->GetSubGraph(DAG->SUnits[j], SU,
728 HasSubGraph);
729 if (!HasSubGraph)
730 continue; // No dependencies between each other
731 else if (SubGraph.size() > 5) {
732 // Too many elements would be required to be added to the block.
733 CompatibleGroup = false;
734 break;
735 }
736 else {
737 // Check the type of dependency
738 for (unsigned k : SubGraph) {
739 // If in the path to join the two instructions,
740 // there is another high latency instruction,
741 // or instructions colored for another block
742 // abort the merge.
743 if (DAG->IsHighLatencySU[k] ||
744 (CurrentColoring[k] != ProposedColor &&
745 CurrentColoring[k] != 0)) {
746 CompatibleGroup = false;
747 break;
748 }
749 // If one of the SU in the subgraph depends on the result of SU j,
750 // there'll be a data dependency.
751 if (hasDataDependencyPred(DAG->SUnits[k], DAG->SUnits[j])) {
752 CompatibleGroup = false;
753 break;
754 }
755 }
756 if (!CompatibleGroup)
757 break;
758 // Same check for the SU
759 if (hasDataDependencyPred(SU, DAG->SUnits[j])) {
760 CompatibleGroup = false;
761 break;
762 }
763 // Add all the required instructions to the block
764 // These cannot live in another block (because they
765 // depend (order dependency) on one of the
766 // instructions in the block, and are required for the
767 // high latency instruction we add).
768 AdditionalElements.insert(AdditionalElements.end(),
769 SubGraph.begin(), SubGraph.end());
770 }
771 }
772 if (CompatibleGroup) {
773 FormingGroup.insert(SU.NodeNum);
774 for (unsigned j : AdditionalElements)
775 CurrentColoring[j] = ProposedColor;
776 CurrentColoring[SU.NodeNum] = ProposedColor;
777 ++Count;
778 }
779 // Found one incompatible instruction,
780 // or has filled a big enough group.
781 // -> start a new one.
782 if (!CompatibleGroup) {
783 FormingGroup.clear();
784 Color = ++NextReservedID;
785 ProposedColor = Color;
786 FormingGroup.insert(SU.NodeNum);
787 CurrentColoring[SU.NodeNum] = ProposedColor;
788 Count = 0;
789 } else if (Count == GroupSize) {
790 FormingGroup.clear();
791 Color = ++NextReservedID;
792 ProposedColor = Color;
793 Count = 0;
794 }
795 }
796 }
797}
798
799void SIScheduleBlockCreator::colorComputeReservedDependencies() {
800 unsigned DAGSize = DAG->SUnits.size();
801 std::map<std::set<unsigned>, unsigned> ColorCombinations;
802
803 CurrentTopDownReservedDependencyColoring.clear();
804 CurrentBottomUpReservedDependencyColoring.clear();
805
806 CurrentTopDownReservedDependencyColoring.resize(DAGSize, 0);
807 CurrentBottomUpReservedDependencyColoring.resize(DAGSize, 0);
808
809 // Traverse TopDown, and give different colors to SUs depending
810 // on which combination of High Latencies they depend on.
811
812 for (unsigned SUNum : DAG->TopDownIndex2SU) {
813 SUnit *SU = &DAG->SUnits[SUNum];
814 std::set<unsigned> SUColors;
815
816 // Already given.
817 if (CurrentColoring[SU->NodeNum]) {
818 CurrentTopDownReservedDependencyColoring[SU->NodeNum] =
819 CurrentColoring[SU->NodeNum];
820 continue;
821 }
822
823 for (SDep& PredDep : SU->Preds) {
824 SUnit *Pred = PredDep.getSUnit();
825 if (PredDep.isWeak() || Pred->NodeNum >= DAGSize)
826 continue;
827 if (CurrentTopDownReservedDependencyColoring[Pred->NodeNum] > 0)
828 SUColors.insert(CurrentTopDownReservedDependencyColoring[Pred->NodeNum]);
829 }
830 // Color 0 by default.
831 if (SUColors.empty())
832 continue;
833 // Same color as parents.
834 if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
835 CurrentTopDownReservedDependencyColoring[SU->NodeNum] =
836 *SUColors.begin();
837 else {
838 std::map<std::set<unsigned>, unsigned>::iterator Pos =
839 ColorCombinations.find(SUColors);
840 if (Pos != ColorCombinations.end()) {
841 CurrentTopDownReservedDependencyColoring[SU->NodeNum] = Pos->second;
842 } else {
843 CurrentTopDownReservedDependencyColoring[SU->NodeNum] =
844 NextNonReservedID;
845 ColorCombinations[SUColors] = NextNonReservedID++;
846 }
847 }
848 }
849
850 ColorCombinations.clear();
851
852 // Same as before, but BottomUp.
853
854 for (unsigned SUNum : DAG->BottomUpIndex2SU) {
855 SUnit *SU = &DAG->SUnits[SUNum];
856 std::set<unsigned> SUColors;
857
858 // Already given.
859 if (CurrentColoring[SU->NodeNum]) {
860 CurrentBottomUpReservedDependencyColoring[SU->NodeNum] =
861 CurrentColoring[SU->NodeNum];
862 continue;
863 }
864
865 for (SDep& SuccDep : SU->Succs) {
866 SUnit *Succ = SuccDep.getSUnit();
867 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
868 continue;
869 if (CurrentBottomUpReservedDependencyColoring[Succ->NodeNum] > 0)
870 SUColors.insert(CurrentBottomUpReservedDependencyColoring[Succ->NodeNum]);
871 }
872 // Keep color 0.
873 if (SUColors.empty())
874 continue;
875 // Same color as parents.
876 if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
877 CurrentBottomUpReservedDependencyColoring[SU->NodeNum] =
878 *SUColors.begin();
879 else {
880 std::map<std::set<unsigned>, unsigned>::iterator Pos =
881 ColorCombinations.find(SUColors);
882 if (Pos != ColorCombinations.end()) {
883 CurrentBottomUpReservedDependencyColoring[SU->NodeNum] = Pos->second;
884 } else {
885 CurrentBottomUpReservedDependencyColoring[SU->NodeNum] =
886 NextNonReservedID;
887 ColorCombinations[SUColors] = NextNonReservedID++;
888 }
889 }
890 }
891}
892
893void SIScheduleBlockCreator::colorAccordingToReservedDependencies() {
894 unsigned DAGSize = DAG->SUnits.size();
895 std::map<std::pair<unsigned, unsigned>, unsigned> ColorCombinations;
896
897 // Every combination of colors given by the top down
898 // and bottom up reserved node dependency colorings gets its own color.
899
900 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
901 SUnit *SU = &DAG->SUnits[i];
902 std::pair<unsigned, unsigned> SUColors;
903
904 // High latency instructions: already given.
905 if (CurrentColoring[SU->NodeNum])
906 continue;
907
908 SUColors.first = CurrentTopDownReservedDependencyColoring[SU->NodeNum];
909 SUColors.second = CurrentBottomUpReservedDependencyColoring[SU->NodeNum];
910
911 std::map<std::pair<unsigned, unsigned>, unsigned>::iterator Pos =
912 ColorCombinations.find(SUColors);
913 if (Pos != ColorCombinations.end()) {
914 CurrentColoring[SU->NodeNum] = Pos->second;
915 } else {
916 CurrentColoring[SU->NodeNum] = NextNonReservedID;
917 ColorCombinations[SUColors] = NextNonReservedID++;
918 }
919 }
920}
921
922void SIScheduleBlockCreator::colorEndsAccordingToDependencies() {
923 unsigned DAGSize = DAG->SUnits.size();
924 std::vector<int> PendingColoring = CurrentColoring;
925
926 assert(DAGSize >= 1 &&
927 CurrentBottomUpReservedDependencyColoring.size() == DAGSize &&
928 CurrentTopDownReservedDependencyColoring.size() == DAGSize);
929 // If there is no reserved block at all, do nothing. We don't want
930 // everything in one block.
931 if (*std::max_element(CurrentBottomUpReservedDependencyColoring.begin(),
932 CurrentBottomUpReservedDependencyColoring.end()) == 0 &&
933 *std::max_element(CurrentTopDownReservedDependencyColoring.begin(),
934 CurrentTopDownReservedDependencyColoring.end()) == 0)
935 return;
936
937 for (unsigned SUNum : DAG->BottomUpIndex2SU) {
938 SUnit *SU = &DAG->SUnits[SUNum];
939 std::set<unsigned> SUColors;
940 std::set<unsigned> SUColorsPending;
941
942 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
943 continue;
944
945 if (CurrentBottomUpReservedDependencyColoring[SU->NodeNum] > 0 ||
946 CurrentTopDownReservedDependencyColoring[SU->NodeNum] > 0)
947 continue;
948
949 for (SDep& SuccDep : SU->Succs) {
950 SUnit *Succ = SuccDep.getSUnit();
951 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
952 continue;
953 if (CurrentBottomUpReservedDependencyColoring[Succ->NodeNum] > 0 ||
954 CurrentTopDownReservedDependencyColoring[Succ->NodeNum] > 0)
955 SUColors.insert(CurrentColoring[Succ->NodeNum]);
956 SUColorsPending.insert(PendingColoring[Succ->NodeNum]);
957 }
958 // If there is only one child/parent block, and that block
959 // is not among the ones we are removing in this path, then
960 // merge the instruction into that block.
961 if (SUColors.size() == 1 && SUColorsPending.size() == 1)
962 PendingColoring[SU->NodeNum] = *SUColors.begin();
963 else // TODO: Attribute new colors depending on color
964 // combination of children.
965 PendingColoring[SU->NodeNum] = NextNonReservedID++;
966 }
967 CurrentColoring = PendingColoring;
968}
969
970
971void SIScheduleBlockCreator::colorForceConsecutiveOrderInGroup() {
972 unsigned DAGSize = DAG->SUnits.size();
973 unsigned PreviousColor;
974 std::set<unsigned> SeenColors;
975
976 if (DAGSize <= 1)
977 return;
978
979 PreviousColor = CurrentColoring[0];
980
981 for (unsigned i = 1, e = DAGSize; i != e; ++i) {
982 SUnit *SU = &DAG->SUnits[i];
983 unsigned CurrentColor = CurrentColoring[i];
984 unsigned PreviousColorSave = PreviousColor;
985 assert(i == SU->NodeNum);
986
987 if (CurrentColor != PreviousColor)
988 SeenColors.insert(PreviousColor);
989 PreviousColor = CurrentColor;
990
991 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
992 continue;
993
994 if (SeenColors.find(CurrentColor) == SeenColors.end())
995 continue;
996
997 if (PreviousColorSave != CurrentColor)
998 CurrentColoring[i] = NextNonReservedID++;
999 else
1000 CurrentColoring[i] = CurrentColoring[i-1];
1001 }
1002}
1003
1004void SIScheduleBlockCreator::colorMergeConstantLoadsNextGroup() {
1005 unsigned DAGSize = DAG->SUnits.size();
1006
1007 for (unsigned SUNum : DAG->BottomUpIndex2SU) {
1008 SUnit *SU = &DAG->SUnits[SUNum];
1009 std::set<unsigned> SUColors;
1010
1011 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
1012 continue;
1013
1014 // No predecessor: Vgpr constant loading.
1015 // Low latency instructions usually have a predecessor (the address)
1016 if (SU->Preds.size() > 0 && !DAG->IsLowLatencySU[SU->NodeNum])
1017 continue;
1018
1019 for (SDep& SuccDep : SU->Succs) {
1020 SUnit *Succ = SuccDep.getSUnit();
1021 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
1022 continue;
1023 SUColors.insert(CurrentColoring[Succ->NodeNum]);
1024 }
1025 if (SUColors.size() == 1)
1026 CurrentColoring[SU->NodeNum] = *SUColors.begin();
1027 }
1028}
1029
1030void SIScheduleBlockCreator::colorMergeIfPossibleNextGroup() {
1031 unsigned DAGSize = DAG->SUnits.size();
1032
1033 for (unsigned SUNum : DAG->BottomUpIndex2SU) {
1034 SUnit *SU = &DAG->SUnits[SUNum];
1035 std::set<unsigned> SUColors;
1036
1037 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
1038 continue;
1039
1040 for (SDep& SuccDep : SU->Succs) {
1041 SUnit *Succ = SuccDep.getSUnit();
1042 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
1043 continue;
1044 SUColors.insert(CurrentColoring[Succ->NodeNum]);
1045 }
1046 if (SUColors.size() == 1)
1047 CurrentColoring[SU->NodeNum] = *SUColors.begin();
1048 }
1049}
1050
1051void SIScheduleBlockCreator::colorMergeIfPossibleNextGroupOnlyForReserved() {
1052 unsigned DAGSize = DAG->SUnits.size();
1053
1054 for (unsigned SUNum : DAG->BottomUpIndex2SU) {
1055 SUnit *SU = &DAG->SUnits[SUNum];
1056 std::set<unsigned> SUColors;
1057
1058 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
1059 continue;
1060
1061 for (SDep& SuccDep : SU->Succs) {
1062 SUnit *Succ = SuccDep.getSUnit();
1063 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
1064 continue;
1065 SUColors.insert(CurrentColoring[Succ->NodeNum]);
1066 }
1067 if (SUColors.size() == 1 && *SUColors.begin() <= DAGSize)
1068 CurrentColoring[SU->NodeNum] = *SUColors.begin();
1069 }
1070}
1071
1072void SIScheduleBlockCreator::colorMergeIfPossibleSmallGroupsToNextGroup() {
1073 unsigned DAGSize = DAG->SUnits.size();
1074 std::map<unsigned, unsigned> ColorCount;
1075
1076 for (unsigned SUNum : DAG->BottomUpIndex2SU) {
1077 SUnit *SU = &DAG->SUnits[SUNum];
1078 unsigned color = CurrentColoring[SU->NodeNum];
1079 ++ColorCount[color];
1080 }
1081
1082 for (unsigned SUNum : DAG->BottomUpIndex2SU) {
1083 SUnit *SU = &DAG->SUnits[SUNum];
1084 unsigned color = CurrentColoring[SU->NodeNum];
1085 std::set<unsigned> SUColors;
1086
1087 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
1088 continue;
1089
1090 if (ColorCount[color] > 1)
1091 continue;
1092
1093 for (SDep& SuccDep : SU->Succs) {
1094 SUnit *Succ = SuccDep.getSUnit();
1095 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
1096 continue;
1097 SUColors.insert(CurrentColoring[Succ->NodeNum]);
1098 }
1099 if (SUColors.size() == 1 && *SUColors.begin() != color) {
1100 --ColorCount[color];
1101 CurrentColoring[SU->NodeNum] = *SUColors.begin();
1102 ++ColorCount[*SUColors.begin()];
1103 }
1104 }
1105}
1106
1107void SIScheduleBlockCreator::cutHugeBlocks() {
1108 // TODO
1109}
1110
1111void SIScheduleBlockCreator::regroupNoUserInstructions() {
1112 unsigned DAGSize = DAG->SUnits.size();
1113 int GroupID = NextNonReservedID++;
1114
1115 for (unsigned SUNum : DAG->BottomUpIndex2SU) {
1116 SUnit *SU = &DAG->SUnits[SUNum];
1117 bool hasSuccessor = false;
1118
1119 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
1120 continue;
1121
1122 for (SDep& SuccDep : SU->Succs) {
1123 SUnit *Succ = SuccDep.getSUnit();
1124 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
1125 continue;
1126 hasSuccessor = true;
1127 }
1128 if (!hasSuccessor)
1129 CurrentColoring[SU->NodeNum] = GroupID;
1130 }
1131}
1132
1133void SIScheduleBlockCreator::colorExports() {
1134 unsigned ExportColor = NextNonReservedID++;
1135 SmallVector<unsigned, 8> ExpGroup;
1136
1137 // Put all exports together in a block.
1138 // The block will naturally end up being scheduled last,
1139 // thus putting exports at the end of the schedule, which
1140 // is better for performance.
1141 // However we must ensure, for safety, that the exports can be put
1142 // together in the same block without any other instruction.
1143 // This could happen, for example, when scheduling after regalloc
1144 // if reloading a spilled register from memory using the same
1145 // register as used in a previous export.
1146 // If that happens, do not regroup the exports.
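// Editor's example (hypothetical scenario, not part of the original file):
// if a spill reload redefines a VGPR between two exports that both read it,
// the SubGraph computed below between the two EXPs contains that non-EXP
// reload, so the function returns early and the exports keep whatever colors
// earlier passes gave them.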
1147 for (unsigned SUNum : DAG->TopDownIndex2SU) {
1148 const SUnit &SU = DAG->SUnits[SUNum];
1149 if (SIInstrInfo::isEXP(*SU.getInstr())) {
1150 // Check the EXP can be added to the group safely,
1151 // i.e. without needing any other instruction.
1152 // The EXP is allowed to depend on other EXPs
1153 // (they will be in the same group).
1154 for (unsigned j : ExpGroup) {
1155 bool HasSubGraph;
1156 std::vector<int> SubGraph;
1157 // By construction (topological order), if SU and
1158 // DAG->SUnits[j] are linked, DAG->SUnits[j] is necessarily
1159 // in the parent graph of SU.
1160#ifndef NDEBUG
1161 SubGraph = DAG->GetTopo()->GetSubGraph(SU, DAG->SUnits[j],
1162 HasSubGraph);
1163 assert(!HasSubGraph);
1164#endif
1165 SubGraph = DAG->GetTopo()->GetSubGraph(DAG->SUnits[j], SU,
1166 HasSubGraph);
1167 if (!HasSubGraph)
1168 continue; // No dependencies between each other
1169
1170 // SubGraph contains all the instructions required
1171 // between EXP SUnits[j] and EXP SU.
1172 for (unsigned k : SubGraph) {
1173 if (!SIInstrInfo::isEXP(*DAG->SUnits[k].getInstr()))
1174 // Instructions other than EXP would be required in the group.
1175 // Abort the grouping.
1176 return;
1177 }
1178 }
1179
1180 ExpGroup.push_back(SUNum);
1181 }
1182 }
1183
1184 // The group can be formed. Give the color.
1185 for (unsigned j : ExpGroup)
1186 CurrentColoring[j] = ExportColor;
1187}
1188
1189void SIScheduleBlockCreator::createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant) {
1190 unsigned DAGSize = DAG->SUnits.size();
1191 std::map<unsigned,unsigned> RealID;
1192
1193 CurrentBlocks.clear();
1194 CurrentColoring.clear();
1195 CurrentColoring.resize(DAGSize, 0);
1196 Node2CurrentBlock.clear();
1197
1198 // Restore links previous scheduling variant has overridden.
1199 DAG->restoreSULinksLeft();
1200
1201 NextReservedID = 1;
1202 NextNonReservedID = DAGSize + 1;
1203
1204 DEBUG(dbgs() << "Coloring the graph\n");
1205
1206 if (BlockVariant == SISchedulerBlockCreatorVariant::LatenciesGrouped)
1207 colorHighLatenciesGroups();
1208 else
1209 colorHighLatenciesAlone();
1210 colorComputeReservedDependencies();
1211 colorAccordingToReservedDependencies();
1212 colorEndsAccordingToDependencies();
1213 if (BlockVariant == SISchedulerBlockCreatorVariant::LatenciesAlonePlusConsecutive)
1214 colorForceConsecutiveOrderInGroup();
1215 regroupNoUserInstructions();
1216 colorMergeConstantLoadsNextGroup();
1217 colorMergeIfPossibleNextGroupOnlyForReserved();
1218 colorExports();
1219
1220 // Put SUs of same color into same block
1221 Node2CurrentBlock.resize(DAGSize, -1);
1222 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1223 SUnit *SU = &DAG->SUnits[i];
1224 unsigned Color = CurrentColoring[SU->NodeNum];
1225 if (RealID.find(Color) == RealID.end()) {
1226 int ID = CurrentBlocks.size();
1227 BlockPtrs.push_back(llvm::make_unique<SIScheduleBlock>(DAG, this, ID));
1228 CurrentBlocks.push_back(BlockPtrs.rbegin()->get());
1229 RealID[Color] = ID;
1230 }
1231 CurrentBlocks[RealID[Color]]->addUnit(SU);
1232 Node2CurrentBlock[SU->NodeNum] = RealID[Color];
1233 }
1234
1235 // Build dependencies between blocks.
1236 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1237 SUnit *SU = &DAG->SUnits[i];
1238 int SUID = Node2CurrentBlock[i];
1239 for (SDep& SuccDep : SU->Succs) {
1240 SUnit *Succ = SuccDep.getSUnit();
1241 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
1242 continue;
1243 if (Node2CurrentBlock[Succ->NodeNum] != SUID)
1244 CurrentBlocks[SUID]->addSucc(CurrentBlocks[Node2CurrentBlock[Succ->NodeNum]],
1245 SuccDep.isCtrl() ? NoData : Data);
1246 }
1247 for (SDep& PredDep : SU->Preds) {
1248 SUnit *Pred = PredDep.getSUnit();
1249 if (PredDep.isWeak() || Pred->NodeNum >= DAGSize)
1250 continue;
1251 if (Node2CurrentBlock[Pred->NodeNum] != SUID)
1252 CurrentBlocks[SUID]->addPred(CurrentBlocks[Node2CurrentBlock[Pred->NodeNum]]);
1253 }
1254 }
1255
1256 // Free root and leafs of all blocks to enable scheduling inside them.
1257 for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) {
1258 SIScheduleBlock *Block = CurrentBlocks[i];
1259 Block->finalizeUnits();
1260 }
1261 DEBUG(
1262 dbgs() << "Blocks created:\n\n";
1263 for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) {
1264 SIScheduleBlock *Block = CurrentBlocks[i];
1265 Block->printDebug(true);
1266 }
1267 );
1268}
1269
1270// Two functions taken from Codegen/MachineScheduler.cpp
1271
1272/// Non-const version.
1273static MachineBasicBlock::iterator
1274nextIfDebug(MachineBasicBlock::iterator I,
1275 MachineBasicBlock::const_iterator End) {
1276 for (; I != End; ++I) {
1277 if (!I->isDebugValue())
1278 break;
1279 }
1280 return I;
1281}
1282
1283void SIScheduleBlockCreator::topologicalSort() {
1284 unsigned DAGSize = CurrentBlocks.size();
1285 std::vector<int> WorkList;
1286
1287 DEBUG(dbgs() << "Topological Sort\n");
1288
1289 WorkList.reserve(DAGSize);
1290 TopDownIndex2Block.resize(DAGSize);
1291 TopDownBlock2Index.resize(DAGSize);
1292 BottomUpIndex2Block.resize(DAGSize);
1293
1294 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1295 SIScheduleBlock *Block = CurrentBlocks[i];
1296 unsigned Degree = Block->getSuccs().size();
1297 TopDownBlock2Index[i] = Degree;
1298 if (Degree == 0) {
1299 WorkList.push_back(i);
1300 }
1301 }
1302
1303 int Id = DAGSize;
1304 while (!WorkList.empty()) {
1305 int i = WorkList.back();
1306 SIScheduleBlock *Block = CurrentBlocks[i];
1307 WorkList.pop_back();
1308 TopDownBlock2Index[i] = --Id;
1309 TopDownIndex2Block[Id] = i;
1310 for (SIScheduleBlock* Pred : Block->getPreds()) {
1311 if (!--TopDownBlock2Index[Pred->getID()])
1312 WorkList.push_back(Pred->getID());
1313 }
1314 }
1315
1316#ifndef NDEBUG
1317 // Check correctness of the ordering.
1318 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1319 SIScheduleBlock *Block = CurrentBlocks[i];
1320 for (SIScheduleBlock* Pred : Block->getPreds()) {
1321 assert(TopDownBlock2Index[i] > TopDownBlock2Index[Pred->getID()] &&
1322 "Wrong Top Down topological sorting");
1323 }
1324 }
1325#endif
1326
1327 BottomUpIndex2Block = std::vector<int>(TopDownIndex2Block.rbegin(),
1328 TopDownIndex2Block.rend());
1329}
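
The routine above is a counting-based (Kahn-style) topological sort over the block DAG: blocks with no successors seed the worklist, indices are handed out from the back, and the bottom-up order is simply the reverse of the top-down order. Below is a minimal standalone sketch of the same technique on a made-up three-block DAG; the data and the main() wrapper are illustrative only and are not part of the LLVM sources.

#include <cassert>
#include <vector>

int main() {
  // Succs[b] / Preds[b] describe a toy DAG: 0 -> 1, 0 -> 2, 1 -> 2.
  std::vector<std::vector<int>> Succs = {{1, 2}, {2}, {}};
  std::vector<std::vector<int>> Preds = {{}, {0}, {0, 1}};
  unsigned N = Succs.size();

  std::vector<int> TopDownBlock2Index(N), TopDownIndex2Block(N), WorkList;
  for (unsigned b = 0; b != N; ++b) {
    TopDownBlock2Index[b] = Succs[b].size(); // temporarily holds the out-degree
    if (Succs[b].empty())
      WorkList.push_back(b);
  }

  int Id = N;
  while (!WorkList.empty()) {
    int b = WorkList.back();
    WorkList.pop_back();
    TopDownBlock2Index[b] = --Id;            // final top-down index
    TopDownIndex2Block[Id] = b;
    for (int P : Preds[b])
      if (!--TopDownBlock2Index[P])          // all successors of P are placed
        WorkList.push_back(P);
  }

  // The bottom-up order is simply the reverse of the top-down order.
  std::vector<int> BottomUpIndex2Block(TopDownIndex2Block.rbegin(),
                                       TopDownIndex2Block.rend());
  assert(TopDownIndex2Block[0] == 0 && BottomUpIndex2Block[0] == 2);
  return 0;
}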
1330
1331void SIScheduleBlockCreator::scheduleInsideBlocks() {
1332 unsigned DAGSize = CurrentBlocks.size();
1333
1334 DEBUG(dbgs() << "\nScheduling Blocks\n\n");
1335
1336 // We first do a valid scheduling so that each Block corresponds
1337 // to a contiguous range of instructions.
1338 DEBUG(dbgs() << "First phase: Fast scheduling for Reg Liveness\n");
1339 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1340 SIScheduleBlock *Block = CurrentBlocks[i];
1341 Block->fastSchedule();
1342 }
1343
1344 // Note: the following code and the later pass that restores the previous
1345 // positions are by far the most expensive operations of the Scheduler.
1346
1347 // Do not update CurrentTop.
1348 MachineBasicBlock::iterator CurrentTopFastSched = DAG->getCurrentTop();
1349 std::vector<MachineBasicBlock::iterator> PosOld;
1350 std::vector<MachineBasicBlock::iterator> PosNew;
1351 PosOld.reserve(DAG->SUnits.size());
1352 PosNew.reserve(DAG->SUnits.size());
1353
1354 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1355 int BlockIndice = TopDownIndex2Block[i];
1356 SIScheduleBlock *Block = CurrentBlocks[BlockIndice];
1357 std::vector<SUnit*> SUs = Block->getScheduledUnits();
1358
1359 for (SUnit* SU : SUs) {
1360 MachineInstr *MI = SU->getInstr();
1361 MachineBasicBlock::iterator Pos = MI;
1362 PosOld.push_back(Pos);
1363 if (&*CurrentTopFastSched == MI) {
1364 PosNew.push_back(Pos);
1365 CurrentTopFastSched = nextIfDebug(++CurrentTopFastSched,
1366 DAG->getCurrentBottom());
1367 } else {
1368 // Update the instruction stream.
1369 DAG->getBB()->splice(CurrentTopFastSched, DAG->getBB(), MI);
1370
1371 // Update LiveIntervals.
1372 // Note: Moving all the instructions and calling handleMove every time
1373 // is the most CPU-intensive operation of the scheduler.
1374 // It would help a lot if there were a way to recompute the
1375 // LiveIntervals for the entire scheduling region.
1376 DAG->getLIS()->handleMove(*MI, /*UpdateFlags=*/true);
1377 PosNew.push_back(CurrentTopFastSched);
1378 }
1379 }
1380 }
1381
1382 // Now each Block of SUs corresponds to a contiguous block of MIs.
1383 // We do the final schedule for the instructions inside each block.
1384 // The property that all the SUs of a Block are grouped together as MIs
1385 // is used for correct reg usage tracking.
1386 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1387 SIScheduleBlock *Block = CurrentBlocks[i];
1388 std::vector<SUnit*> SUs = Block->getScheduledUnits();
1389 Block->schedule((*SUs.begin())->getInstr(), (*SUs.rbegin())->getInstr());
1390 }
1391
1392 DEBUG(dbgs() << "Restoring MI Pos\n");
1393 // Restore old ordering (which prevents a LIS->handleMove bug).
1394 for (unsigned i = PosOld.size(), e = 0; i != e; --i) {
1395 MachineBasicBlock::iterator POld = PosOld[i-1];
1396 MachineBasicBlock::iterator PNew = PosNew[i-1];
1397 if (PNew != POld) {
1398 // Update the instruction stream.
1399 DAG->getBB()->splice(POld, DAG->getBB(), PNew);
1400
1401 // Update LiveIntervals.
1402 DAG->getLIS()->handleMove(*POld, /*UpdateFlags=*/true);
1403 }
1404 }
1405
1406 DEBUG(
1407 for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) {
1408 SIScheduleBlock *Block = CurrentBlocks[i];
1409 Block->printDebug(true);
1410 }
1411 );
1412}
1413
1414void SIScheduleBlockCreator::fillStats() {
1415 unsigned DAGSize = CurrentBlocks.size();
1416
1417 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1418 int BlockIndice = TopDownIndex2Block[i];
1419 SIScheduleBlock *Block = CurrentBlocks[BlockIndice];
1420 if (Block->getPreds().empty())
1421 Block->Depth = 0;
1422 else {
1423 unsigned Depth = 0;
1424 for (SIScheduleBlock *Pred : Block->getPreds()) {
1425 if (Depth < Pred->Depth + Pred->getCost())
1426 Depth = Pred->Depth + Pred->getCost();
1427 }
1428 Block->Depth = Depth;
1429 }
1430 }
1431
1432 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1433 int BlockIndice = BottomUpIndex2Block[i];
1434 SIScheduleBlock *Block = CurrentBlocks[BlockIndice];
1435 if (Block->getSuccs().empty())
1436 Block->Height = 0;
1437 else {
1438 unsigned Height = 0;
1439 for (const auto &Succ : Block->getSuccs())
1440 Height = std::max(Height, Succ.first->Height + Succ.first->getCost());
1441 Block->Height = Height;
1442 }
1443 }
1444}
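
To make the two passes above concrete (toy numbers, not taken from a real schedule): for a chain of blocks A -> B -> C with costs 2, 3 and 1, the top-down pass gives Depth(A) = 0, Depth(B) = Depth(A) + cost(A) = 2 and Depth(C) = Depth(B) + cost(B) = 5, while the bottom-up pass gives Height(C) = 0, Height(B) = Height(C) + cost(C) = 1 and Height(A) = Height(B) + cost(B) = 4.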
1445
1446// SIScheduleBlockScheduler //
1447
1448SIScheduleBlockScheduler::SIScheduleBlockScheduler(SIScheduleDAGMI *DAG,
1449 SISchedulerBlockSchedulerVariant Variant,
1450 SIScheduleBlocks BlocksStruct) :
1451 DAG(DAG), Variant(Variant), Blocks(BlocksStruct.Blocks),
1452 LastPosWaitedHighLatency(0), NumBlockScheduled(0), VregCurrentUsage(0),
1453 SregCurrentUsage(0), maxVregUsage(0), maxSregUsage(0) {
1454
1455 // Fill in the number of users of every output.
1456 // Warning: while by construction we always have a link between two blocks
1457 // when one needs a result from the other, the number of users of an output
1458 // is not simply the number of child blocks that take the same virtual
1459 // register as input. Here is an example: A produces x and y, B consumes x
1460 // and produces x', and C consumes x' and y. The register coalescer may have
1461 // assigned the same virtual register to x and x'.
1462 // To count accurately, we do a topological sort. When the register is
1463 // produced by several parents, we credit the use to the parent with the
1464 // highest topological index.
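// (Illustrative walk-through, not part of the source: suppose the coalescer
// gave x and x' the same virtual register %v. C's input set then contains %v
// once, and both A and B, which are parents of C, have %v in their output
// set. B has the higher top-down index, so the loop below credits the use to
// B, i.e. ++LiveOutRegsNumUsages[B][%v], instead of counting it for both.)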
1465 LiveOutRegsNumUsages.resize(Blocks.size());
1466 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
1
Assuming 'i' is equal to 'e'
2
Loop condition is false. Execution continues on line 1491
1467 SIScheduleBlock *Block = Blocks[i];
1468 for (unsigned Reg : Block->getInRegs()) {
1469 bool Found = false;
1470 int topoInd = -1;
1471 for (SIScheduleBlock* Pred: Block->getPreds()) {
1472 std::set<unsigned> PredOutRegs = Pred->getOutRegs();
1473 std::set<unsigned>::iterator RegPos = PredOutRegs.find(Reg);
1474
1475 if (RegPos != PredOutRegs.end()) {
1476 Found = true;
1477 if (topoInd < BlocksStruct.TopDownBlock2Index[Pred->getID()]) {
1478 topoInd = BlocksStruct.TopDownBlock2Index[Pred->getID()];
1479 }
1480 }
1481 }
1482
1483 if (!Found)
1484 continue;
1485
1486 int PredID = BlocksStruct.TopDownIndex2Block[topoInd];
1487 ++LiveOutRegsNumUsages[PredID][Reg];
1488 }
1489 }
1490
1491 LastPosHighLatencyParentScheduled.resize(Blocks.size(), 0);
1492 BlockNumPredsLeft.resize(Blocks.size());
1493 BlockNumSuccsLeft.resize(Blocks.size());
1494
1495 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
3
Assuming 'i' is equal to 'e'
4
Loop condition is false. Execution continues on line 1502
1496 SIScheduleBlock *Block = Blocks[i];
1497 BlockNumPredsLeft[i] = Block->getPreds().size();
1498 BlockNumSuccsLeft[i] = Block->getSuccs().size();
1499 }
1500
1501#ifndef NDEBUG
1502 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
5
Assuming 'i' is equal to 'e'
6
Loop condition is false. Execution continues on line 1508
1503 SIScheduleBlock *Block = Blocks[i];
1504 assert(Block->getID() == i);
1505 }
1506#endif
1507
1508 std::set<unsigned> InRegs = DAG->getInRegs();
1509 addLiveRegs(InRegs);
1510
1511 // Increase LiveOutRegsNumUsages for blocks
1512 // producing registers consumed in another
1513 // scheduling region.
1514 for (unsigned Reg : DAG->getOutRegs()) {
1515 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
1516 // Do reverse traversal
1517 int ID = BlocksStruct.TopDownIndex2Block[Blocks.size()-1-i];
1518 SIScheduleBlock *Block = Blocks[ID];
1519 const std::set<unsigned> &OutRegs = Block->getOutRegs();
1520
1521 if (OutRegs.find(Reg) == OutRegs.end())
1522 continue;
1523
1524 ++LiveOutRegsNumUsages[ID][Reg];
1525 break;
1526 }
1527 }
1528
1529 // Fill LiveRegsConsumers for regs that were already
1530 // defined before scheduling.
1531 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
7
Assuming 'i' is equal to 'e'
8
Loop condition is false. Execution continues on line 1550
1532 SIScheduleBlock *Block = Blocks[i];
1533 for (unsigned Reg : Block->getInRegs()) {
1534 bool Found = false;
1535 for (SIScheduleBlock* Pred: Block->getPreds()) {
1536 std::set<unsigned> PredOutRegs = Pred->getOutRegs();
1537 std::set<unsigned>::iterator RegPos = PredOutRegs.find(Reg);
1538
1539 if (RegPos != PredOutRegs.end()) {
1540 Found = true;
1541 break;
1542 }
1543 }
1544
1545 if (!Found)
1546 ++LiveRegsConsumers[Reg];
1547 }
1548 }
1549
1550 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
9
Assuming 'i' is equal to 'e'
10
Loop condition is false. Execution continues on line 1557
1551 SIScheduleBlock *Block = Blocks[i];
1552 if (BlockNumPredsLeft[i] == 0) {
1553 ReadyBlocks.push_back(Block);
1554 }
1555 }
1556
1557 while (SIScheduleBlock *Block = pickBlock()) {
11
Calling 'SIScheduleBlockScheduler::pickBlock'
1558 BlocksScheduled.push_back(Block);
1559 blockScheduled(Block);
1560 }
1561
1562 DEBUG(
1563 dbgs() << "Block Order:";
1564 for (SIScheduleBlock* Block : BlocksScheduled) {
1565 dbgs() << ' ' << Block->getID();
1566 }
1567 dbgs() << '\n';
1568 );
1569}
1570
1571bool SIScheduleBlockScheduler::tryCandidateLatency(SIBlockSchedCandidate &Cand,
1572 SIBlockSchedCandidate &TryCand) {
1573 if (!Cand.isValid()) {
1574 TryCand.Reason = NodeOrder;
1575 return true;
1576 }
1577
1578 // Try to hide high latencies.
1579 if (tryLess(TryCand.LastPosHighLatParentScheduled,
1580 Cand.LastPosHighLatParentScheduled, TryCand, Cand, Latency))
1581 return true;
1582 // Schedule high latencies early so you can hide them better.
1583 if (tryGreater(TryCand.IsHighLatency, Cand.IsHighLatency,
1584 TryCand, Cand, Latency))
1585 return true;
1586 if (TryCand.IsHighLatency && tryGreater(TryCand.Height, Cand.Height,
1587 TryCand, Cand, Depth))
1588 return true;
1589 if (tryGreater(TryCand.NumHighLatencySuccessors,
1590 Cand.NumHighLatencySuccessors,
1591 TryCand, Cand, Successor))
1592 return true;
1593 return false;
1594}
1595
1596bool SIScheduleBlockScheduler::tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
1597 SIBlockSchedCandidate &TryCand) {
1598 if (!Cand.isValid()) {
1599 TryCand.Reason = NodeOrder;
1600 return true;
1601 }
1602
1603 if (tryLess(TryCand.VGPRUsageDiff > 0, Cand.VGPRUsageDiff > 0,
1604 TryCand, Cand, RegUsage))
1605 return true;
1606 if (tryGreater(TryCand.NumSuccessors > 0,
1607 Cand.NumSuccessors > 0,
1608 TryCand, Cand, Successor))
1609 return true;
1610 if (tryGreater(TryCand.Height, Cand.Height, TryCand, Cand, Depth))
1611 return true;
1612 if (tryLess(TryCand.VGPRUsageDiff, Cand.VGPRUsageDiff,
1613 TryCand, Cand, RegUsage))
1614 return true;
1615 return false;
1616}
1617
1618SIScheduleBlock *SIScheduleBlockScheduler::pickBlock() {
1619 SIBlockSchedCandidate Cand;
1620 std::vector<SIScheduleBlock*>::iterator Best;
1621 SIScheduleBlock *Block;
1622 if (ReadyBlocks.empty())
12
Assuming the condition is false
13
Taking false branch
1623 return nullptr;
1624
1625 DAG->fillVgprSgprCost(LiveRegs.begin(), LiveRegs.end(),
1626 VregCurrentUsage, SregCurrentUsage);
1627 if (VregCurrentUsage > maxVregUsage)
14
Taking false branch
1628 maxVregUsage = VregCurrentUsage;
1629 if (SregCurrentUsage > maxSregUsage)
15
Taking false branch
1630 maxSregUsage = SregCurrentUsage;
1631 DEBUG(
1632 dbgs() << "Picking New Blocks\n";
1633 dbgs() << "Available: ";
1634 for (SIScheduleBlock* Block : ReadyBlocks)
1635 dbgs() << Block->getID() << ' ';
1636 dbgs() << "\nCurrent Live:\n";
1637 for (unsigned Reg : LiveRegs)
1638 dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' ';
1639 dbgs() << '\n';
1640 dbgs() << "Current VGPRs: " << VregCurrentUsage << '\n';
1641 dbgs() << "Current SGPRs: " << SregCurrentUsage << '\n';
1642 );
1643
1644 Cand.Block = nullptr;
16
Null pointer value stored to 'Cand.Block'
1645 for (std::vector<SIScheduleBlock*>::iterator I = ReadyBlocks.begin(),
17
Loop condition is false. Execution continues on line 1679
1646 E = ReadyBlocks.end(); I != E; ++I) {
1647 SIBlockSchedCandidate TryCand;
1648 TryCand.Block = *I;
1649 TryCand.IsHighLatency = TryCand.Block->isHighLatencyBlock();
1650 TryCand.VGPRUsageDiff =
1651 checkRegUsageImpact(TryCand.Block->getInRegs(),
1652 TryCand.Block->getOutRegs())[DAG->getVGPRSetID()];
1653 TryCand.NumSuccessors = TryCand.Block->getSuccs().size();
1654 TryCand.NumHighLatencySuccessors =
1655 TryCand.Block->getNumHighLatencySuccessors();
1656 TryCand.LastPosHighLatParentScheduled =
1657 (unsigned int) std::max<int> (0,
1658 LastPosHighLatencyParentScheduled[TryCand.Block->getID()] -
1659 LastPosWaitedHighLatency);
1660 TryCand.Height = TryCand.Block->Height;
1661 // Try not to increase VGPR usage too much, else we may spill.
1662 if (VregCurrentUsage > 120 ||
1663 Variant != SISchedulerBlockSchedulerVariant::BlockLatencyRegUsage) {
1664 if (!tryCandidateRegUsage(Cand, TryCand) &&
1665 Variant != SISchedulerBlockSchedulerVariant::BlockRegUsage)
1666 tryCandidateLatency(Cand, TryCand);
1667 } else {
1668 if (!tryCandidateLatency(Cand, TryCand))
1669 tryCandidateRegUsage(Cand, TryCand);
1670 }
1671 if (TryCand.Reason != NoCand) {
1672 Cand.setBest(TryCand);
1673 Best = I;
1674 DEBUG(dbgs() << "Best Current Choice: " << Cand.Block->getID() << ' '
1675 << getReasonStr(Cand.Reason) << '\n');
1676 }
1677 }
1678
1679 DEBUG(
18
Within the expansion of the macro 'DEBUG':
a
Assuming 'DebugFlag' is not equal to 0
b
Assuming the condition is true
c
Called C++ object pointer is null
1680 dbgs() << "Picking: " << Cand.Block->getID() << '\n';
1681 dbgs() << "Is a block with high latency instruction: "
1682 << (Cand.IsHighLatency ? "yes\n" : "no\n");
1683 dbgs() << "Position of last high latency dependency: "
1684 << Cand.LastPosHighLatParentScheduled << '\n';
1685 dbgs() << "VGPRUsageDiff: " << Cand.VGPRUsageDiff << '\n';
1686 dbgs() << '\n';
1687 );
1688
1689 Block = Cand.Block;
1690 ReadyBlocks.erase(Best);
1691 return Block;
1692}
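
This is the function the warning at line 1679 points into: Cand.Block is set to nullptr at line 1644 and is only overwritten inside the candidate loop, so on the analyzer's path where ReadyBlocks is assumed non-empty at line 1622 yet the loop at line 1645 is never entered, the DEBUG block at line 1679 dereferences a null pointer. The surrounding code rules that combination out in practice, but an explicit check or assert would make the invariant visible. Below is a minimal standalone sketch of that defensive pattern; the names and data are hypothetical and this is not a patch to SIMachineScheduler.cpp.

#include <cassert>
#include <iostream>
#include <vector>

struct Candidate { int ID; int Score; };

static const Candidate *pickBest(const std::vector<Candidate> &Ready) {
  const Candidate *Best = nullptr;     // mirrors Cand.Block = nullptr
  for (const Candidate &C : Ready)
    if (!Best || C.Score > Best->Score)
      Best = &C;
  // Either state the invariant explicitly...
  assert((Ready.empty() || Best) && "non-empty ready list must yield a pick");
  // ...or guard the dereference so a null pick cannot be printed.
  if (Best)
    std::cout << "Picking: " << Best->ID << '\n';
  return Best;
}

int main() {
  std::vector<Candidate> Ready = {{0, 3}, {1, 7}};
  const Candidate *B = pickBest(Ready);
  return B && B->ID == 1 ? 0 : 1;
}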
1693
1694// Tracking of currently alive registers to determine VGPR Usage.
1695
1696void SIScheduleBlockScheduler::addLiveRegs(std::set<unsigned> &Regs) {
1697 for (unsigned Reg : Regs) {
1698 // For now only track virtual registers.
1699 if (!TargetRegisterInfo::isVirtualRegister(Reg))
1700 continue;
1701 // If not already in the live set, then add it.
1702 (void) LiveRegs.insert(Reg);
1703 }
1704}
1705
1706void SIScheduleBlockScheduler::decreaseLiveRegs(SIScheduleBlock *Block,
1707 std::set<unsigned> &Regs) {
1708 for (unsigned Reg : Regs) {
1709 // For now only track virtual registers.
1710 std::set<unsigned>::iterator Pos = LiveRegs.find(Reg);
1711 assert (Pos != LiveRegs.end() && // Reg must be live.
1712 LiveRegsConsumers.find(Reg) != LiveRegsConsumers.end() &&
1713 LiveRegsConsumers[Reg] >= 1);
1714 --LiveRegsConsumers[Reg];
1715 if (LiveRegsConsumers[Reg] == 0)
1716 LiveRegs.erase(Pos);
1717 }
1718}
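
addLiveRegs and decreaseLiveRegs implement a small reference-counted live set: LiveRegsConsumers records how many scheduled consumers are still expected for each register, and a register only leaves LiveRegs once that count reaches zero. A toy sketch of the pattern follows; the register number is invented and the snippet is independent of the LLVM types.

#include <cassert>
#include <map>
#include <set>

int main() {
  std::set<unsigned> LiveRegs;
  std::map<unsigned, unsigned> LiveRegsConsumers;

  // A block produces vreg 42 with two known consumers.
  LiveRegs.insert(42);
  LiveRegsConsumers[42] += 2;

  // First consumer scheduled: the register stays live.
  if (--LiveRegsConsumers[42] == 0)
    LiveRegs.erase(42);
  assert(LiveRegs.count(42) == 1);

  // Second consumer scheduled: the register is now dead.
  if (--LiveRegsConsumers[42] == 0)
    LiveRegs.erase(42);
  assert(LiveRegs.count(42) == 0);
  return 0;
}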
1719
1720void SIScheduleBlockScheduler::releaseBlockSuccs(SIScheduleBlock *Parent) {
1721 for (const auto &Block : Parent->getSuccs()) {
1722 if (--BlockNumPredsLeft[Block.first->getID()] == 0)
1723 ReadyBlocks.push_back(Block.first);
1724
1725 if (Parent->isHighLatencyBlock() &&
1726 Block.second == SIScheduleBlockLinkKind::Data)
1727 LastPosHighLatencyParentScheduled[Block.first->getID()] = NumBlockScheduled;
1728 }
1729}
1730
1731void SIScheduleBlockScheduler::blockScheduled(SIScheduleBlock *Block) {
1732 decreaseLiveRegs(Block, Block->getInRegs());
1733 addLiveRegs(Block->getOutRegs());
1734 releaseBlockSuccs(Block);
1735 for (std::map<unsigned, unsigned>::iterator RegI =
1736 LiveOutRegsNumUsages[Block->getID()].begin(),
1737 E = LiveOutRegsNumUsages[Block->getID()].end(); RegI != E; ++RegI) {
1738 std::pair<unsigned, unsigned> RegP = *RegI;
1739 // We produce this register, thus it must not be previously alive.
1740 assert(LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end() ||
1741 LiveRegsConsumers[RegP.first] == 0);
1742 LiveRegsConsumers[RegP.first] += RegP.second;
1743 }
1744 if (LastPosHighLatencyParentScheduled[Block->getID()] >
1745 (unsigned)LastPosWaitedHighLatency)
1746 LastPosWaitedHighLatency =
1747 LastPosHighLatencyParentScheduled[Block->getID()];
1748 ++NumBlockScheduled;
1749}
1750
1751std::vector<int>
1752SIScheduleBlockScheduler::checkRegUsageImpact(std::set<unsigned> &InRegs,
1753 std::set<unsigned> &OutRegs) {
1754 std::vector<int> DiffSetPressure;
1755 DiffSetPressure.assign(DAG->getTRI()->getNumRegPressureSets(), 0);
1756
1757 for (unsigned Reg : InRegs) {
1758 // For now only track virtual registers.
1759 if (!TargetRegisterInfo::isVirtualRegister(Reg))
1760 continue;
1761 if (LiveRegsConsumers[Reg] > 1)
1762 continue;
1763 PSetIterator PSetI = DAG->getMRI()->getPressureSets(Reg);
1764 for (; PSetI.isValid(); ++PSetI) {
1765 DiffSetPressure[*PSetI] -= PSetI.getWeight();
1766 }
1767 }
1768
1769 for (unsigned Reg : OutRegs) {
1770 // For now only track virtual registers.
1771 if (!TargetRegisterInfo::isVirtualRegister(Reg))
1772 continue;
1773 PSetIterator PSetI = DAG->getMRI()->getPressureSets(Reg);
1774 for (; PSetI.isValid(); ++PSetI) {
1775 DiffSetPressure[*PSetI] += PSetI.getWeight();
1776 }
1777 }
1778
1779 return DiffSetPressure;
1780}
1781
1782// SIScheduler //
1783
1784struct SIScheduleBlockResult
1785SIScheduler::scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
1786 SISchedulerBlockSchedulerVariant ScheduleVariant) {
1787 SIScheduleBlocks Blocks = BlockCreator.getBlocks(BlockVariant);
1788 SIScheduleBlockScheduler Scheduler(DAG, ScheduleVariant, Blocks);
1789 std::vector<SIScheduleBlock*> ScheduledBlocks;
1790 struct SIScheduleBlockResult Res;
1791
1792 ScheduledBlocks = Scheduler.getBlocks();
1793
1794 for (unsigned b = 0; b < ScheduledBlocks.size(); ++b) {
1795 SIScheduleBlock *Block = ScheduledBlocks[b];
1796 std::vector<SUnit*> SUs = Block->getScheduledUnits();
1797
1798 for (SUnit* SU : SUs)
1799 Res.SUs.push_back(SU->NodeNum);
1800 }
1801
1802 Res.MaxSGPRUsage = Scheduler.getSGPRUsage();
1803 Res.MaxVGPRUsage = Scheduler.getVGPRUsage();
1804 return Res;
1805}
1806
1807// SIScheduleDAGMI //
1808
1809SIScheduleDAGMI::SIScheduleDAGMI(MachineSchedContext *C) :
1810 ScheduleDAGMILive(C, llvm::make_unique<GenericScheduler>(C)) {
1811 SITII = static_cast<const SIInstrInfo*>(TII);
1812 SITRI = static_cast<const SIRegisterInfo*>(TRI);
1813
1814 VGPRSetID = SITRI->getVGPRPressureSet();
1815 SGPRSetID = SITRI->getSGPRPressureSet();
1816}
1817
1818SIScheduleDAGMI::~SIScheduleDAGMI() = default;
1819
1820// Code adapted from scheduleDAG.cpp
1821// Does a topological sort over the SUs.
1822// Both TopDown and BottomUp
1823void SIScheduleDAGMI::topologicalSort() {
1824 Topo.InitDAGTopologicalSorting();
1825
1826 TopDownIndex2SU = std::vector<int>(Topo.begin(), Topo.end());
1827 BottomUpIndex2SU = std::vector<int>(Topo.rbegin(), Topo.rend());
1828}
1829
1830 // Move low-latency instructions further from their users without
1831 // increasing SGPR usage (in general).
1832 // This is to be replaced by a better pass that would
1833 // take SGPR usage into account (based on the VGPR usage
1834 // and the corresponding wavefront count), and that would
1835 // try to merge groups of loads when it makes sense, etc.
1836void SIScheduleDAGMI::moveLowLatencies() {
1837 unsigned DAGSize = SUnits.size();
1838 int LastLowLatencyUser = -1;
1839 int LastLowLatencyPos = -1;
1840
1841 for (unsigned i = 0, e = ScheduledSUnits.size(); i != e; ++i) {
1842 SUnit *SU = &SUnits[ScheduledSUnits[i]];
1843 bool IsLowLatencyUser = false;
1844 unsigned MinPos = 0;
1845
1846 for (SDep& PredDep : SU->Preds) {
1847 SUnit *Pred = PredDep.getSUnit();
1848 if (SITII->isLowLatencyInstruction(*Pred->getInstr())) {
1849 IsLowLatencyUser = true;
1850 }
1851 if (Pred->NodeNum >= DAGSize)
1852 continue;
1853 unsigned PredPos = ScheduledSUnitsInv[Pred->NodeNum];
1854 if (PredPos >= MinPos)
1855 MinPos = PredPos + 1;
1856 }
1857
1858 if (SITII->isLowLatencyInstruction(*SU->getInstr())) {
1859 unsigned BestPos = LastLowLatencyUser + 1;
1860 if ((int)BestPos <= LastLowLatencyPos)
1861 BestPos = LastLowLatencyPos + 1;
1862 if (BestPos < MinPos)
1863 BestPos = MinPos;
1864 if (BestPos < i) {
1865 for (unsigned u = i; u > BestPos; --u) {
1866 ++ScheduledSUnitsInv[ScheduledSUnits[u-1]];
1867 ScheduledSUnits[u] = ScheduledSUnits[u-1];
1868 }
1869 ScheduledSUnits[BestPos] = SU->NodeNum;
1870 ScheduledSUnitsInv[SU->NodeNum] = BestPos;
1871 }
1872 LastLowLatencyPos = BestPos;
1873 if (IsLowLatencyUser)
1874 LastLowLatencyUser = BestPos;
1875 } else if (IsLowLatencyUser) {
1876 LastLowLatencyUser = i;
1877 // Also move the COPY instructions that the
1878 // low-latency instructions depend on.
1879 } else if (SU->getInstr()->getOpcode() == AMDGPU::COPY) {
1880 bool CopyForLowLat = false;
1881 for (SDep& SuccDep : SU->Succs) {
1882 SUnit *Succ = SuccDep.getSUnit();
1883 if (SITII->isLowLatencyInstruction(*Succ->getInstr())) {
1884 CopyForLowLat = true;
1885 }
1886 }
1887 if (!CopyForLowLat)
1888 continue;
1889 if (MinPos < i) {
1890 for (unsigned u = i; u > MinPos; --u) {
1891 ++ScheduledSUnitsInv[ScheduledSUnits[u-1]];
1892 ScheduledSUnits[u] = ScheduledSUnits[u-1];
1893 }
1894 ScheduledSUnits[MinPos] = SU->NodeNum;
1895 ScheduledSUnitsInv[SU->NodeNum] = MinPos;
1896 }
1897 }
1898 }
1899}
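
The rotation inside moveLowLatencies() keeps two arrays consistent: ScheduledSUnits maps a slot to an SU and ScheduledSUnitsInv maps an SU back to its slot; moving an SU to an earlier slot shifts the displaced entries one slot later while bumping their inverse entries. A standalone sketch with toy data (not LLVM code) is shown below.

#include <cassert>
#include <vector>

int main() {
  std::vector<unsigned> ScheduledSUnits = {5, 7, 9, 3};   // slot -> SU
  std::vector<unsigned> ScheduledSUnitsInv(10);
  for (unsigned Slot = 0; Slot < ScheduledSUnits.size(); ++Slot)
    ScheduledSUnitsInv[ScheduledSUnits[Slot]] = Slot;     // SU -> slot

  unsigned i = 3, BestPos = 1;                            // move SU 3 earlier
  unsigned SU = ScheduledSUnits[i];
  for (unsigned u = i; u > BestPos; --u) {
    ++ScheduledSUnitsInv[ScheduledSUnits[u - 1]];         // displaced one slot later
    ScheduledSUnits[u] = ScheduledSUnits[u - 1];
  }
  ScheduledSUnits[BestPos] = SU;
  ScheduledSUnitsInv[SU] = BestPos;

  assert(ScheduledSUnits[1] == 3 && ScheduledSUnits[2] == 7 &&
         ScheduledSUnits[3] == 9 && ScheduledSUnitsInv[3] == 1);
  return 0;
}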
1900
1901void SIScheduleDAGMI::restoreSULinksLeft() {
1902 for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
1903 SUnits[i].isScheduled = false;
1904 SUnits[i].WeakPredsLeft = SUnitsLinksBackup[i].WeakPredsLeft;
1905 SUnits[i].NumPredsLeft = SUnitsLinksBackup[i].NumPredsLeft;
1906 SUnits[i].WeakSuccsLeft = SUnitsLinksBackup[i].WeakSuccsLeft;
1907 SUnits[i].NumSuccsLeft = SUnitsLinksBackup[i].NumSuccsLeft;
1908 }
1909}
1910
1911// Return the Vgpr and Sgpr usage corresponding to some virtual registers.
1912template<typename _Iterator> void
1913SIScheduleDAGMI::fillVgprSgprCost(_Iterator First, _Iterator End,
1914 unsigned &VgprUsage, unsigned &SgprUsage) {
1915 VgprUsage = 0;
1916 SgprUsage = 0;
1917 for (_Iterator RegI = First; RegI != End; ++RegI) {
1918 unsigned Reg = *RegI;
1919 // For now only track virtual registers
1920 if (!TargetRegisterInfo::isVirtualRegister(Reg))
1921 continue;
1922 PSetIterator PSetI = MRI.getPressureSets(Reg);
1923 for (; PSetI.isValid(); ++PSetI) {
1924 if (*PSetI == VGPRSetID)
1925 VgprUsage += PSetI.getWeight();
1926 else if (*PSetI == SGPRSetID)
1927 SgprUsage += PSetI.getWeight();
1928 }
1929 }
1930}
1931
1932void SIScheduleDAGMI::schedule()
1933{
1934 SmallVector<SUnit*, 8> TopRoots, BotRoots;
1935 SIScheduleBlockResult Best, Temp;
1936 DEBUG(dbgs() << "Preparing Scheduling\n");
1937
1938 buildDAGWithRegPressure();
1939 DEBUG(
1940 for(SUnit& SU : SUnits)
1941 SU.dumpAll(this)
1942 );
1943
1944 topologicalSort();
1945 findRootsAndBiasEdges(TopRoots, BotRoots);
1946 // We reuse several ScheduleDAGMI and ScheduleDAGMILive
1947 // functions, but to make them happy we must initialize
1948 // the default Scheduler implementation (even if we do not
1949 // run it)
1950 SchedImpl->initialize(this);
1951 initQueues(TopRoots, BotRoots);
1952
1953 // Fill some stats to help scheduling.
1954
1955 SUnitsLinksBackup = SUnits;
1956 IsLowLatencySU.clear();
1957 LowLatencyOffset.clear();
1958 IsHighLatencySU.clear();
1959
1960 IsLowLatencySU.resize(SUnits.size(), 0);
1961 LowLatencyOffset.resize(SUnits.size(), 0);
1962 IsHighLatencySU.resize(SUnits.size(), 0);
1963
1964 for (unsigned i = 0, e = (unsigned)SUnits.size(); i != e; ++i) {
1965 SUnit *SU = &SUnits[i];
1966 unsigned BaseLatReg;
1967 int64_t OffLatReg;
1968 if (SITII->isLowLatencyInstruction(*SU->getInstr())) {
1969 IsLowLatencySU[i] = 1;
1970 if (SITII->getMemOpBaseRegImmOfs(*SU->getInstr(), BaseLatReg, OffLatReg,
1971 TRI))
1972 LowLatencyOffset[i] = OffLatReg;
1973 } else if (SITII->isHighLatencyInstruction(*SU->getInstr()))
1974 IsHighLatencySU[i] = 1;
1975 }
1976
1977 SIScheduler Scheduler(this);
1978 Best = Scheduler.scheduleVariant(SISchedulerBlockCreatorVariant::LatenciesAlone,
1979 SISchedulerBlockSchedulerVariant::BlockLatencyRegUsage);
1980
1981 // If VGPR usage is extremely high, try other well-performing variants
1982 // that could lead to lower VGPR usage.
1983 if (Best.MaxVGPRUsage > 180) {
1984 static const std::pair<SISchedulerBlockCreatorVariant,
1985 SISchedulerBlockSchedulerVariant>
1986 Variants[] = {
1987 { LatenciesAlone, BlockRegUsageLatency },
1988// { LatenciesAlone, BlockRegUsage },
1989 { LatenciesGrouped, BlockLatencyRegUsage },
1990// { LatenciesGrouped, BlockRegUsageLatency },
1991// { LatenciesGrouped, BlockRegUsage },
1992 { LatenciesAlonePlusConsecutive, BlockLatencyRegUsage },
1993// { LatenciesAlonePlusConsecutive, BlockRegUsageLatency },
1994// { LatenciesAlonePlusConsecutive, BlockRegUsage }
1995 };
1996 for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
1997 Temp = Scheduler.scheduleVariant(v.first, v.second);
1998 if (Temp.MaxVGPRUsage < Best.MaxVGPRUsage)
1999 Best = Temp;
2000 }
2001 }
2002 // If VGPR usage is still extremely high, we may spill. Try other variants
2003 // which perform less well but could lead to lower VGPR usage.
2004 if (Best.MaxVGPRUsage > 200) {
2005 static const std::pair<SISchedulerBlockCreatorVariant,
2006 SISchedulerBlockSchedulerVariant>
2007 Variants[] = {
2008// { LatenciesAlone, BlockRegUsageLatency },
2009 { LatenciesAlone, BlockRegUsage },
2010// { LatenciesGrouped, BlockLatencyRegUsage },
2011 { LatenciesGrouped, BlockRegUsageLatency },
2012 { LatenciesGrouped, BlockRegUsage },
2013// { LatenciesAlonePlusConsecutive, BlockLatencyRegUsage },
2014 { LatenciesAlonePlusConsecutive, BlockRegUsageLatency },
2015 { LatenciesAlonePlusConsecutive, BlockRegUsage }
2016 };
2017 for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
2018 Temp = Scheduler.scheduleVariant(v.first, v.second);
2019 if (Temp.MaxVGPRUsage < Best.MaxVGPRUsage)
2020 Best = Temp;
2021 }
2022 }
2023
2024 ScheduledSUnits = Best.SUs;
2025 ScheduledSUnitsInv.resize(SUnits.size());
2026
2027 for (unsigned i = 0, e = (unsigned)SUnits.size(); i != e; ++i) {
2028 ScheduledSUnitsInv[ScheduledSUnits[i]] = i;
2029 }
2030
2031 moveLowLatencies();
2032
2033 // Tell the outside world about the result of the scheduling.
2034
2035 assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
2036 TopRPTracker.setPos(CurrentTop);
2037
2038 for (std::vector<unsigned>::iterator I = ScheduledSUnits.begin(),
2039 E = ScheduledSUnits.end(); I != E; ++I) {
2040 SUnit *SU = &SUnits[*I];
2041
2042 scheduleMI(SU, true);
2043
2044 DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
2045 << *SU->getInstr());
2046 }
2047
2048 assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
2049
2050 placeDebugValues();
2051
2052 DEBUG({
2053 dbgs() << "*** Final schedule for "
2054 << printMBBReference(*begin()->getParent()) << " ***\n";
2055 dumpSchedule();
2056 dbgs() << '\n';
2057 });
2058}
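
The variant-retry logic in schedule() applies an escalating fallback: the default variant is kept unless its maximum VGPR usage crosses a threshold, and an alternative variant is only adopted if it actually lowers that maximum. A toy sketch of the selection logic follows; the numbers and the Result struct are invented for illustration and are not the LLVM types.

#include <cassert>
#include <vector>

struct Result { unsigned MaxVGPRUsage; };

int main() {
  Result Best = {190};                         // outcome of the default variant
  std::vector<Result> Alternatives = {{185}, {170}, {195}};

  if (Best.MaxVGPRUsage > 180)                 // only retry above the threshold
    for (const Result &Temp : Alternatives)
      if (Temp.MaxVGPRUsage < Best.MaxVGPRUsage)
        Best = Temp;                           // keep the lowest-pressure result

  assert(Best.MaxVGPRUsage == 170);
  return 0;
}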