Bug Summary

File: lib/Target/AMDGPU/SIMachineScheduler.cpp
Warning: line 792, column 9
Value stored to 'ProposedColor' is never read (dead store in colorHighLatenciesGroups)

Annotated Source Code

1//===-- SIMachineScheduler.cpp - SI Scheduler Interface -------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief SI Machine Scheduler interface
12//
13//===----------------------------------------------------------------------===//
14
15#include "SIMachineScheduler.h"
16#include "AMDGPU.h"
17#include "SIInstrInfo.h"
18#include "SIRegisterInfo.h"
19#include "llvm/ADT/STLExtras.h"
20#include "llvm/ADT/SmallVector.h"
21#include "llvm/CodeGen/LiveInterval.h"
22#include "llvm/CodeGen/LiveIntervalAnalysis.h"
23#include "llvm/CodeGen/MachineInstr.h"
24#include "llvm/CodeGen/MachineRegisterInfo.h"
25#include "llvm/CodeGen/MachineScheduler.h"
26#include "llvm/CodeGen/RegisterPressure.h"
27#include "llvm/CodeGen/SlotIndexes.h"
28#include "llvm/Support/Debug.h"
29#include "llvm/Support/ErrorHandling.h"
30#include "llvm/Support/raw_ostream.h"
31#include "llvm/Target/TargetRegisterInfo.h"
32#include <algorithm>
33#include <cassert>
34#include <map>
35#include <set>
36#include <utility>
37#include <vector>
38
39using namespace llvm;
40
41#define DEBUG_TYPE"misched" "misched"
42
43// This scheduler implements a different scheduling algorithm than
44// GenericScheduler.
45//
46// There are several specific architecture behaviours that can't be modelled
47// for GenericScheduler:
48// . When accessing the result of an SGPR load instruction, you have to wait
49// for all the SGPR load instructions before your current instruction to
50// have finished.
51// . When accessing the result of an VGPR load instruction, you have to wait
52// for all the VGPR load instructions previous to the VGPR load instruction
53// you are interested in to finish.
54// . The lower the register pressure, the better load latencies are hidden
55//
56// Moreover some specificities (like the fact that a lot of instructions in the shader
57// have few dependencies) makes the generic scheduler have some unpredictable
58// behaviours. For example when register pressure becomes high, it can either
59// manage to prevent register pressure from going too high, or it can
60// increase register pressure even more than if it hadn't taken register
61// pressure into account.
62//
63// Also some other bad behaviours are generated, like loading at the beginning
64// of the shader a constant in VGPR you won't need until the end of the shader.
65//
66// The scheduling problem for SI can distinguish three main parts:
67// . Hiding high latencies (texture sampling, etc)
68// . Hiding low latencies (SGPR constant loading, etc)
69// . Keeping register usage low for better latency hiding and general
70// performance
71//
72// Some other things can also affect performance, but are hard to predict
73// (cache usage, the fact the HW can issue several instructions from different
74// wavefronts if different types, etc)
75//
76// This scheduler tries to solve the scheduling problem by dividing it into
77// simpler sub-problems. It divides the instructions into blocks, schedules
78// locally inside the blocks where it takes care of low latencies, and then
79// chooses the order of the blocks by taking care of high latencies.
80// Dividing the instructions into blocks helps control keeping register
81// usage low.
82//
83// First the instructions are put into blocks.
84// We want the blocks help control register usage and hide high latencies
85// later. To help control register usage, we typically want all local
86// computations, when for example you create a result that can be consumed
87// right away, to be contained in a block. Block inputs and outputs would
88// typically be important results that are needed in several locations of
89// the shader. Since we do want blocks to help hide high latencies, we want
90// the instructions inside the block to have a minimal set of dependencies
91// on high latencies. It will make it easy to pick blocks to hide specific
92// high latencies.
93// The block creation algorithm is divided into several steps, and several
94// variants can be tried during the scheduling process.
95//
96// Second the order of the instructions inside the blocks is chosen.
97// At that step we do take into account only register usage and hiding
98// low latency instructions
99//
100// Third the block order is chosen, there we try to hide high latencies
101// and keep register usage low.
102//
103// After the third step, a pass is done to improve the hiding of low
104// latencies.
105//
106// Actually when talking about 'low latency' or 'high latency' it includes
107// both the latency to get the cache (or global mem) data go to the register,
108// and the bandwidth limitations.
109// Increasing the number of active wavefronts helps hide the former, but it
110// doesn't solve the latter, thus why even if wavefront count is high, we have
111// to try have as many instructions hiding high latencies as possible.
112// The OpenCL doc says for example latency of 400 cycles for a global mem access,
113// which is hidden by 10 instructions if the wavefront count is 10.
114
115// Some figures taken from AMD docs:
116// Both texture and constant L1 caches are 4-way associative with 64 bytes
117// lines.
118// Constant cache is shared with 4 CUs.
119// For texture sampling, the address generation unit receives 4 texture
120// addresses per cycle, thus we could expect texture sampling latency to be
121// equivalent to 4 instructions in the very best case (a VGPR is 64 work items,
122// instructions in a wavefront group are executed every 4 cycles),
123// or 16 instructions if the other wavefronts associated to the 3 other VALUs
124// of the CU do texture sampling too. (Don't take these figures too seriously,
125// as I'm not 100% sure of the computation)
126// Data exports should get similar latency.
127// For constant loading, the cache is shared with 4 CUs.
128// The doc says "a throughput of 16B/cycle for each of the 4 Compute Unit"
129// I guess if the other CU don't read the cache, it can go up to 64B/cycle.
130// It means a simple s_buffer_load should take one instruction to hide, as
131// well as a s_buffer_loadx2 and potentially a s_buffer_loadx8 if on the same
132// cache line.
133//
134// As of today the driver doesn't preload the constants in cache, thus the
135// first loads get extra latency. The doc says global memory access can be
136// 300-600 cycles. We do not specially take that into account when scheduling
137// As we expect the driver to be able to preload the constants soon.
138
139// common code //
140
141#ifndef NDEBUG
142
143static const char *getReasonStr(SIScheduleCandReason Reason) {
144 switch (Reason) {
145 case NoCand: return "NOCAND";
146 case RegUsage: return "REGUSAGE";
147 case Latency: return "LATENCY";
148 case Successor: return "SUCCESSOR";
149 case Depth: return "DEPTH";
150 case NodeOrder: return "ORDER";
151 }
152 llvm_unreachable("Unknown reason!")::llvm::llvm_unreachable_internal("Unknown reason!", "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn306458/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 152)
;
153}
154
155#endif
156
157static bool tryLess(int TryVal, int CandVal,
158 SISchedulerCandidate &TryCand,
159 SISchedulerCandidate &Cand,
160 SIScheduleCandReason Reason) {
161 if (TryVal < CandVal) {
162 TryCand.Reason = Reason;
163 return true;
164 }
165 if (TryVal > CandVal) {
166 if (Cand.Reason > Reason)
167 Cand.Reason = Reason;
168 return true;
169 }
170 Cand.setRepeat(Reason);
171 return false;
172}
173
174static bool tryGreater(int TryVal, int CandVal,
175 SISchedulerCandidate &TryCand,
176 SISchedulerCandidate &Cand,
177 SIScheduleCandReason Reason) {
178 if (TryVal > CandVal) {
179 TryCand.Reason = Reason;
180 return true;
181 }
182 if (TryVal < CandVal) {
183 if (Cand.Reason > Reason)
184 Cand.Reason = Reason;
185 return true;
186 }
187 Cand.setRepeat(Reason);
188 return false;
189}
190
191// SIScheduleBlock //
192
193void SIScheduleBlock::addUnit(SUnit *SU) {
194 NodeNum2Index[SU->NodeNum] = SUnits.size();
195 SUnits.push_back(SU);
196}
197
198#ifndef NDEBUG
199void SIScheduleBlock::traceCandidate(const SISchedCandidate &Cand) {
200
201 dbgs() << " SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
202 dbgs() << '\n';
203}
204#endif
205
206void SIScheduleBlock::tryCandidateTopDown(SISchedCandidate &Cand,
207 SISchedCandidate &TryCand) {
208 // Initialize the candidate if needed.
209 if (!Cand.isValid()) {
210 TryCand.Reason = NodeOrder;
211 return;
212 }
213
214 if (Cand.SGPRUsage > 60 &&
215 tryLess(TryCand.SGPRUsage, Cand.SGPRUsage, TryCand, Cand, RegUsage))
216 return;
217
218 // Schedule low latency instructions as top as possible.
219 // Order of priority is:
220 // . Low latency instructions which do not depend on other low latency
221 // instructions we haven't waited for
222 // . Other instructions which do not depend on low latency instructions
223 // we haven't waited for
224 // . Low latencies
225 // . All other instructions
226 // Goal is to get: low latency instructions - independent instructions
227 // - (eventually some more low latency instructions)
228 // - instructions that depend on the first low latency instructions.
229 // If in the block there is a lot of constant loads, the SGPR usage
230 // could go quite high, thus above the arbitrary limit of 60 will encourage
231 // use the already loaded constants (in order to release some SGPRs) before
232 // loading more.
233 if (tryLess(TryCand.HasLowLatencyNonWaitedParent,
234 Cand.HasLowLatencyNonWaitedParent,
235 TryCand, Cand, SIScheduleCandReason::Depth))
236 return;
237
238 if (tryGreater(TryCand.IsLowLatency, Cand.IsLowLatency,
239 TryCand, Cand, SIScheduleCandReason::Depth))
240 return;
241
242 if (TryCand.IsLowLatency &&
243 tryLess(TryCand.LowLatencyOffset, Cand.LowLatencyOffset,
244 TryCand, Cand, SIScheduleCandReason::Depth))
245 return;
246
247 if (tryLess(TryCand.VGPRUsage, Cand.VGPRUsage, TryCand, Cand, RegUsage))
248 return;
249
250 // Fall through to original instruction order.
251 if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
252 TryCand.Reason = NodeOrder;
253 }
254}
255
256SUnit* SIScheduleBlock::pickNode() {
257 SISchedCandidate TopCand;
258
259 for (SUnit* SU : TopReadySUs) {
260 SISchedCandidate TryCand;
261 std::vector<unsigned> pressure;
262 std::vector<unsigned> MaxPressure;
263 // Predict register usage after this instruction.
264 TryCand.SU = SU;
265 TopRPTracker.getDownwardPressure(SU->getInstr(), pressure, MaxPressure);
266 TryCand.SGPRUsage = pressure[DAG->getSGPRSetID()];
267 TryCand.VGPRUsage = pressure[DAG->getVGPRSetID()];
268 TryCand.IsLowLatency = DAG->IsLowLatencySU[SU->NodeNum];
269 TryCand.LowLatencyOffset = DAG->LowLatencyOffset[SU->NodeNum];
270 TryCand.HasLowLatencyNonWaitedParent =
271 HasLowLatencyNonWaitedParent[NodeNum2Index[SU->NodeNum]];
272 tryCandidateTopDown(TopCand, TryCand);
273 if (TryCand.Reason != NoCand)
274 TopCand.setBest(TryCand);
275 }
276
277 return TopCand.SU;
278}
279
280
281// Schedule something valid.
282void SIScheduleBlock::fastSchedule() {
283 TopReadySUs.clear();
284 if (Scheduled)
285 undoSchedule();
286
287 for (SUnit* SU : SUnits) {
288 if (!SU->NumPredsLeft)
289 TopReadySUs.push_back(SU);
290 }
291
292 while (!TopReadySUs.empty()) {
293 SUnit *SU = TopReadySUs[0];
294 ScheduledSUnits.push_back(SU);
295 nodeScheduled(SU);
296 }
297
298 Scheduled = true;
299}
300
301// Returns if the register was set between first and last.
302static bool isDefBetween(unsigned Reg,
303 SlotIndex First, SlotIndex Last,
304 const MachineRegisterInfo *MRI,
305 const LiveIntervals *LIS) {
306 for (MachineRegisterInfo::def_instr_iterator
307 UI = MRI->def_instr_begin(Reg),
308 UE = MRI->def_instr_end(); UI != UE; ++UI) {
309 const MachineInstr* MI = &*UI;
310 if (MI->isDebugValue())
311 continue;
312 SlotIndex InstSlot = LIS->getInstructionIndex(*MI).getRegSlot();
313 if (InstSlot >= First && InstSlot <= Last)
314 return true;
315 }
316 return false;
317}
318
319void SIScheduleBlock::initRegPressure(MachineBasicBlock::iterator BeginBlock,
320 MachineBasicBlock::iterator EndBlock) {
321 IntervalPressure Pressure, BotPressure;
322 RegPressureTracker RPTracker(Pressure), BotRPTracker(BotPressure);
323 LiveIntervals *LIS = DAG->getLIS();
324 MachineRegisterInfo *MRI = DAG->getMRI();
325 DAG->initRPTracker(TopRPTracker);
326 DAG->initRPTracker(BotRPTracker);
327 DAG->initRPTracker(RPTracker);
328
329 // Goes though all SU. RPTracker captures what had to be alive for the SUs
330 // to execute, and what is still alive at the end.
331 for (SUnit* SU : ScheduledSUnits) {
332 RPTracker.setPos(SU->getInstr());
333 RPTracker.advance();
334 }
335
336 // Close the RPTracker to finalize live ins/outs.
337 RPTracker.closeRegion();
338
339 // Initialize the live ins and live outs.
340 TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
341 BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
342
343 // Do not Track Physical Registers, because it messes up.
344 for (const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) {
345 if (TargetRegisterInfo::isVirtualRegister(RegMaskPair.RegUnit))
346 LiveInRegs.insert(RegMaskPair.RegUnit);
347 }
348 LiveOutRegs.clear();
349 // There is several possibilities to distinguish:
350 // 1) Reg is not input to any instruction in the block, but is output of one
351 // 2) 1) + read in the block and not needed after it
352 // 3) 1) + read in the block but needed in another block
353 // 4) Reg is input of an instruction but another block will read it too
354 // 5) Reg is input of an instruction and then rewritten in the block.
355 // result is not read in the block (implies used in another block)
356 // 6) Reg is input of an instruction and then rewritten in the block.
357 // result is read in the block and not needed in another block
358 // 7) Reg is input of an instruction and then rewritten in the block.
359 // result is read in the block but also needed in another block
360 // LiveInRegs will contains all the regs in situation 4, 5, 6, 7
361 // We want LiveOutRegs to contain only Regs whose content will be read after
362 // in another block, and whose content was written in the current block,
363 // that is we want it to get 1, 3, 5, 7
364 // Since we made the MIs of a block to be packed all together before
365 // scheduling, then the LiveIntervals were correct, and the RPTracker was
366 // able to correctly handle 5 vs 6, 2 vs 3.
367 // (Note: This is not sufficient for RPTracker to not do mistakes for case 4)
368 // The RPTracker's LiveOutRegs has 1, 3, (some correct or incorrect)4, 5, 7
369 // Comparing to LiveInRegs is not sufficient to differenciate 4 vs 5, 7
370 // The use of findDefBetween removes the case 4.
371 for (const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) {
372 unsigned Reg = RegMaskPair.RegUnit;
373 if (TargetRegisterInfo::isVirtualRegister(Reg) &&
374 isDefBetween(Reg, LIS->getInstructionIndex(*BeginBlock).getRegSlot(),
375 LIS->getInstructionIndex(*EndBlock).getRegSlot(), MRI,
376 LIS)) {
377 LiveOutRegs.insert(Reg);
378 }
379 }
380
381 // Pressure = sum_alive_registers register size
382 // Internally llvm will represent some registers as big 128 bits registers
383 // for example, but they actually correspond to 4 actual 32 bits registers.
384 // Thus Pressure is not equal to num_alive_registers * constant.
385 LiveInPressure = TopPressure.MaxSetPressure;
386 LiveOutPressure = BotPressure.MaxSetPressure;
387
388 // Prepares TopRPTracker for top down scheduling.
389 TopRPTracker.closeTop();
390}
391
392void SIScheduleBlock::schedule(MachineBasicBlock::iterator BeginBlock,
393 MachineBasicBlock::iterator EndBlock) {
394 if (!Scheduled)
395 fastSchedule();
396
397 // PreScheduling phase to set LiveIn and LiveOut.
398 initRegPressure(BeginBlock, EndBlock);
399 undoSchedule();
400
401 // Schedule for real now.
402
403 TopReadySUs.clear();
404
405 for (SUnit* SU : SUnits) {
406 if (!SU->NumPredsLeft)
407 TopReadySUs.push_back(SU);
408 }
409
410 while (!TopReadySUs.empty()) {
411 SUnit *SU = pickNode();
412 ScheduledSUnits.push_back(SU);
413 TopRPTracker.setPos(SU->getInstr());
414 TopRPTracker.advance();
415 nodeScheduled(SU);
416 }
417
418 // TODO: compute InternalAdditionnalPressure.
419 InternalAdditionnalPressure.resize(TopPressure.MaxSetPressure.size());
420
421 // Check everything is right.
422#ifndef NDEBUG
423 assert(SUnits.size() == ScheduledSUnits.size() &&((SUnits.size() == ScheduledSUnits.size() && TopReadySUs
.empty()) ? static_cast<void> (0) : __assert_fail ("SUnits.size() == ScheduledSUnits.size() && TopReadySUs.empty()"
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn306458/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 424, __PRETTY_FUNCTION__))
424 TopReadySUs.empty())((SUnits.size() == ScheduledSUnits.size() && TopReadySUs
.empty()) ? static_cast<void> (0) : __assert_fail ("SUnits.size() == ScheduledSUnits.size() && TopReadySUs.empty()"
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn306458/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 424, __PRETTY_FUNCTION__))
;
425 for (SUnit* SU : SUnits) {
426 assert(SU->isScheduled &&((SU->isScheduled && SU->NumPredsLeft == 0) ? static_cast
<void> (0) : __assert_fail ("SU->isScheduled && SU->NumPredsLeft == 0"
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn306458/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 427, __PRETTY_FUNCTION__))
427 SU->NumPredsLeft == 0)((SU->isScheduled && SU->NumPredsLeft == 0) ? static_cast
<void> (0) : __assert_fail ("SU->isScheduled && SU->NumPredsLeft == 0"
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn306458/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 427, __PRETTY_FUNCTION__))
;
428 }
429#endif
430
431 Scheduled = true;
432}
433
434void SIScheduleBlock::undoSchedule() {
435 for (SUnit* SU : SUnits) {
436 SU->isScheduled = false;
437 for (SDep& Succ : SU->Succs) {
438 if (BC->isSUInBlock(Succ.getSUnit(), ID))
439 undoReleaseSucc(SU, &Succ);
440 }
441 }
442 HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);
443 ScheduledSUnits.clear();
444 Scheduled = false;
445}
446
447void SIScheduleBlock::undoReleaseSucc(SUnit *SU, SDep *SuccEdge) {
448 SUnit *SuccSU = SuccEdge->getSUnit();
449
450 if (SuccEdge->isWeak()) {
451 ++SuccSU->WeakPredsLeft;
452 return;
453 }
454 ++SuccSU->NumPredsLeft;
455}
456
457void SIScheduleBlock::releaseSucc(SUnit *SU, SDep *SuccEdge) {
458 SUnit *SuccSU = SuccEdge->getSUnit();
459
460 if (SuccEdge->isWeak()) {
461 --SuccSU->WeakPredsLeft;
462 return;
463 }
464#ifndef NDEBUG
465 if (SuccSU->NumPredsLeft == 0) {
466 dbgs() << "*** Scheduling failed! ***\n";
467 SuccSU->dump(DAG);
468 dbgs() << " has been released too many times!\n";
469 llvm_unreachable(nullptr)::llvm::llvm_unreachable_internal(nullptr, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn306458/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 469)
;
470 }
471#endif
472
473 --SuccSU->NumPredsLeft;
474}
475
476/// Release Successors of the SU that are in the block or not.
477void SIScheduleBlock::releaseSuccessors(SUnit *SU, bool InOrOutBlock) {
478 for (SDep& Succ : SU->Succs) {
479 SUnit *SuccSU = Succ.getSUnit();
480
481 if (SuccSU->NodeNum >= DAG->SUnits.size())
482 continue;
483
484 if (BC->isSUInBlock(SuccSU, ID) != InOrOutBlock)
485 continue;
486
487 releaseSucc(SU, &Succ);
488 if (SuccSU->NumPredsLeft == 0 && InOrOutBlock)
489 TopReadySUs.push_back(SuccSU);
490 }
491}
492
493void SIScheduleBlock::nodeScheduled(SUnit *SU) {
494 // Is in TopReadySUs
495 assert (!SU->NumPredsLeft)((!SU->NumPredsLeft) ? static_cast<void> (0) : __assert_fail
("!SU->NumPredsLeft", "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn306458/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 495, __PRETTY_FUNCTION__))
;
496 std::vector<SUnit *>::iterator I = llvm::find(TopReadySUs, SU);
497 if (I == TopReadySUs.end()) {
498 dbgs() << "Data Structure Bug in SI Scheduler\n";
499 llvm_unreachable(nullptr)::llvm::llvm_unreachable_internal(nullptr, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn306458/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 499)
;
500 }
501 TopReadySUs.erase(I);
502
503 releaseSuccessors(SU, true);
504 // Scheduling this node will trigger a wait,
505 // thus propagate to other instructions that they do not need to wait either.
506 if (HasLowLatencyNonWaitedParent[NodeNum2Index[SU->NodeNum]])
507 HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);
508
509 if (DAG->IsLowLatencySU[SU->NodeNum]) {
510 for (SDep& Succ : SU->Succs) {
511 std::map<unsigned, unsigned>::iterator I =
512 NodeNum2Index.find(Succ.getSUnit()->NodeNum);
513 if (I != NodeNum2Index.end())
514 HasLowLatencyNonWaitedParent[I->second] = 1;
515 }
516 }
517 SU->isScheduled = true;
518}
519
520void SIScheduleBlock::finalizeUnits() {
521 // We remove links from outside blocks to enable scheduling inside the block.
522 for (SUnit* SU : SUnits) {
523 releaseSuccessors(SU, false);
524 if (DAG->IsHighLatencySU[SU->NodeNum])
525 HighLatencyBlock = true;
526 }
527 HasLowLatencyNonWaitedParent.resize(SUnits.size(), 0);
528}
529
530// we maintain ascending order of IDs
531void SIScheduleBlock::addPred(SIScheduleBlock *Pred) {
532 unsigned PredID = Pred->getID();
533
534 // Check if not already predecessor.
535 for (SIScheduleBlock* P : Preds) {
536 if (PredID == P->getID())
537 return;
538 }
539 Preds.push_back(Pred);
540
541 assert(none_of(Succs,((none_of(Succs, [=](std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind
> S) { return PredID == S.first->getID(); }) &&
"Loop in the Block Graph!") ? static_cast<void> (0) : __assert_fail
("none_of(Succs, [=](std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S) { return PredID == S.first->getID(); }) && \"Loop in the Block Graph!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn306458/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 546, __PRETTY_FUNCTION__))
542 [=](std::pair<SIScheduleBlock*,((none_of(Succs, [=](std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind
> S) { return PredID == S.first->getID(); }) &&
"Loop in the Block Graph!") ? static_cast<void> (0) : __assert_fail
("none_of(Succs, [=](std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S) { return PredID == S.first->getID(); }) && \"Loop in the Block Graph!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn306458/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 546, __PRETTY_FUNCTION__))
543 SIScheduleBlockLinkKind> S) {((none_of(Succs, [=](std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind
> S) { return PredID == S.first->getID(); }) &&
"Loop in the Block Graph!") ? static_cast<void> (0) : __assert_fail
("none_of(Succs, [=](std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S) { return PredID == S.first->getID(); }) && \"Loop in the Block Graph!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn306458/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 546, __PRETTY_FUNCTION__))
544 return PredID == S.first->getID();((none_of(Succs, [=](std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind
> S) { return PredID == S.first->getID(); }) &&
"Loop in the Block Graph!") ? static_cast<void> (0) : __assert_fail
("none_of(Succs, [=](std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S) { return PredID == S.first->getID(); }) && \"Loop in the Block Graph!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn306458/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 546, __PRETTY_FUNCTION__))
545 }) &&((none_of(Succs, [=](std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind
> S) { return PredID == S.first->getID(); }) &&
"Loop in the Block Graph!") ? static_cast<void> (0) : __assert_fail
("none_of(Succs, [=](std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S) { return PredID == S.first->getID(); }) && \"Loop in the Block Graph!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn306458/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 546, __PRETTY_FUNCTION__))
546 "Loop in the Block Graph!")((none_of(Succs, [=](std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind
> S) { return PredID == S.first->getID(); }) &&
"Loop in the Block Graph!") ? static_cast<void> (0) : __assert_fail
("none_of(Succs, [=](std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S) { return PredID == S.first->getID(); }) && \"Loop in the Block Graph!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn306458/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 546, __PRETTY_FUNCTION__))
;
547}
548
549void SIScheduleBlock::addSucc(SIScheduleBlock *Succ,
550 SIScheduleBlockLinkKind Kind) {
551 unsigned SuccID = Succ->getID();
552
553 // Check if not already predecessor.
554 for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> &S : Succs) {
555 if (SuccID == S.first->getID()) {
556 if (S.second == SIScheduleBlockLinkKind::NoData &&
557 Kind == SIScheduleBlockLinkKind::Data)
558 S.second = Kind;
559 return;
560 }
561 }
562 if (Succ->isHighLatencyBlock())
563 ++NumHighLatencySuccessors;
564 Succs.push_back(std::make_pair(Succ, Kind));
565
566 assert(none_of(Preds,((none_of(Preds, [=](SIScheduleBlock *P) { return SuccID == P
->getID(); }) && "Loop in the Block Graph!") ? static_cast
<void> (0) : __assert_fail ("none_of(Preds, [=](SIScheduleBlock *P) { return SuccID == P->getID(); }) && \"Loop in the Block Graph!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn306458/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 568, __PRETTY_FUNCTION__))
567 [=](SIScheduleBlock *P) { return SuccID == P->getID(); }) &&((none_of(Preds, [=](SIScheduleBlock *P) { return SuccID == P
->getID(); }) && "Loop in the Block Graph!") ? static_cast
<void> (0) : __assert_fail ("none_of(Preds, [=](SIScheduleBlock *P) { return SuccID == P->getID(); }) && \"Loop in the Block Graph!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn306458/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 568, __PRETTY_FUNCTION__))
568 "Loop in the Block Graph!")((none_of(Preds, [=](SIScheduleBlock *P) { return SuccID == P
->getID(); }) && "Loop in the Block Graph!") ? static_cast
<void> (0) : __assert_fail ("none_of(Preds, [=](SIScheduleBlock *P) { return SuccID == P->getID(); }) && \"Loop in the Block Graph!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn306458/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 568, __PRETTY_FUNCTION__))
;
569}
570
571#ifndef NDEBUG
572void SIScheduleBlock::printDebug(bool full) {
573 dbgs() << "Block (" << ID << ")\n";
574 if (!full)
575 return;
576
577 dbgs() << "\nContains High Latency Instruction: "
578 << HighLatencyBlock << '\n';
579 dbgs() << "\nDepends On:\n";
580 for (SIScheduleBlock* P : Preds) {
581 P->printDebug(false);
582 }
583
584 dbgs() << "\nSuccessors:\n";
585 for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S : Succs) {
586 if (S.second == SIScheduleBlockLinkKind::Data)
587 dbgs() << "(Data Dep) ";
588 S.first->printDebug(false);
589 }
590
591 if (Scheduled) {
592 dbgs() << "LiveInPressure " << LiveInPressure[DAG->getSGPRSetID()] << ' '
593 << LiveInPressure[DAG->getVGPRSetID()] << '\n';
594 dbgs() << "LiveOutPressure " << LiveOutPressure[DAG->getSGPRSetID()] << ' '
595 << LiveOutPressure[DAG->getVGPRSetID()] << "\n\n";
596 dbgs() << "LiveIns:\n";
597 for (unsigned Reg : LiveInRegs)
598 dbgs() << PrintVRegOrUnit(Reg, DAG->getTRI()) << ' ';
599
600 dbgs() << "\nLiveOuts:\n";
601 for (unsigned Reg : LiveOutRegs)
602 dbgs() << PrintVRegOrUnit(Reg, DAG->getTRI()) << ' ';
603 }
604
605 dbgs() << "\nInstructions:\n";
606 if (!Scheduled) {
607 for (SUnit* SU : SUnits) {
608 SU->dump(DAG);
609 }
610 } else {
611 for (SUnit* SU : SUnits) {
612 SU->dump(DAG);
613 }
614 }
615
616 dbgs() << "///////////////////////\n";
617}
618#endif
619
620// SIScheduleBlockCreator //
621
622SIScheduleBlockCreator::SIScheduleBlockCreator(SIScheduleDAGMI *DAG) :
623DAG(DAG) {
624}
625
626SIScheduleBlockCreator::~SIScheduleBlockCreator() = default;
627
628SIScheduleBlocks
629SIScheduleBlockCreator::getBlocks(SISchedulerBlockCreatorVariant BlockVariant) {
630 std::map<SISchedulerBlockCreatorVariant, SIScheduleBlocks>::iterator B =
631 Blocks.find(BlockVariant);
632 if (B == Blocks.end()) {
633 SIScheduleBlocks Res;
634 createBlocksForVariant(BlockVariant);
635 topologicalSort();
636 scheduleInsideBlocks();
637 fillStats();
638 Res.Blocks = CurrentBlocks;
639 Res.TopDownIndex2Block = TopDownIndex2Block;
640 Res.TopDownBlock2Index = TopDownBlock2Index;
641 Blocks[BlockVariant] = Res;
642 return Res;
643 } else {
644 return B->second;
645 }
646}
647
648bool SIScheduleBlockCreator::isSUInBlock(SUnit *SU, unsigned ID) {
649 if (SU->NodeNum >= DAG->SUnits.size())
650 return false;
651 return CurrentBlocks[Node2CurrentBlock[SU->NodeNum]]->getID() == ID;
652}
653
654void SIScheduleBlockCreator::colorHighLatenciesAlone() {
655 unsigned DAGSize = DAG->SUnits.size();
656
657 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
658 SUnit *SU = &DAG->SUnits[i];
659 if (DAG->IsHighLatencySU[SU->NodeNum]) {
660 CurrentColoring[SU->NodeNum] = NextReservedID++;
661 }
662 }
663}
664
665static bool
666hasDataDependencyPred(const SUnit &SU, const SUnit &FromSU) {
667 for (const auto &PredDep : SU.Preds) {
668 if (PredDep.getSUnit() == &FromSU &&
669 PredDep.getKind() == llvm::SDep::Data)
670 return true;
671 }
672 return false;
673}
674
675void SIScheduleBlockCreator::colorHighLatenciesGroups() {
676 unsigned DAGSize = DAG->SUnits.size();
677 unsigned NumHighLatencies = 0;
678 unsigned GroupSize;
679 int Color = NextReservedID;
680 unsigned Count = 0;
681 std::set<unsigned> FormingGroup;
682
683 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
684 SUnit *SU = &DAG->SUnits[i];
685 if (DAG->IsHighLatencySU[SU->NodeNum])
686 ++NumHighLatencies;
687 }
688
689 if (NumHighLatencies == 0)
690 return;
691
692 if (NumHighLatencies <= 6)
693 GroupSize = 2;
694 else if (NumHighLatencies <= 12)
695 GroupSize = 3;
696 else
697 GroupSize = 4;
698
699 for (unsigned SUNum : DAG->TopDownIndex2SU) {
700 const SUnit &SU = DAG->SUnits[SUNum];
701 if (DAG->IsHighLatencySU[SU.NodeNum]) {
702 unsigned CompatibleGroup = true;
703 int ProposedColor = Color;
704 std::vector<int> AdditionalElements;
705
706 // We don't want to put in the same block
707 // two high latency instructions that depend
708 // on each other.
709 // One way would be to check canAddEdge
710 // in both directions, but that currently is not
711 // enough because there the high latency order is
712 // enforced (via links).
713 // Instead, look at the dependencies between the
714 // high latency instructions and deduce if it is
715 // a data dependency or not.
716 for (unsigned j : FormingGroup) {
717 bool HasSubGraph;
718 std::vector<int> SubGraph;
719 // By construction (topological order), if SU and
720 // DAG->SUnits[j] are linked, DAG->SUnits[j] is neccessary
721 // in the parent graph of SU.
722#ifndef NDEBUG
723 SubGraph = DAG->GetTopo()->GetSubGraph(SU, DAG->SUnits[j],
724 HasSubGraph);
725 assert(!HasSubGraph)((!HasSubGraph) ? static_cast<void> (0) : __assert_fail
("!HasSubGraph", "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn306458/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 725, __PRETTY_FUNCTION__))
;
726#endif
727 SubGraph = DAG->GetTopo()->GetSubGraph(DAG->SUnits[j], SU,
728 HasSubGraph);
729 if (!HasSubGraph)
730 continue; // No dependencies between each other
731 else if (SubGraph.size() > 5) {
732 // Too many elements would be required to be added to the block.
733 CompatibleGroup = false;
734 break;
735 }
736 else {
737 // Check the type of dependency
738 for (unsigned k : SubGraph) {
739 // If in the path to join the two instructions,
740 // there is another high latency instruction,
741 // or instructions colored for another block
742 // abort the merge.
743 if (DAG->IsHighLatencySU[k] ||
744 (CurrentColoring[k] != ProposedColor &&
745 CurrentColoring[k] != 0)) {
746 CompatibleGroup = false;
747 break;
748 }
749 // If one of the SU in the subgraph depends on the result of SU j,
750 // there'll be a data dependency.
751 if (hasDataDependencyPred(DAG->SUnits[k], DAG->SUnits[j])) {
752 CompatibleGroup = false;
753 break;
754 }
755 }
756 if (!CompatibleGroup)
757 break;
758 // Same check for the SU
759 if (hasDataDependencyPred(SU, DAG->SUnits[j])) {
760 CompatibleGroup = false;
761 break;
762 }
763 // Add all the required instructions to the block
764 // These cannot live in another block (because they
765 // depend (order dependency) on one of the
766 // instruction in the block, and are required for the
767 // high latency instruction we add.
768 AdditionalElements.insert(AdditionalElements.end(),
769 SubGraph.begin(), SubGraph.end());
770 }
771 }
772 if (CompatibleGroup) {
773 FormingGroup.insert(SU.NodeNum);
774 for (unsigned j : AdditionalElements)
775 CurrentColoring[j] = ProposedColor;
776 CurrentColoring[SU.NodeNum] = ProposedColor;
777 ++Count;
778 }
779 // Found one incompatible instruction,
780 // or has filled a big enough group.
781 // -> start a new one.
782 if (!CompatibleGroup) {
783 FormingGroup.clear();
784 Color = ++NextReservedID;
785 ProposedColor = Color;
786 FormingGroup.insert(SU.NodeNum);
787 CurrentColoring[SU.NodeNum] = ProposedColor;
788 Count = 0;
789 } else if (Count == GroupSize) {
790 FormingGroup.clear();
791 Color = ++NextReservedID;
792 ProposedColor = Color;
Value stored to 'ProposedColor' is never read
793 Count = 0;
794 }
795 }
796 }
797}
798
void SIScheduleBlockCreator::colorComputeReservedDependencies() {
  // Computes two auxiliary colorings:
  // - CurrentTopDownReservedDependencyColoring: SUs get the same color iff
  //   they (transitively) depend on the same combination of reserved
  //   (high latency group) colors.
  // - CurrentBottomUpReservedDependencyColoring: same, for the SUs the
  //   reserved groups depend on.
  // Both are consumed by colorAccordingToReservedDependencies().
  unsigned DAGSize = DAG->SUnits.size();
  std::map<std::set<unsigned>, unsigned> ColorCombinations;

  CurrentTopDownReservedDependencyColoring.clear();
  CurrentBottomUpReservedDependencyColoring.clear();

  // Color 0 means "depends on no reserved group".
  CurrentTopDownReservedDependencyColoring.resize(DAGSize, 0);
  CurrentBottomUpReservedDependencyColoring.resize(DAGSize, 0);

  // Traverse TopDown, and give different colors to SUs depending
  // on which combination of High Latencies they depend on.

  for (unsigned SUNum : DAG->TopDownIndex2SU) {
    SUnit *SU = &DAG->SUnits[SUNum];
    std::set<unsigned> SUColors;

    // Already given (reserved color of a high latency group).
    if (CurrentColoring[SU->NodeNum]) {
      CurrentTopDownReservedDependencyColoring[SU->NodeNum] =
        CurrentColoring[SU->NodeNum];
      continue;
    }

    // Collect the reserved colors of all strong, in-region predecessors.
    for (SDep& PredDep : SU->Preds) {
      SUnit *Pred = PredDep.getSUnit();
      if (PredDep.isWeak() || Pred->NodeNum >= DAGSize)
        continue;
      if (CurrentTopDownReservedDependencyColoring[Pred->NodeNum] > 0)
        SUColors.insert(CurrentTopDownReservedDependencyColoring[Pred->NodeNum]);
    }
    // Color 0 by default.
    if (SUColors.empty())
      continue;
    // Same color than parents.
    // (colors > DAGSize are non-reserved combination colors; a single
    // parent combination color can simply be propagated.)
    if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
      CurrentTopDownReservedDependencyColoring[SU->NodeNum] =
        *SUColors.begin();
    else {
      // Reuse the color already attributed to this exact combination of
      // parent colors, or allocate a fresh one.
      std::map<std::set<unsigned>, unsigned>::iterator Pos =
        ColorCombinations.find(SUColors);
      if (Pos != ColorCombinations.end()) {
        CurrentTopDownReservedDependencyColoring[SU->NodeNum] = Pos->second;
      } else {
        CurrentTopDownReservedDependencyColoring[SU->NodeNum] =
          NextNonReservedID;
        ColorCombinations[SUColors] = NextNonReservedID++;
      }
    }
  }

  ColorCombinations.clear();

  // Same as before, but BottomUp.

  for (unsigned SUNum : DAG->BottomUpIndex2SU) {
    SUnit *SU = &DAG->SUnits[SUNum];
    std::set<unsigned> SUColors;

    // Already given.
    if (CurrentColoring[SU->NodeNum]) {
      CurrentBottomUpReservedDependencyColoring[SU->NodeNum] =
        CurrentColoring[SU->NodeNum];
      continue;
    }

    // Collect the reserved colors of all strong, in-region successors.
    for (SDep& SuccDep : SU->Succs) {
      SUnit *Succ = SuccDep.getSUnit();
      if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
        continue;
      if (CurrentBottomUpReservedDependencyColoring[Succ->NodeNum] > 0)
        SUColors.insert(CurrentBottomUpReservedDependencyColoring[Succ->NodeNum]);
    }
    // Keep color 0.
    if (SUColors.empty())
      continue;
    // Same color than parents.
    if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
      CurrentBottomUpReservedDependencyColoring[SU->NodeNum] =
        *SUColors.begin();
    else {
      std::map<std::set<unsigned>, unsigned>::iterator Pos =
        ColorCombinations.find(SUColors);
      if (Pos != ColorCombinations.end()) {
        CurrentBottomUpReservedDependencyColoring[SU->NodeNum] = Pos->second;
      } else {
        CurrentBottomUpReservedDependencyColoring[SU->NodeNum] =
          NextNonReservedID;
        ColorCombinations[SUColors] = NextNonReservedID++;
      }
    }
  }
}
892
893void SIScheduleBlockCreator::colorAccordingToReservedDependencies() {
894 unsigned DAGSize = DAG->SUnits.size();
895 std::map<std::pair<unsigned, unsigned>, unsigned> ColorCombinations;
896
897 // Every combination of colors given by the top down
898 // and bottom up Reserved node dependency
899
900 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
901 SUnit *SU = &DAG->SUnits[i];
902 std::pair<unsigned, unsigned> SUColors;
903
904 // High latency instructions: already given.
905 if (CurrentColoring[SU->NodeNum])
906 continue;
907
908 SUColors.first = CurrentTopDownReservedDependencyColoring[SU->NodeNum];
909 SUColors.second = CurrentBottomUpReservedDependencyColoring[SU->NodeNum];
910
911 std::map<std::pair<unsigned, unsigned>, unsigned>::iterator Pos =
912 ColorCombinations.find(SUColors);
913 if (Pos != ColorCombinations.end()) {
914 CurrentColoring[SU->NodeNum] = Pos->second;
915 } else {
916 CurrentColoring[SU->NodeNum] = NextNonReservedID;
917 ColorCombinations[SUColors] = NextNonReservedID++;
918 }
919 }
920}
921
void SIScheduleBlockCreator::colorEndsAccordingToDependencies() {
  // Detaches the "ends" of the coloring: SUs with no reserved dependency
  // of their own are merged into their unique successor block when that is
  // unambiguous, and otherwise receive a fresh color. Changes are staged
  // in PendingColoring so that decisions within this pass see a
  // consistent snapshot via SUColorsPending.
  unsigned DAGSize = DAG->SUnits.size();
  std::vector<int> PendingColoring = CurrentColoring;

  assert(DAGSize >= 1 &&
         CurrentBottomUpReservedDependencyColoring.size() == DAGSize &&
         CurrentTopDownReservedDependencyColoring.size() == DAGSize);
  // If there is no reserved block at all, do nothing. We don't want
  // everything in one block.
  if (*std::max_element(CurrentBottomUpReservedDependencyColoring.begin(),
                        CurrentBottomUpReservedDependencyColoring.end()) == 0 &&
      *std::max_element(CurrentTopDownReservedDependencyColoring.begin(),
                        CurrentTopDownReservedDependencyColoring.end()) == 0)
    return;

  for (unsigned SUNum : DAG->BottomUpIndex2SU) {
    SUnit *SU = &DAG->SUnits[SUNum];
    std::set<unsigned> SUColors;
    std::set<unsigned> SUColorsPending;

    // Reserved colors (<= DAGSize) are left untouched.
    if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
      continue;

    // SUs that carry a reserved dependency keep their current color.
    if (CurrentBottomUpReservedDependencyColoring[SU->NodeNum] > 0 ||
        CurrentTopDownReservedDependencyColoring[SU->NodeNum] > 0)
      continue;

    for (SDep& SuccDep : SU->Succs) {
      SUnit *Succ = SuccDep.getSUnit();
      if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
        continue;
      if (CurrentBottomUpReservedDependencyColoring[Succ->NodeNum] > 0 ||
          CurrentTopDownReservedDependencyColoring[Succ->NodeNum] > 0)
        SUColors.insert(CurrentColoring[Succ->NodeNum]);
      SUColorsPending.insert(PendingColoring[Succ->NodeNum]);
    }
    // If there is only one child/parent block, and that block
    // is not among the ones we are removing in this path, then
    // merge the instruction to that block
    if (SUColors.size() == 1 && SUColorsPending.size() == 1)
      PendingColoring[SU->NodeNum] = *SUColors.begin();
    else // TODO: Attribute new colors depending on color
         // combination of children.
      PendingColoring[SU->NodeNum] = NextNonReservedID++;
  }
  CurrentColoring = PendingColoring;
}
969
970
void SIScheduleBlockCreator::colorForceConsecutiveOrderInGroup() {
  // Ensures every non-reserved color forms a single consecutive run in
  // NodeNum order: if a color reappears after having been interrupted by
  // another color, the reappearing SUs are given a new color (continuing
  // the color just assigned to the previous SU when they are adjacent).
  unsigned DAGSize = DAG->SUnits.size();
  unsigned PreviousColor;
  std::set<unsigned> SeenColors;

  if (DAGSize <= 1)
    return;

  PreviousColor = CurrentColoring[0];

  for (unsigned i = 1, e = DAGSize; i != e; ++i) {
    SUnit *SU = &DAG->SUnits[i];
    unsigned CurrentColor = CurrentColoring[i];
    // Save the pre-update previous color: the reassignment below must
    // compare against the color of SU i-1, not of SU i.
    unsigned PreviousColorSave = PreviousColor;
    assert(i == SU->NodeNum);

    // A run of PreviousColor just ended: remember that color was seen.
    if (CurrentColor != PreviousColor)
      SeenColors.insert(PreviousColor);
    PreviousColor = CurrentColor;

    // Reserved colors (<= DAGSize) are left untouched.
    if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
      continue;

    // Color run is not interrupted so far: nothing to do.
    if (SeenColors.find(CurrentColor) == SeenColors.end())
      continue;

    // The color reappeared after an interruption: fork a new color, or
    // extend the color attributed to the immediately preceding SU.
    if (PreviousColorSave != CurrentColor)
      CurrentColoring[i] = NextNonReservedID++;
    else
      CurrentColoring[i] = CurrentColoring[i-1];
  }
}
1003
1004void SIScheduleBlockCreator::colorMergeConstantLoadsNextGroup() {
1005 unsigned DAGSize = DAG->SUnits.size();
1006
1007 for (unsigned SUNum : DAG->BottomUpIndex2SU) {
1008 SUnit *SU = &DAG->SUnits[SUNum];
1009 std::set<unsigned> SUColors;
1010
1011 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
1012 continue;
1013
1014 // No predecessor: Vgpr constant loading.
1015 // Low latency instructions usually have a predecessor (the address)
1016 if (SU->Preds.size() > 0 && !DAG->IsLowLatencySU[SU->NodeNum])
1017 continue;
1018
1019 for (SDep& SuccDep : SU->Succs) {
1020 SUnit *Succ = SuccDep.getSUnit();
1021 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
1022 continue;
1023 SUColors.insert(CurrentColoring[Succ->NodeNum]);
1024 }
1025 if (SUColors.size() == 1)
1026 CurrentColoring[SU->NodeNum] = *SUColors.begin();
1027 }
1028}
1029
1030void SIScheduleBlockCreator::colorMergeIfPossibleNextGroup() {
1031 unsigned DAGSize = DAG->SUnits.size();
1032
1033 for (unsigned SUNum : DAG->BottomUpIndex2SU) {
1034 SUnit *SU = &DAG->SUnits[SUNum];
1035 std::set<unsigned> SUColors;
1036
1037 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
1038 continue;
1039
1040 for (SDep& SuccDep : SU->Succs) {
1041 SUnit *Succ = SuccDep.getSUnit();
1042 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
1043 continue;
1044 SUColors.insert(CurrentColoring[Succ->NodeNum]);
1045 }
1046 if (SUColors.size() == 1)
1047 CurrentColoring[SU->NodeNum] = *SUColors.begin();
1048 }
1049}
1050
1051void SIScheduleBlockCreator::colorMergeIfPossibleNextGroupOnlyForReserved() {
1052 unsigned DAGSize = DAG->SUnits.size();
1053
1054 for (unsigned SUNum : DAG->BottomUpIndex2SU) {
1055 SUnit *SU = &DAG->SUnits[SUNum];
1056 std::set<unsigned> SUColors;
1057
1058 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
1059 continue;
1060
1061 for (SDep& SuccDep : SU->Succs) {
1062 SUnit *Succ = SuccDep.getSUnit();
1063 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
1064 continue;
1065 SUColors.insert(CurrentColoring[Succ->NodeNum]);
1066 }
1067 if (SUColors.size() == 1 && *SUColors.begin() <= DAGSize)
1068 CurrentColoring[SU->NodeNum] = *SUColors.begin();
1069 }
1070}
1071
1072void SIScheduleBlockCreator::colorMergeIfPossibleSmallGroupsToNextGroup() {
1073 unsigned DAGSize = DAG->SUnits.size();
1074 std::map<unsigned, unsigned> ColorCount;
1075
1076 for (unsigned SUNum : DAG->BottomUpIndex2SU) {
1077 SUnit *SU = &DAG->SUnits[SUNum];
1078 unsigned color = CurrentColoring[SU->NodeNum];
1079 ++ColorCount[color];
1080 }
1081
1082 for (unsigned SUNum : DAG->BottomUpIndex2SU) {
1083 SUnit *SU = &DAG->SUnits[SUNum];
1084 unsigned color = CurrentColoring[SU->NodeNum];
1085 std::set<unsigned> SUColors;
1086
1087 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
1088 continue;
1089
1090 if (ColorCount[color] > 1)
1091 continue;
1092
1093 for (SDep& SuccDep : SU->Succs) {
1094 SUnit *Succ = SuccDep.getSUnit();
1095 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
1096 continue;
1097 SUColors.insert(CurrentColoring[Succ->NodeNum]);
1098 }
1099 if (SUColors.size() == 1 && *SUColors.begin() != color) {
1100 --ColorCount[color];
1101 CurrentColoring[SU->NodeNum] = *SUColors.begin();
1102 ++ColorCount[*SUColors.begin()];
1103 }
1104 }
1105}
1106
void SIScheduleBlockCreator::cutHugeBlocks() {
  // TODO: Splitting oversized blocks is not implemented yet; this is
  // currently a no-op placeholder in the block-creation pipeline.
}
1110
1111void SIScheduleBlockCreator::regroupNoUserInstructions() {
1112 unsigned DAGSize = DAG->SUnits.size();
1113 int GroupID = NextNonReservedID++;
1114
1115 for (unsigned SUNum : DAG->BottomUpIndex2SU) {
1116 SUnit *SU = &DAG->SUnits[SUNum];
1117 bool hasSuccessor = false;
1118
1119 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
1120 continue;
1121
1122 for (SDep& SuccDep : SU->Succs) {
1123 SUnit *Succ = SuccDep.getSUnit();
1124 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
1125 continue;
1126 hasSuccessor = true;
1127 }
1128 if (!hasSuccessor)
1129 CurrentColoring[SU->NodeNum] = GroupID;
1130 }
1131}
1132
void SIScheduleBlockCreator::createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant) {
  // Drives the whole block-creation pipeline: colors every SU through a
  // sequence of coloring passes selected by BlockVariant, then groups SUs
  // of identical color into SIScheduleBlocks and wires up inter-block
  // dependency edges.
  unsigned DAGSize = DAG->SUnits.size();
  std::map<unsigned,unsigned> RealID;

  CurrentBlocks.clear();
  CurrentColoring.clear();
  CurrentColoring.resize(DAGSize, 0);
  Node2CurrentBlock.clear();

  // Restore links previous scheduling variant has overridden.
  DAG->restoreSULinksLeft();

  // Reserved IDs (high latency groups) live in [1, DAGSize]; everything
  // above DAGSize is a non-reserved color.
  NextReservedID = 1;
  NextNonReservedID = DAGSize + 1;

  DEBUG(dbgs() << "Coloring the graph\n");

  if (BlockVariant == SISchedulerBlockCreatorVariant::LatenciesGrouped)
    colorHighLatenciesGroups();
  else
    colorHighLatenciesAlone();
  colorComputeReservedDependencies();
  colorAccordingToReservedDependencies();
  colorEndsAccordingToDependencies();
  if (BlockVariant == SISchedulerBlockCreatorVariant::LatenciesAlonePlusConsecutive)
    colorForceConsecutiveOrderInGroup();
  regroupNoUserInstructions();
  colorMergeConstantLoadsNextGroup();
  colorMergeIfPossibleNextGroupOnlyForReserved();

  // Put SUs of same color into same block
  Node2CurrentBlock.resize(DAGSize, -1);
  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
    SUnit *SU = &DAG->SUnits[i];
    unsigned Color = CurrentColoring[SU->NodeNum];
    // First SU of this color: materialize a new block for it.
    if (RealID.find(Color) == RealID.end()) {
      int ID = CurrentBlocks.size();
      BlockPtrs.push_back(llvm::make_unique<SIScheduleBlock>(DAG, this, ID));
      CurrentBlocks.push_back(BlockPtrs.rbegin()->get());
      RealID[Color] = ID;
    }
    CurrentBlocks[RealID[Color]]->addUnit(SU);
    Node2CurrentBlock[SU->NodeNum] = RealID[Color];
  }

  // Build dependencies between blocks.
  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
    SUnit *SU = &DAG->SUnits[i];
    int SUID = Node2CurrentBlock[i];
    for (SDep& SuccDep : SU->Succs) {
      SUnit *Succ = SuccDep.getSUnit();
      if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
        continue;
      // Cross-block edge: record whether data actually flows along it.
      if (Node2CurrentBlock[Succ->NodeNum] != SUID)
        CurrentBlocks[SUID]->addSucc(CurrentBlocks[Node2CurrentBlock[Succ->NodeNum]],
                                     SuccDep.isCtrl() ? NoData : Data);
    }
    for (SDep& PredDep : SU->Preds) {
      SUnit *Pred = PredDep.getSUnit();
      if (PredDep.isWeak() || Pred->NodeNum >= DAGSize)
        continue;
      if (Node2CurrentBlock[Pred->NodeNum] != SUID)
        CurrentBlocks[SUID]->addPred(CurrentBlocks[Node2CurrentBlock[Pred->NodeNum]]);
    }
  }

  // Free root and leafs of all blocks to enable scheduling inside them.
  for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) {
    SIScheduleBlock *Block = CurrentBlocks[i];
    Block->finalizeUnits();
  }
  DEBUG(
    dbgs() << "Blocks created:\n\n";
    for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) {
      SIScheduleBlock *Block = CurrentBlocks[i];
      Block->printDebug(true);
    }
  );
}
1212
1213// Two functions taken from Codegen/MachineScheduler.cpp
1214
1215/// Non-const version.
1216static MachineBasicBlock::iterator
1217nextIfDebug(MachineBasicBlock::iterator I,
1218 MachineBasicBlock::const_iterator End) {
1219 for (; I != End; ++I) {
1220 if (!I->isDebugValue())
1221 break;
1222 }
1223 return I;
1224}
1225
void SIScheduleBlockCreator::topologicalSort() {
  // Computes a topological order of the blocks with a Kahn-style
  // algorithm seeded from the blocks that have no successors, filling
  // indices from the back so that leaves get the highest positions.
  // BottomUpIndex2Block is simply the reversed top-down order.
  unsigned DAGSize = CurrentBlocks.size();
  std::vector<int> WorkList;

  DEBUG(dbgs() << "Topological Sort\n");

  WorkList.reserve(DAGSize);
  TopDownIndex2Block.resize(DAGSize);
  TopDownBlock2Index.resize(DAGSize);
  BottomUpIndex2Block.resize(DAGSize);

  // TopDownBlock2Index temporarily holds the count of unprocessed
  // successors for each block; it becomes the real index below.
  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
    SIScheduleBlock *Block = CurrentBlocks[i];
    unsigned Degree = Block->getSuccs().size();
    TopDownBlock2Index[i] = Degree;
    if (Degree == 0) {
      WorkList.push_back(i);
    }
  }

  int Id = DAGSize;
  while (!WorkList.empty()) {
    int i = WorkList.back();
    SIScheduleBlock *Block = CurrentBlocks[i];
    WorkList.pop_back();
    TopDownBlock2Index[i] = --Id;
    TopDownIndex2Block[Id] = i;
    // A predecessor whose successors are all placed becomes ready.
    for (SIScheduleBlock* Pred : Block->getPreds()) {
      if (!--TopDownBlock2Index[Pred->getID()])
        WorkList.push_back(Pred->getID());
    }
  }

#ifndef NDEBUG
  // Check correctness of the ordering.
  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
    SIScheduleBlock *Block = CurrentBlocks[i];
    for (SIScheduleBlock* Pred : Block->getPreds()) {
      assert(TopDownBlock2Index[i] > TopDownBlock2Index[Pred->getID()] &&
             "Wrong Top Down topological sorting");
    }
  }
#endif

  BottomUpIndex2Block = std::vector<int>(TopDownIndex2Block.rbegin(),
                                         TopDownIndex2Block.rend());
}
1273
void SIScheduleBlockCreator::scheduleInsideBlocks() {
  // Schedules the instructions inside each block. To get correct register
  // usage tracking, the MIs of a block must be contiguous in the MBB, so
  // the instruction stream is temporarily reordered (with LiveIntervals
  // kept in sync), each block is scheduled, and the original MI order is
  // then restored.
  unsigned DAGSize = CurrentBlocks.size();

  DEBUG(dbgs() << "\nScheduling Blocks\n\n");

  // We do schedule a valid scheduling such that a Block corresponds
  // to a range of instructions.
  DEBUG(dbgs() << "First phase: Fast scheduling for Reg Liveness\n");
  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
    SIScheduleBlock *Block = CurrentBlocks[i];
    Block->fastSchedule();
  }

  // Note: the following code, and the part restoring previous position
  // is by far the most expensive operation of the Scheduler.

  // Do not update CurrentTop.
  MachineBasicBlock::iterator CurrentTopFastSched = DAG->getCurrentTop();
  // PosOld/PosNew record every MI's position before/after the reorder so
  // the original order can be restored afterwards.
  std::vector<MachineBasicBlock::iterator> PosOld;
  std::vector<MachineBasicBlock::iterator> PosNew;
  PosOld.reserve(DAG->SUnits.size());
  PosNew.reserve(DAG->SUnits.size());

  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
    int BlockIndice = TopDownIndex2Block[i];
    SIScheduleBlock *Block = CurrentBlocks[BlockIndice];
    std::vector<SUnit*> SUs = Block->getScheduledUnits();

    for (SUnit* SU : SUs) {
      MachineInstr *MI = SU->getInstr();
      MachineBasicBlock::iterator Pos = MI;
      PosOld.push_back(Pos);
      if (&*CurrentTopFastSched == MI) {
        // MI is already at the insertion point: just advance past it.
        PosNew.push_back(Pos);
        CurrentTopFastSched = nextIfDebug(++CurrentTopFastSched,
                                          DAG->getCurrentBottom());
      } else {
        // Update the instruction stream.
        DAG->getBB()->splice(CurrentTopFastSched, DAG->getBB(), MI);

        // Update LiveIntervals.
        // Note: Moving all instructions and calling handleMove every time
        // is the most cpu intensive operation of the scheduler.
        // It would gain a lot if there was a way to recompute the
        // LiveIntervals for the entire scheduling region.
        DAG->getLIS()->handleMove(*MI, /*UpdateFlags=*/true);
        PosNew.push_back(CurrentTopFastSched);
      }
    }
  }

  // Now we have Block of SUs == Block of MI.
  // We do the final schedule for the instructions inside the block.
  // The property that all the SUs of the Block are grouped together as MI
  // is used for correct reg usage tracking.
  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
    SIScheduleBlock *Block = CurrentBlocks[i];
    std::vector<SUnit*> SUs = Block->getScheduledUnits();
    Block->schedule((*SUs.begin())->getInstr(), (*SUs.rbegin())->getInstr());
  }

  DEBUG(dbgs() << "Restoring MI Pos\n");
  // Restore old ordering (which prevents a LIS->handleMove bug).
  // Walk the recorded positions in reverse so each splice target is valid.
  for (unsigned i = PosOld.size(), e = 0; i != e; --i) {
    MachineBasicBlock::iterator POld = PosOld[i-1];
    MachineBasicBlock::iterator PNew = PosNew[i-1];
    if (PNew != POld) {
      // Update the instruction stream.
      DAG->getBB()->splice(POld, DAG->getBB(), PNew);

      // Update LiveIntervals.
      DAG->getLIS()->handleMove(*POld, /*UpdateFlags=*/true);
    }
  }

  DEBUG(
    for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) {
      SIScheduleBlock *Block = CurrentBlocks[i];
      Block->printDebug(true);
    }
  );
}
1356
1357void SIScheduleBlockCreator::fillStats() {
1358 unsigned DAGSize = CurrentBlocks.size();
1359
1360 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1361 int BlockIndice = TopDownIndex2Block[i];
1362 SIScheduleBlock *Block = CurrentBlocks[BlockIndice];
1363 if (Block->getPreds().empty())
1364 Block->Depth = 0;
1365 else {
1366 unsigned Depth = 0;
1367 for (SIScheduleBlock *Pred : Block->getPreds()) {
1368 if (Depth < Pred->Depth + 1)
1369 Depth = Pred->Depth + 1;
1370 }
1371 Block->Depth = Depth;
1372 }
1373 }
1374
1375 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1376 int BlockIndice = BottomUpIndex2Block[i];
1377 SIScheduleBlock *Block = CurrentBlocks[BlockIndice];
1378 if (Block->getSuccs().empty())
1379 Block->Height = 0;
1380 else {
1381 unsigned Height = 0;
1382 for (const auto &Succ : Block->getSuccs())
1383 Height = std::min(Height, Succ.first->Height + 1);
1384 Block->Height = Height;
1385 }
1386 }
1387}
1388
1389// SIScheduleBlockScheduler //
1390
// Constructor: computes per-block register-usage bookkeeping
// (LiveOutRegsNumUsages, LiveRegsConsumers, pred/succ counters), then
// immediately schedules all blocks by repeatedly picking the best ready
// block until none remain.
SIScheduleBlockScheduler::SIScheduleBlockScheduler(SIScheduleDAGMI *DAG,
                                                   SISchedulerBlockSchedulerVariant Variant,
                                                   SIScheduleBlocks  BlocksStruct) :
  DAG(DAG), Variant(Variant), Blocks(BlocksStruct.Blocks),
  LastPosWaitedHighLatency(0), NumBlockScheduled(0), VregCurrentUsage(0),
  SregCurrentUsage(0), maxVregUsage(0), maxSregUsage(0) {

  // Fill the usage of every output
  // Warning: while by construction we always have a link between two blocks
  // when one needs a result from the other, the number of users of an output
  // is not the sum of child blocks having as input the same virtual register.
  // Here is an example. A produces x and y. B eats x and produces x'.
  // C eats x' and y. The register coalescer may have attributed the same
  // virtual register to x and x'.
  // To count accurately, we do a topological sort. In case the register is
  // found for several parents, we increment the usage of the one with the
  // highest topological index.
  LiveOutRegsNumUsages.resize(Blocks.size());
  for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
    SIScheduleBlock *Block = Blocks[i];
    for (unsigned Reg : Block->getInRegs()) {
      bool Found = false;
      int topoInd = -1;
      // Find the producing predecessor with the highest topological index.
      for (SIScheduleBlock* Pred: Block->getPreds()) {
        std::set<unsigned> PredOutRegs = Pred->getOutRegs();
        std::set<unsigned>::iterator RegPos = PredOutRegs.find(Reg);

        if (RegPos != PredOutRegs.end()) {
          Found = true;
          if (topoInd < BlocksStruct.TopDownBlock2Index[Pred->getID()]) {
            topoInd = BlocksStruct.TopDownBlock2Index[Pred->getID()];
          }
        }
      }

      if (!Found)
        continue;

      int PredID = BlocksStruct.TopDownIndex2Block[topoInd];
      ++LiveOutRegsNumUsages[PredID][Reg];
    }
  }

  LastPosHighLatencyParentScheduled.resize(Blocks.size(), 0);
  BlockNumPredsLeft.resize(Blocks.size());
  BlockNumSuccsLeft.resize(Blocks.size());

  for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
    SIScheduleBlock *Block = Blocks[i];
    BlockNumPredsLeft[i] = Block->getPreds().size();
    BlockNumSuccsLeft[i] = Block->getSuccs().size();
  }

#ifndef NDEBUG
  // Sanity check: block IDs match their position in Blocks.
  for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
    SIScheduleBlock *Block = Blocks[i];
    assert(Block->getID() == i);
  }
#endif

  std::set<unsigned> InRegs = DAG->getInRegs();
  addLiveRegs(InRegs);

  // Increase LiveOutRegsNumUsages for blocks
  // producing registers consumed in another
  // scheduling region.
  for (unsigned Reg : DAG->getOutRegs()) {
    for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
      // Do reverse traversal
      int ID = BlocksStruct.TopDownIndex2Block[Blocks.size()-1-i];
      SIScheduleBlock *Block = Blocks[ID];
      const std::set<unsigned> &OutRegs = Block->getOutRegs();

      if (OutRegs.find(Reg) == OutRegs.end())
        continue;

      // Only the last producer (in topological order) is charged.
      ++LiveOutRegsNumUsages[ID][Reg];
      break;
    }
  }

  // Fill LiveRegsConsumers for regs that were already
  // defined before scheduling.
  for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
    SIScheduleBlock *Block = Blocks[i];
    for (unsigned Reg : Block->getInRegs()) {
      bool Found = false;
      for (SIScheduleBlock* Pred: Block->getPreds()) {
        std::set<unsigned> PredOutRegs = Pred->getOutRegs();
        std::set<unsigned>::iterator RegPos = PredOutRegs.find(Reg);

        if (RegPos != PredOutRegs.end()) {
          Found = true;
          break;
        }
      }

      // No predecessor produces it: the reg comes from outside the region.
      if (!Found)
        ++LiveRegsConsumers[Reg];
    }
  }

  // Seed the ready list with blocks that have no predecessors.
  for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
    SIScheduleBlock *Block = Blocks[i];
    if (BlockNumPredsLeft[i] == 0) {
      ReadyBlocks.push_back(Block);
    }
  }

  // Main loop: schedule blocks one at a time until none are ready.
  while (SIScheduleBlock *Block = pickBlock()) {
    BlocksScheduled.push_back(Block);
    blockScheduled(Block);
  }

  DEBUG(
    dbgs() << "Block Order:";
    for (SIScheduleBlock* Block : BlocksScheduled) {
      dbgs() << ' ' << Block->getID();
    }
    dbgs() << '\n';
  );
}
1513
1514bool SIScheduleBlockScheduler::tryCandidateLatency(SIBlockSchedCandidate &Cand,
1515 SIBlockSchedCandidate &TryCand) {
1516 if (!Cand.isValid()) {
1517 TryCand.Reason = NodeOrder;
1518 return true;
1519 }
1520
1521 // Try to hide high latencies.
1522 if (tryLess(TryCand.LastPosHighLatParentScheduled,
1523 Cand.LastPosHighLatParentScheduled, TryCand, Cand, Latency))
1524 return true;
1525 // Schedule high latencies early so you can hide them better.
1526 if (tryGreater(TryCand.IsHighLatency, Cand.IsHighLatency,
1527 TryCand, Cand, Latency))
1528 return true;
1529 if (TryCand.IsHighLatency && tryGreater(TryCand.Height, Cand.Height,
1530 TryCand, Cand, Depth))
1531 return true;
1532 if (tryGreater(TryCand.NumHighLatencySuccessors,
1533 Cand.NumHighLatencySuccessors,
1534 TryCand, Cand, Successor))
1535 return true;
1536 return false;
1537}
1538
1539bool SIScheduleBlockScheduler::tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
1540 SIBlockSchedCandidate &TryCand) {
1541 if (!Cand.isValid()) {
1542 TryCand.Reason = NodeOrder;
1543 return true;
1544 }
1545
1546 if (tryLess(TryCand.VGPRUsageDiff > 0, Cand.VGPRUsageDiff > 0,
1547 TryCand, Cand, RegUsage))
1548 return true;
1549 if (tryGreater(TryCand.NumSuccessors > 0,
1550 Cand.NumSuccessors > 0,
1551 TryCand, Cand, Successor))
1552 return true;
1553 if (tryGreater(TryCand.Height, Cand.Height, TryCand, Cand, Depth))
1554 return true;
1555 if (tryLess(TryCand.VGPRUsageDiff, Cand.VGPRUsageDiff,
1556 TryCand, Cand, RegUsage))
1557 return true;
1558 return false;
1559}
1560
1561SIScheduleBlock *SIScheduleBlockScheduler::pickBlock() {
1562 SIBlockSchedCandidate Cand;
1563 std::vector<SIScheduleBlock*>::iterator Best;
1564 SIScheduleBlock *Block;
1565 if (ReadyBlocks.empty())
1566 return nullptr;
1567
1568 DAG->fillVgprSgprCost(LiveRegs.begin(), LiveRegs.end(),
1569 VregCurrentUsage, SregCurrentUsage);
1570 if (VregCurrentUsage > maxVregUsage)
1571 maxVregUsage = VregCurrentUsage;
1572 if (SregCurrentUsage > maxSregUsage)
1573 maxSregUsage = SregCurrentUsage;
1574 DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("misched")) { dbgs() << "Picking New Blocks\n"; dbgs()
<< "Available: "; for (SIScheduleBlock* Block : ReadyBlocks
) dbgs() << Block->getID() << ' '; dbgs() <<
"\nCurrent Live:\n"; for (unsigned Reg : LiveRegs) dbgs() <<
PrintVRegOrUnit(Reg, DAG->getTRI()) << ' '; dbgs() <<
'\n'; dbgs() << "Current VGPRs: " << VregCurrentUsage
<< '\n'; dbgs() << "Current SGPRs: " << SregCurrentUsage
<< '\n';; } } while (false)
1575 dbgs() << "Picking New Blocks\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("misched")) { dbgs() << "Picking New Blocks\n"; dbgs()
<< "Available: "; for (SIScheduleBlock* Block : ReadyBlocks
) dbgs() << Block->getID() << ' '; dbgs() <<
"\nCurrent Live:\n"; for (unsigned Reg : LiveRegs) dbgs() <<
PrintVRegOrUnit(Reg, DAG->getTRI()) << ' '; dbgs() <<
'\n'; dbgs() << "Current VGPRs: " << VregCurrentUsage
<< '\n'; dbgs() << "Current SGPRs: " << SregCurrentUsage
<< '\n';; } } while (false)
1576 dbgs() << "Available: ";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("misched")) { dbgs() << "Picking New Blocks\n"; dbgs()
<< "Available: "; for (SIScheduleBlock* Block : ReadyBlocks
) dbgs() << Block->getID() << ' '; dbgs() <<
"\nCurrent Live:\n"; for (unsigned Reg : LiveRegs) dbgs() <<
PrintVRegOrUnit(Reg, DAG->getTRI()) << ' '; dbgs() <<
'\n'; dbgs() << "Current VGPRs: " << VregCurrentUsage
<< '\n'; dbgs() << "Current SGPRs: " << SregCurrentUsage
<< '\n';; } } while (false)
1577 for (SIScheduleBlock* Block : ReadyBlocks)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("misched")) { dbgs() << "Picking New Blocks\n"; dbgs()
<< "Available: "; for (SIScheduleBlock* Block : ReadyBlocks
) dbgs() << Block->getID() << ' '; dbgs() <<
"\nCurrent Live:\n"; for (unsigned Reg : LiveRegs) dbgs() <<
PrintVRegOrUnit(Reg, DAG->getTRI()) << ' '; dbgs() <<
'\n'; dbgs() << "Current VGPRs: " << VregCurrentUsage
<< '\n'; dbgs() << "Current SGPRs: " << SregCurrentUsage
<< '\n';; } } while (false)
1578 dbgs() << Block->getID() << ' ';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("misched")) { dbgs() << "Picking New Blocks\n"; dbgs()
<< "Available: "; for (SIScheduleBlock* Block : ReadyBlocks
) dbgs() << Block->getID() << ' '; dbgs() <<
"\nCurrent Live:\n"; for (unsigned Reg : LiveRegs) dbgs() <<
PrintVRegOrUnit(Reg, DAG->getTRI()) << ' '; dbgs() <<
'\n'; dbgs() << "Current VGPRs: " << VregCurrentUsage
<< '\n'; dbgs() << "Current SGPRs: " << SregCurrentUsage
<< '\n';; } } while (false)
1579 dbgs() << "\nCurrent Live:\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("misched")) { dbgs() << "Picking New Blocks\n"; dbgs()
<< "Available: "; for (SIScheduleBlock* Block : ReadyBlocks
) dbgs() << Block->getID() << ' '; dbgs() <<
"\nCurrent Live:\n"; for (unsigned Reg : LiveRegs) dbgs() <<
PrintVRegOrUnit(Reg, DAG->getTRI()) << ' '; dbgs() <<
'\n'; dbgs() << "Current VGPRs: " << VregCurrentUsage
<< '\n'; dbgs() << "Current SGPRs: " << SregCurrentUsage
<< '\n';; } } while (false)
1580 for (unsigned Reg : LiveRegs)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("misched")) { dbgs() << "Picking New Blocks\n"; dbgs()
<< "Available: "; for (SIScheduleBlock* Block : ReadyBlocks
) dbgs() << Block->getID() << ' '; dbgs() <<
"\nCurrent Live:\n"; for (unsigned Reg : LiveRegs) dbgs() <<
PrintVRegOrUnit(Reg, DAG->getTRI()) << ' '; dbgs() <<
'\n'; dbgs() << "Current VGPRs: " << VregCurrentUsage
<< '\n'; dbgs() << "Current SGPRs: " << SregCurrentUsage
<< '\n';; } } while (false)
1581 dbgs() << PrintVRegOrUnit(Reg, DAG->getTRI()) << ' ';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("misched")) { dbgs() << "Picking New Blocks\n"; dbgs()
<< "Available: "; for (SIScheduleBlock* Block : ReadyBlocks
) dbgs() << Block->getID() << ' '; dbgs() <<
"\nCurrent Live:\n"; for (unsigned Reg : LiveRegs) dbgs() <<
PrintVRegOrUnit(Reg, DAG->getTRI()) << ' '; dbgs() <<
'\n'; dbgs() << "Current VGPRs: " << VregCurrentUsage
<< '\n'; dbgs() << "Current SGPRs: " << SregCurrentUsage
<< '\n';; } } while (false)
1582 dbgs() << '\n';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("misched")) { dbgs() << "Picking New Blocks\n"; dbgs()
<< "Available: "; for (SIScheduleBlock* Block : ReadyBlocks
) dbgs() << Block->getID() << ' '; dbgs() <<
"\nCurrent Live:\n"; for (unsigned Reg : LiveRegs) dbgs() <<
PrintVRegOrUnit(Reg, DAG->getTRI()) << ' '; dbgs() <<
'\n'; dbgs() << "Current VGPRs: " << VregCurrentUsage
<< '\n'; dbgs() << "Current SGPRs: " << SregCurrentUsage
<< '\n';; } } while (false)
1583 dbgs() << "Current VGPRs: " << VregCurrentUsage << '\n';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("misched")) { dbgs() << "Picking New Blocks\n"; dbgs()
<< "Available: "; for (SIScheduleBlock* Block : ReadyBlocks
) dbgs() << Block->getID() << ' '; dbgs() <<
"\nCurrent Live:\n"; for (unsigned Reg : LiveRegs) dbgs() <<
PrintVRegOrUnit(Reg, DAG->getTRI()) << ' '; dbgs() <<
'\n'; dbgs() << "Current VGPRs: " << VregCurrentUsage
<< '\n'; dbgs() << "Current SGPRs: " << SregCurrentUsage
<< '\n';; } } while (false)
1584 dbgs() << "Current SGPRs: " << SregCurrentUsage << '\n';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("misched")) { dbgs() << "Picking New Blocks\n"; dbgs()
<< "Available: "; for (SIScheduleBlock* Block : ReadyBlocks
) dbgs() << Block->getID() << ' '; dbgs() <<
"\nCurrent Live:\n"; for (unsigned Reg : LiveRegs) dbgs() <<
PrintVRegOrUnit(Reg, DAG->getTRI()) << ' '; dbgs() <<
'\n'; dbgs() << "Current VGPRs: " << VregCurrentUsage
<< '\n'; dbgs() << "Current SGPRs: " << SregCurrentUsage
<< '\n';; } } while (false)
1585 )do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("misched")) { dbgs() << "Picking New Blocks\n"; dbgs()
<< "Available: "; for (SIScheduleBlock* Block : ReadyBlocks
) dbgs() << Block->getID() << ' '; dbgs() <<
"\nCurrent Live:\n"; for (unsigned Reg : LiveRegs) dbgs() <<
PrintVRegOrUnit(Reg, DAG->getTRI()) << ' '; dbgs() <<
'\n'; dbgs() << "Current VGPRs: " << VregCurrentUsage
<< '\n'; dbgs() << "Current SGPRs: " << SregCurrentUsage
<< '\n';; } } while (false)
;
1586
1587 Cand.Block = nullptr;
1588 for (std::vector<SIScheduleBlock*>::iterator I = ReadyBlocks.begin(),
1589 E = ReadyBlocks.end(); I != E; ++I) {
1590 SIBlockSchedCandidate TryCand;
1591 TryCand.Block = *I;
1592 TryCand.IsHighLatency = TryCand.Block->isHighLatencyBlock();
1593 TryCand.VGPRUsageDiff =
1594 checkRegUsageImpact(TryCand.Block->getInRegs(),
1595 TryCand.Block->getOutRegs())[DAG->getVGPRSetID()];
1596 TryCand.NumSuccessors = TryCand.Block->getSuccs().size();
1597 TryCand.NumHighLatencySuccessors =
1598 TryCand.Block->getNumHighLatencySuccessors();
1599 TryCand.LastPosHighLatParentScheduled =
1600 (unsigned int) std::max<int> (0,
1601 LastPosHighLatencyParentScheduled[TryCand.Block->getID()] -
1602 LastPosWaitedHighLatency);
1603 TryCand.Height = TryCand.Block->Height;
1604 // Try not to increase VGPR usage too much, else we may spill.
1605 if (VregCurrentUsage > 120 ||
1606 Variant != SISchedulerBlockSchedulerVariant::BlockLatencyRegUsage) {
1607 if (!tryCandidateRegUsage(Cand, TryCand) &&
1608 Variant != SISchedulerBlockSchedulerVariant::BlockRegUsage)
1609 tryCandidateLatency(Cand, TryCand);
1610 } else {
1611 if (!tryCandidateLatency(Cand, TryCand))
1612 tryCandidateRegUsage(Cand, TryCand);
1613 }
1614 if (TryCand.Reason != NoCand) {
1615 Cand.setBest(TryCand);
1616 Best = I;
1617 DEBUG(dbgs() << "Best Current Choice: " << Cand.Block->getID() << ' 'do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("misched")) { dbgs() << "Best Current Choice: " <<
Cand.Block->getID() << ' ' << getReasonStr(Cand
.Reason) << '\n'; } } while (false)
1618 << getReasonStr(Cand.Reason) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("misched")) { dbgs() << "Best Current Choice: " <<
Cand.Block->getID() << ' ' << getReasonStr(Cand
.Reason) << '\n'; } } while (false)
;
1619 }
1620 }
1621
1622 DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("misched")) { dbgs() << "Picking: " << Cand.Block
->getID() << '\n'; dbgs() << "Is a block with high latency instruction: "
<< (Cand.IsHighLatency ? "yes\n" : "no\n"); dbgs() <<
"Position of last high latency dependency: " << Cand.LastPosHighLatParentScheduled
<< '\n'; dbgs() << "VGPRUsageDiff: " << Cand
.VGPRUsageDiff << '\n'; dbgs() << '\n';; } } while
(false)
1623 dbgs() << "Picking: " << Cand.Block->getID() << '\n';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("misched")) { dbgs() << "Picking: " << Cand.Block
->getID() << '\n'; dbgs() << "Is a block with high latency instruction: "
<< (Cand.IsHighLatency ? "yes\n" : "no\n"); dbgs() <<
"Position of last high latency dependency: " << Cand.LastPosHighLatParentScheduled
<< '\n'; dbgs() << "VGPRUsageDiff: " << Cand
.VGPRUsageDiff << '\n'; dbgs() << '\n';; } } while
(false)
1624 dbgs() << "Is a block with high latency instruction: "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("misched")) { dbgs() << "Picking: " << Cand.Block
->getID() << '\n'; dbgs() << "Is a block with high latency instruction: "
<< (Cand.IsHighLatency ? "yes\n" : "no\n"); dbgs() <<
"Position of last high latency dependency: " << Cand.LastPosHighLatParentScheduled
<< '\n'; dbgs() << "VGPRUsageDiff: " << Cand
.VGPRUsageDiff << '\n'; dbgs() << '\n';; } } while
(false)
1625 << (Cand.IsHighLatency ? "yes\n" : "no\n");do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("misched")) { dbgs() << "Picking: " << Cand.Block
->getID() << '\n'; dbgs() << "Is a block with high latency instruction: "
<< (Cand.IsHighLatency ? "yes\n" : "no\n"); dbgs() <<
"Position of last high latency dependency: " << Cand.LastPosHighLatParentScheduled
<< '\n'; dbgs() << "VGPRUsageDiff: " << Cand
.VGPRUsageDiff << '\n'; dbgs() << '\n';; } } while
(false)
1626 dbgs() << "Position of last high latency dependency: "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("misched")) { dbgs() << "Picking: " << Cand.Block
->getID() << '\n'; dbgs() << "Is a block with high latency instruction: "
<< (Cand.IsHighLatency ? "yes\n" : "no\n"); dbgs() <<
"Position of last high latency dependency: " << Cand.LastPosHighLatParentScheduled
<< '\n'; dbgs() << "VGPRUsageDiff: " << Cand
.VGPRUsageDiff << '\n'; dbgs() << '\n';; } } while
(false)
1627 << Cand.LastPosHighLatParentScheduled << '\n';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("misched")) { dbgs() << "Picking: " << Cand.Block
->getID() << '\n'; dbgs() << "Is a block with high latency instruction: "
<< (Cand.IsHighLatency ? "yes\n" : "no\n"); dbgs() <<
"Position of last high latency dependency: " << Cand.LastPosHighLatParentScheduled
<< '\n'; dbgs() << "VGPRUsageDiff: " << Cand
.VGPRUsageDiff << '\n'; dbgs() << '\n';; } } while
(false)
1628 dbgs() << "VGPRUsageDiff: " << Cand.VGPRUsageDiff << '\n';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("misched")) { dbgs() << "Picking: " << Cand.Block
->getID() << '\n'; dbgs() << "Is a block with high latency instruction: "
<< (Cand.IsHighLatency ? "yes\n" : "no\n"); dbgs() <<
"Position of last high latency dependency: " << Cand.LastPosHighLatParentScheduled
<< '\n'; dbgs() << "VGPRUsageDiff: " << Cand
.VGPRUsageDiff << '\n'; dbgs() << '\n';; } } while
(false)
1629 dbgs() << '\n';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("misched")) { dbgs() << "Picking: " << Cand.Block
->getID() << '\n'; dbgs() << "Is a block with high latency instruction: "
<< (Cand.IsHighLatency ? "yes\n" : "no\n"); dbgs() <<
"Position of last high latency dependency: " << Cand.LastPosHighLatParentScheduled
<< '\n'; dbgs() << "VGPRUsageDiff: " << Cand
.VGPRUsageDiff << '\n'; dbgs() << '\n';; } } while
(false)
1630 )do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("misched")) { dbgs() << "Picking: " << Cand.Block
->getID() << '\n'; dbgs() << "Is a block with high latency instruction: "
<< (Cand.IsHighLatency ? "yes\n" : "no\n"); dbgs() <<
"Position of last high latency dependency: " << Cand.LastPosHighLatParentScheduled
<< '\n'; dbgs() << "VGPRUsageDiff: " << Cand
.VGPRUsageDiff << '\n'; dbgs() << '\n';; } } while
(false)
;
1631
1632 Block = Cand.Block;
1633 ReadyBlocks.erase(Best);
1634 return Block;
1635}
1636
1637// Tracking of currently alive registers to determine VGPR Usage.
1638
1639void SIScheduleBlockScheduler::addLiveRegs(std::set<unsigned> &Regs) {
1640 for (unsigned Reg : Regs) {
1641 // For now only track virtual registers.
1642 if (!TargetRegisterInfo::isVirtualRegister(Reg))
1643 continue;
1644 // If not already in the live set, then add it.
1645 (void) LiveRegs.insert(Reg);
1646 }
1647}
1648
1649void SIScheduleBlockScheduler::decreaseLiveRegs(SIScheduleBlock *Block,
1650 std::set<unsigned> &Regs) {
1651 for (unsigned Reg : Regs) {
1652 // For now only track virtual registers.
1653 std::set<unsigned>::iterator Pos = LiveRegs.find(Reg);
1654 assert (Pos != LiveRegs.end() && // Reg must be live.((Pos != LiveRegs.end() && LiveRegsConsumers.find(Reg
) != LiveRegsConsumers.end() && LiveRegsConsumers[Reg
] >= 1) ? static_cast<void> (0) : __assert_fail ("Pos != LiveRegs.end() && LiveRegsConsumers.find(Reg) != LiveRegsConsumers.end() && LiveRegsConsumers[Reg] >= 1"
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn306458/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 1656, __PRETTY_FUNCTION__))
1655 LiveRegsConsumers.find(Reg) != LiveRegsConsumers.end() &&((Pos != LiveRegs.end() && LiveRegsConsumers.find(Reg
) != LiveRegsConsumers.end() && LiveRegsConsumers[Reg
] >= 1) ? static_cast<void> (0) : __assert_fail ("Pos != LiveRegs.end() && LiveRegsConsumers.find(Reg) != LiveRegsConsumers.end() && LiveRegsConsumers[Reg] >= 1"
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn306458/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 1656, __PRETTY_FUNCTION__))
1656 LiveRegsConsumers[Reg] >= 1)((Pos != LiveRegs.end() && LiveRegsConsumers.find(Reg
) != LiveRegsConsumers.end() && LiveRegsConsumers[Reg
] >= 1) ? static_cast<void> (0) : __assert_fail ("Pos != LiveRegs.end() && LiveRegsConsumers.find(Reg) != LiveRegsConsumers.end() && LiveRegsConsumers[Reg] >= 1"
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn306458/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 1656, __PRETTY_FUNCTION__))
;
1657 --LiveRegsConsumers[Reg];
1658 if (LiveRegsConsumers[Reg] == 0)
1659 LiveRegs.erase(Pos);
1660 }
1661}
1662
1663void SIScheduleBlockScheduler::releaseBlockSuccs(SIScheduleBlock *Parent) {
1664 for (const auto &Block : Parent->getSuccs()) {
1665 if (--BlockNumPredsLeft[Block.first->getID()] == 0)
1666 ReadyBlocks.push_back(Block.first);
1667
1668 if (Parent->isHighLatencyBlock() &&
1669 Block.second == SIScheduleBlockLinkKind::Data)
1670 LastPosHighLatencyParentScheduled[Block.first->getID()] = NumBlockScheduled;
1671 }
1672}
1673
1674void SIScheduleBlockScheduler::blockScheduled(SIScheduleBlock *Block) {
1675 decreaseLiveRegs(Block, Block->getInRegs());
1676 addLiveRegs(Block->getOutRegs());
1677 releaseBlockSuccs(Block);
1678 for (std::map<unsigned, unsigned>::iterator RegI =
1679 LiveOutRegsNumUsages[Block->getID()].begin(),
1680 E = LiveOutRegsNumUsages[Block->getID()].end(); RegI != E; ++RegI) {
1681 std::pair<unsigned, unsigned> RegP = *RegI;
1682 // We produce this register, thus it must not be previously alive.
1683 assert(LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end() ||((LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end
() || LiveRegsConsumers[RegP.first] == 0) ? static_cast<void
> (0) : __assert_fail ("LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end() || LiveRegsConsumers[RegP.first] == 0"
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn306458/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 1684, __PRETTY_FUNCTION__))
1684 LiveRegsConsumers[RegP.first] == 0)((LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end
() || LiveRegsConsumers[RegP.first] == 0) ? static_cast<void
> (0) : __assert_fail ("LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end() || LiveRegsConsumers[RegP.first] == 0"
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn306458/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 1684, __PRETTY_FUNCTION__))
;
1685 LiveRegsConsumers[RegP.first] += RegP.second;
1686 }
1687 if (LastPosHighLatencyParentScheduled[Block->getID()] >
1688 (unsigned)LastPosWaitedHighLatency)
1689 LastPosWaitedHighLatency =
1690 LastPosHighLatencyParentScheduled[Block->getID()];
1691 ++NumBlockScheduled;
1692}
1693
1694std::vector<int>
1695SIScheduleBlockScheduler::checkRegUsageImpact(std::set<unsigned> &InRegs,
1696 std::set<unsigned> &OutRegs) {
1697 std::vector<int> DiffSetPressure;
1698 DiffSetPressure.assign(DAG->getTRI()->getNumRegPressureSets(), 0);
1699
1700 for (unsigned Reg : InRegs) {
1701 // For now only track virtual registers.
1702 if (!TargetRegisterInfo::isVirtualRegister(Reg))
1703 continue;
1704 if (LiveRegsConsumers[Reg] > 1)
1705 continue;
1706 PSetIterator PSetI = DAG->getMRI()->getPressureSets(Reg);
1707 for (; PSetI.isValid(); ++PSetI) {
1708 DiffSetPressure[*PSetI] -= PSetI.getWeight();
1709 }
1710 }
1711
1712 for (unsigned Reg : OutRegs) {
1713 // For now only track virtual registers.
1714 if (!TargetRegisterInfo::isVirtualRegister(Reg))
1715 continue;
1716 PSetIterator PSetI = DAG->getMRI()->getPressureSets(Reg);
1717 for (; PSetI.isValid(); ++PSetI) {
1718 DiffSetPressure[*PSetI] += PSetI.getWeight();
1719 }
1720 }
1721
1722 return DiffSetPressure;
1723}
1724
1725// SIScheduler //
1726
1727struct SIScheduleBlockResult
1728SIScheduler::scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
1729 SISchedulerBlockSchedulerVariant ScheduleVariant) {
1730 SIScheduleBlocks Blocks = BlockCreator.getBlocks(BlockVariant);
1731 SIScheduleBlockScheduler Scheduler(DAG, ScheduleVariant, Blocks);
1732 std::vector<SIScheduleBlock*> ScheduledBlocks;
1733 struct SIScheduleBlockResult Res;
1734
1735 ScheduledBlocks = Scheduler.getBlocks();
1736
1737 for (unsigned b = 0; b < ScheduledBlocks.size(); ++b) {
1738 SIScheduleBlock *Block = ScheduledBlocks[b];
1739 std::vector<SUnit*> SUs = Block->getScheduledUnits();
1740
1741 for (SUnit* SU : SUs)
1742 Res.SUs.push_back(SU->NodeNum);
1743 }
1744
1745 Res.MaxSGPRUsage = Scheduler.getSGPRUsage();
1746 Res.MaxVGPRUsage = Scheduler.getVGPRUsage();
1747 return Res;
1748}
1749
1750// SIScheduleDAGMI //
1751
1752SIScheduleDAGMI::SIScheduleDAGMI(MachineSchedContext *C) :
1753 ScheduleDAGMILive(C, llvm::make_unique<GenericScheduler>(C)) {
1754 SITII = static_cast<const SIInstrInfo*>(TII);
1755 SITRI = static_cast<const SIRegisterInfo*>(TRI);
1756
1757 VGPRSetID = SITRI->getVGPRPressureSet();
1758 SGPRSetID = SITRI->getSGPRPressureSet();
1759}
1760
// Defaulted destructor, defined out-of-line in this translation unit.
1761SIScheduleDAGMI::~SIScheduleDAGMI() = default;
1762
1763// Code adapted from scheduleDAG.cpp
1764// Does a topological sort over the SUs.
1765// Both TopDown and BottomUp
1766void SIScheduleDAGMI::topologicalSort() {
1767 Topo.InitDAGTopologicalSorting();
1768
1769 TopDownIndex2SU = std::vector<int>(Topo.begin(), Topo.end());
1770 BottomUpIndex2SU = std::vector<int>(Topo.rbegin(), Topo.rend());
1771}
1772
// Move low latencies further from their user without
// increasing SGPR usage (in general).
// This is to be replaced by a better pass that would
// take into account SGPR usage (based on VGPR Usage
// and the corresponding wavefront count), that would
// try to merge groups of loads if it makes sense, etc.
//
// Invariant maintained throughout: ScheduledSUnits is the schedule
// (position -> SU NodeNum) and ScheduledSUnitsInv is its inverse
// (SU NodeNum -> position); every shift below updates both.
1779void SIScheduleDAGMI::moveLowLatencies() {
1780 unsigned DAGSize = SUnits.size();
1781 int LastLowLatencyUser = -1;
1782 int LastLowLatencyPos = -1;
1783
1784 for (unsigned i = 0, e = ScheduledSUnits.size(); i != e; ++i) {
1785 SUnit *SU = &SUnits[ScheduledSUnits[i]];
1786 bool IsLowLatencyUser = false;
1787 unsigned MinPos = 0;
1788
// MinPos becomes one past the latest-scheduled predecessor: the
// earliest position SU may legally be moved to.
1789 for (SDep& PredDep : SU->Preds) {
1790 SUnit *Pred = PredDep.getSUnit();
1791 if (SITII->isLowLatencyInstruction(*Pred->getInstr())) {
1792 IsLowLatencyUser = true;
1793 }
// Predecessors outside the region (boundary nodes, presumably —
// TODO confirm) have no schedule position; skip them.
1794 if (Pred->NodeNum >= DAGSize)
1795 continue;
1796 unsigned PredPos = ScheduledSUnitsInv[Pred->NodeNum];
1797 if (PredPos >= MinPos)
1798 MinPos = PredPos + 1;
1799 }
1800
1801 if (SITII->isLowLatencyInstruction(*SU->getInstr())) {
// Hoist a low-latency instruction as early as legality allows,
// but never before the last user of a low-latency result nor
// before the previously placed low-latency instruction.
1802 unsigned BestPos = LastLowLatencyUser + 1;
1803 if ((int)BestPos <= LastLowLatencyPos)
1804 BestPos = LastLowLatencyPos + 1;
1805 if (BestPos < MinPos)
1806 BestPos = MinPos;
// Shift the range [BestPos, i) one slot later, keeping the
// inverse map in sync, then drop SU into BestPos.
1807 if (BestPos < i) {
1808 for (unsigned u = i; u > BestPos; --u) {
1809 ++ScheduledSUnitsInv[ScheduledSUnits[u-1]];
1810 ScheduledSUnits[u] = ScheduledSUnits[u-1];
1811 }
1812 ScheduledSUnits[BestPos] = SU->NodeNum;
1813 ScheduledSUnitsInv[SU->NodeNum] = BestPos;
1814 }
1815 LastLowLatencyPos = BestPos;
1816 if (IsLowLatencyUser)
1817 LastLowLatencyUser = BestPos;
1818 } else if (IsLowLatencyUser) {
1819 LastLowLatencyUser = i;
1820 // Moves COPY instructions on which depends
1821 // the low latency instructions too.
1822 } else if (SU->getInstr()->getOpcode() == AMDGPU::COPY) {
1823 bool CopyForLowLat = false;
1824 for (SDep& SuccDep : SU->Succs) {
1825 SUnit *Succ = SuccDep.getSUnit();
1826 if (SITII->isLowLatencyInstruction(*Succ->getInstr())) {
1827 CopyForLowLat = true;
1828 }
1829 }
1830 if (!CopyForLowLat)
1831 continue;
// Hoist the COPY to the earliest legal position (same shifting
// scheme as above) so its low-latency consumers can move up too.
1832 if (MinPos < i) {
1833 for (unsigned u = i; u > MinPos; --u) {
1834 ++ScheduledSUnitsInv[ScheduledSUnits[u-1]];
1835 ScheduledSUnits[u] = ScheduledSUnits[u-1];
1836 }
1837 ScheduledSUnits[MinPos] = SU->NodeNum;
1838 ScheduledSUnitsInv[SU->NodeNum] = MinPos;
1839 }
1840 }
1841 }
1842}
1843
1844void SIScheduleDAGMI::restoreSULinksLeft() {
1845 for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
1846 SUnits[i].isScheduled = false;
1847 SUnits[i].WeakPredsLeft = SUnitsLinksBackup[i].WeakPredsLeft;
1848 SUnits[i].NumPredsLeft = SUnitsLinksBackup[i].NumPredsLeft;
1849 SUnits[i].WeakSuccsLeft = SUnitsLinksBackup[i].WeakSuccsLeft;
1850 SUnits[i].NumSuccsLeft = SUnitsLinksBackup[i].NumSuccsLeft;
1851 }
1852}
1853
1854// Return the Vgpr and Sgpr usage corresponding to some virtual registers.
1855template<typename _Iterator> void
1856SIScheduleDAGMI::fillVgprSgprCost(_Iterator First, _Iterator End,
1857 unsigned &VgprUsage, unsigned &SgprUsage) {
1858 VgprUsage = 0;
1859 SgprUsage = 0;
1860 for (_Iterator RegI = First; RegI != End; ++RegI) {
1861 unsigned Reg = *RegI;
1862 // For now only track virtual registers
1863 if (!TargetRegisterInfo::isVirtualRegister(Reg))
1864 continue;
1865 PSetIterator PSetI = MRI.getPressureSets(Reg);
1866 for (; PSetI.isValid(); ++PSetI) {
1867 if (*PSetI == VGPRSetID)
1868 VgprUsage += PSetI.getWeight();
1869 else if (*PSetI == SGPRSetID)
1870 SgprUsage += PSetI.getWeight();
1871 }
1872 }
1873}
1874
1875void SIScheduleDAGMI::schedule()
1876{
1877 SmallVector<SUnit*, 8> TopRoots, BotRoots;
1878 SIScheduleBlockResult Best, Temp;
1879 DEBUG(dbgs() << "Preparing Scheduling\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("misched")) { dbgs() << "Preparing Scheduling\n"; } } while
(false)
;
1880
1881 buildDAGWithRegPressure();
1882 DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("misched")) { for(SUnit& SU : SUnits) SU.dumpAll(this); }
} while (false)
1883 for(SUnit& SU : SUnits)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("misched")) { for(SUnit& SU : SUnits) SU.dumpAll(this); }
} while (false)
1884 SU.dumpAll(this)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("misched")) { for(SUnit& SU : SUnits) SU.dumpAll(this); }
} while (false)
1885 )do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("misched")) { for(SUnit& SU : SUnits) SU.dumpAll(this); }
} while (false)
;
1886
1887 topologicalSort();
1888 findRootsAndBiasEdges(TopRoots, BotRoots);
1889 // We reuse several ScheduleDAGMI and ScheduleDAGMILive
1890 // functions, but to make them happy we must initialize
1891 // the default Scheduler implementation (even if we do not
1892 // run it)
1893 SchedImpl->initialize(this);
1894 initQueues(TopRoots, BotRoots);
1895
1896 // Fill some stats to help scheduling.
1897
1898 SUnitsLinksBackup = SUnits;
1899 IsLowLatencySU.clear();
1900 LowLatencyOffset.clear();
1901 IsHighLatencySU.clear();
1902
1903 IsLowLatencySU.resize(SUnits.size(), 0);
1904 LowLatencyOffset.resize(SUnits.size(), 0);
1905 IsHighLatencySU.resize(SUnits.size(), 0);
1906
1907 for (unsigned i = 0, e = (unsigned)SUnits.size(); i != e; ++i) {
1908 SUnit *SU = &SUnits[i];
1909 unsigned BaseLatReg;
1910 int64_t OffLatReg;
1911 if (SITII->isLowLatencyInstruction(*SU->getInstr())) {
1912 IsLowLatencySU[i] = 1;
1913 if (SITII->getMemOpBaseRegImmOfs(*SU->getInstr(), BaseLatReg, OffLatReg,
1914 TRI))
1915 LowLatencyOffset[i] = OffLatReg;
1916 } else if (SITII->isHighLatencyInstruction(*SU->getInstr()))
1917 IsHighLatencySU[i] = 1;
1918 }
1919
1920 SIScheduler Scheduler(this);
1921 Best = Scheduler.scheduleVariant(SISchedulerBlockCreatorVariant::LatenciesAlone,
1922 SISchedulerBlockSchedulerVariant::BlockLatencyRegUsage);
1923
1924 // if VGPR usage is extremely high, try other good performing variants
1925 // which could lead to lower VGPR usage
1926 if (Best.MaxVGPRUsage > 180) {
1927 static const std::pair<SISchedulerBlockCreatorVariant,
1928 SISchedulerBlockSchedulerVariant>
1929 Variants[] = {
1930 { LatenciesAlone, BlockRegUsageLatency },
1931// { LatenciesAlone, BlockRegUsage },
1932 { LatenciesGrouped, BlockLatencyRegUsage },
1933// { LatenciesGrouped, BlockRegUsageLatency },
1934// { LatenciesGrouped, BlockRegUsage },
1935 { LatenciesAlonePlusConsecutive, BlockLatencyRegUsage },
1936// { LatenciesAlonePlusConsecutive, BlockRegUsageLatency },
1937// { LatenciesAlonePlusConsecutive, BlockRegUsage }
1938 };
1939 for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
1940 Temp = Scheduler.scheduleVariant(v.first, v.second);
1941 if (Temp.MaxVGPRUsage < Best.MaxVGPRUsage)
1942 Best = Temp;
1943 }
1944 }
1945 // if VGPR usage is still extremely high, we may spill. Try other variants
1946 // which are less performing, but that could lead to lower VGPR usage.
1947 if (Best.MaxVGPRUsage > 200) {
1948 static const std::pair<SISchedulerBlockCreatorVariant,
1949 SISchedulerBlockSchedulerVariant>
1950 Variants[] = {
1951// { LatenciesAlone, BlockRegUsageLatency },
1952 { LatenciesAlone, BlockRegUsage },
1953// { LatenciesGrouped, BlockLatencyRegUsage },
1954 { LatenciesGrouped, BlockRegUsageLatency },
1955 { LatenciesGrouped, BlockRegUsage },
1956// { LatenciesAlonePlusConsecutive, BlockLatencyRegUsage },
1957 { LatenciesAlonePlusConsecutive, BlockRegUsageLatency },
1958 { LatenciesAlonePlusConsecutive, BlockRegUsage }
1959 };
1960 for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
1961 Temp = Scheduler.scheduleVariant(v.first, v.second);
1962 if (Temp.MaxVGPRUsage < Best.MaxVGPRUsage)
1963 Best = Temp;
1964 }
1965 }
1966
1967 ScheduledSUnits = Best.SUs;
1968 ScheduledSUnitsInv.resize(SUnits.size());
1969
1970 for (unsigned i = 0, e = (unsigned)SUnits.size(); i != e; ++i) {
1971 ScheduledSUnitsInv[ScheduledSUnits[i]] = i;
1972 }
1973
1974 moveLowLatencies();
1975
1976 // Tell the outside world about the result of the scheduling.
1977
1978 assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker")((TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker"
) ? static_cast<void> (0) : __assert_fail ("TopRPTracker.getPos() == RegionBegin && \"bad initial Top tracker\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn306458/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 1978, __PRETTY_FUNCTION__))
;
1979 TopRPTracker.setPos(CurrentTop);
1980
1981 for (std::vector<unsigned>::iterator I = ScheduledSUnits.begin(),
1982 E = ScheduledSUnits.end(); I != E; ++I) {
1983 SUnit *SU = &SUnits[*I];
1984
1985 scheduleMI(SU, true);
1986
1987 DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("misched")) { dbgs() << "Scheduling SU(" << SU->
NodeNum << ") " << *SU->getInstr(); } } while (
false)
1988 << *SU->getInstr())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("misched")) { dbgs() << "Scheduling SU(" << SU->
NodeNum << ") " << *SU->getInstr(); } } while (
false)
;
1989 }
1990
1991 assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.")((CurrentTop == CurrentBottom && "Nonempty unscheduled zone."
) ? static_cast<void> (0) : __assert_fail ("CurrentTop == CurrentBottom && \"Nonempty unscheduled zone.\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn306458/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 1991, __PRETTY_FUNCTION__))
;
1992
1993 placeDebugValues();
1994
1995 DEBUG({do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("misched")) { { unsigned BBNum = begin()->getParent()->
getNumber(); dbgs() << "*** Final schedule for BB#" <<
BBNum << " ***\n"; dumpSchedule(); dbgs() << '\n'
; }; } } while (false)
1996 unsigned BBNum = begin()->getParent()->getNumber();do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("misched")) { { unsigned BBNum = begin()->getParent()->
getNumber(); dbgs() << "*** Final schedule for BB#" <<
BBNum << " ***\n"; dumpSchedule(); dbgs() << '\n'
; }; } } while (false)
1997 dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("misched")) { { unsigned BBNum = begin()->getParent()->
getNumber(); dbgs() << "*** Final schedule for BB#" <<
BBNum << " ***\n"; dumpSchedule(); dbgs() << '\n'
; }; } } while (false)
1998 dumpSchedule();do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("misched")) { { unsigned BBNum = begin()->getParent()->
getNumber(); dbgs() << "*** Final schedule for BB#" <<
BBNum << " ***\n"; dumpSchedule(); dbgs() << '\n'
; }; } } while (false)
1999 dbgs() << '\n';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("misched")) { { unsigned BBNum = begin()->getParent()->
getNumber(); dbgs() << "*** Final schedule for BB#" <<
BBNum << " ***\n"; dumpSchedule(); dbgs() << '\n'
; }; } } while (false)
2000 })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("misched")) { { unsigned BBNum = begin()->getParent()->
getNumber(); dbgs() << "*** Final schedule for BB#" <<
BBNum << " ***\n"; dumpSchedule(); dbgs() << '\n'
; }; } } while (false)
;
2001}