Line data Source code
1 : //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : //
10 : // MachineScheduler schedules machine instructions after phi elimination. It
11 : // preserves LiveIntervals so it can be invoked before register allocation.
12 : //
13 : //===----------------------------------------------------------------------===//
14 :
15 : #include "llvm/CodeGen/MachineScheduler.h"
16 : #include "llvm/ADT/ArrayRef.h"
17 : #include "llvm/ADT/BitVector.h"
18 : #include "llvm/ADT/DenseMap.h"
19 : #include "llvm/ADT/PriorityQueue.h"
20 : #include "llvm/ADT/STLExtras.h"
21 : #include "llvm/ADT/SmallVector.h"
22 : #include "llvm/ADT/iterator_range.h"
23 : #include "llvm/Analysis/AliasAnalysis.h"
24 : #include "llvm/CodeGen/LiveInterval.h"
25 : #include "llvm/CodeGen/LiveIntervals.h"
26 : #include "llvm/CodeGen/MachineBasicBlock.h"
27 : #include "llvm/CodeGen/MachineDominators.h"
28 : #include "llvm/CodeGen/MachineFunction.h"
29 : #include "llvm/CodeGen/MachineFunctionPass.h"
30 : #include "llvm/CodeGen/MachineInstr.h"
31 : #include "llvm/CodeGen/MachineLoopInfo.h"
32 : #include "llvm/CodeGen/MachineOperand.h"
33 : #include "llvm/CodeGen/MachinePassRegistry.h"
34 : #include "llvm/CodeGen/MachineRegisterInfo.h"
35 : #include "llvm/CodeGen/Passes.h"
36 : #include "llvm/CodeGen/RegisterClassInfo.h"
37 : #include "llvm/CodeGen/RegisterPressure.h"
38 : #include "llvm/CodeGen/ScheduleDAG.h"
39 : #include "llvm/CodeGen/ScheduleDAGInstrs.h"
40 : #include "llvm/CodeGen/ScheduleDAGMutation.h"
41 : #include "llvm/CodeGen/ScheduleDFS.h"
42 : #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
43 : #include "llvm/CodeGen/SlotIndexes.h"
44 : #include "llvm/CodeGen/TargetInstrInfo.h"
45 : #include "llvm/CodeGen/TargetLowering.h"
46 : #include "llvm/CodeGen/TargetPassConfig.h"
47 : #include "llvm/CodeGen/TargetRegisterInfo.h"
48 : #include "llvm/CodeGen/TargetSchedule.h"
49 : #include "llvm/CodeGen/TargetSubtargetInfo.h"
50 : #include "llvm/Config/llvm-config.h"
51 : #include "llvm/MC/LaneBitmask.h"
52 : #include "llvm/Pass.h"
53 : #include "llvm/Support/CommandLine.h"
54 : #include "llvm/Support/Compiler.h"
55 : #include "llvm/Support/Debug.h"
56 : #include "llvm/Support/ErrorHandling.h"
57 : #include "llvm/Support/GraphWriter.h"
58 : #include "llvm/Support/MachineValueType.h"
59 : #include "llvm/Support/raw_ostream.h"
60 : #include <algorithm>
61 : #include <cassert>
62 : #include <cstdint>
63 : #include <iterator>
64 : #include <limits>
65 : #include <memory>
66 : #include <string>
67 : #include <tuple>
68 : #include <utility>
69 : #include <vector>
70 :
71 : using namespace llvm;
72 :
73 : #define DEBUG_TYPE "machine-scheduler"
74 :
75 : namespace llvm {
76 :
77 : cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
78 : cl::desc("Force top-down list scheduling"));
79 : cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
80 : cl::desc("Force bottom-up list scheduling"));
81 : cl::opt<bool>
82 : DumpCriticalPathLength("misched-dcpl", cl::Hidden,
83 : cl::desc("Print critical path length to stdout"));
84 :
85 : } // end namespace llvm
86 :
87 : #ifndef NDEBUG
88 : static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
89 : cl::desc("Pop up a window to show MISched dags after they are processed"));
90 :
91 : /// In some situations a few uninteresting nodes depend on nearly all other
92 : /// nodes in the graph; provide a cutoff to hide them.
93 : static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
94 : cl::desc("Hide nodes with more predecessors/successors than the cutoff"));
95 :
96 : static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
97 : cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
98 :
99 : static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
100 : cl::desc("Only schedule this function"));
101 : static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
102 : cl::desc("Only schedule this MBB#"));
103 : static cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
104 : cl::desc("Print schedule DAGs"));
105 : #else
106 : static const bool ViewMISchedDAGs = false;
107 : static const bool PrintDAGs = false;
108 : #endif // NDEBUG
109 :
110 : /// Avoid quadratic complexity in unusually large basic blocks by limiting the
111 : /// size of the ready lists.
112 : static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,
113 : cl::desc("Limit ready list to N instructions"), cl::init(256));
114 :
115 : static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
116 : cl::desc("Enable register pressure scheduling."), cl::init(true));
117 :
118 : static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
119 : cl::desc("Enable cyclic critical path analysis."), cl::init(true));
120 :
121 : static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
122 : cl::desc("Enable memop clustering."),
123 : cl::init(true));
124 :
125 : static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
126 : cl::desc("Verify machine instrs before and after machine scheduling"));
127 :
128 : // DAG subtrees must have at least this many nodes.
129 : static const unsigned MinSubtreeSize = 8;
130 :
131 : // Pin the vtables to this file.
132 0 : void MachineSchedStrategy::anchor() {}
133 :
134 0 : void ScheduleDAGMutation::anchor() {}
135 :
136 : //===----------------------------------------------------------------------===//
137 : // Machine Instruction Scheduling Pass and Registry
138 : //===----------------------------------------------------------------------===//
139 :
140 23651 : MachineSchedContext::MachineSchedContext() {
141 23651 : RegClassInfo = new RegisterClassInfo();
142 23651 : }
143 :
144 46974 : MachineSchedContext::~MachineSchedContext() {
145 23487 : delete RegClassInfo;
146 23487 : }
147 0 :
148 : namespace {
149 0 :
150 46974 : /// Base class for a machine scheduler class that can run at any point.
151 23487 : class MachineSchedulerBase : public MachineSchedContext,
152 23487 : public MachineFunctionPass {
153 : public:
154 : MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}
155 :
156 : void print(raw_ostream &O, const Module* = nullptr) const override;
157 :
158 : protected:
159 : void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
160 23651 : };
161 :
162 : /// MachineScheduler runs after coalescing and before register allocation.
163 : class MachineScheduler : public MachineSchedulerBase {
164 : public:
165 : MachineScheduler();
166 :
167 : void getAnalysisUsage(AnalysisUsage &AU) const override;
168 :
169 : bool runOnMachineFunction(MachineFunction&) override;
170 :
171 : static char ID; // Class identification, replacement for typeinfo
172 :
173 : protected:
174 : ScheduleDAGInstrs *createMachineScheduler();
175 : };
176 :
177 : /// PostMachineScheduler runs shortly before code emission.
178 : class PostMachineScheduler : public MachineSchedulerBase {
179 : public:
180 : PostMachineScheduler();
181 :
182 : void getAnalysisUsage(AnalysisUsage &AU) const override;
183 :
184 : bool runOnMachineFunction(MachineFunction&) override;
185 :
186 : static char ID; // Class identification, replacement for typeinfo
187 :
188 : protected:
189 : ScheduleDAGInstrs *createPostMachineScheduler();
190 : };
191 :
192 : } // end anonymous namespace
193 :
194 : char MachineScheduler::ID = 0;
195 :
196 : char &llvm::MachineSchedulerID = MachineScheduler::ID;
197 :
198 : INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE,
199 : "Machine Instruction Scheduler", false, false)
200 : INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
201 : INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
202 : INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
203 : INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
204 31780 : INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE,
205 : "Machine Instruction Scheduler", false, false)
206 31780 :
207 31780 : MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {
208 31780 : initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
209 31780 : }
210 168905 :
211 : void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
212 : AU.setPreservesCFG();
213 20198 : AU.addRequiredID(MachineDominatorsID);
214 20198 : AU.addRequired<MachineLoopInfo>();
215 20198 : AU.addRequired<AAResultsWrapperPass>();
216 : AU.addRequired<TargetPassConfig>();
217 20035 : AU.addRequired<SlotIndexes>();
218 20035 : AU.addPreserved<SlotIndexes>();
219 20035 : AU.addRequired<LiveIntervals>();
220 : AU.addPreserved<LiveIntervals>();
221 : MachineFunctionPass::getAnalysisUsage(AU);
222 : }
223 :
224 : char PostMachineScheduler::ID = 0;
225 :
226 : char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;
227 20035 :
228 20035 : INITIALIZE_PASS(PostMachineScheduler, "postmisched",
229 : "PostRA Machine Instruction Scheduler", false, false)
230 :
231 : PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {
232 : initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry());
233 : }
234 88600 :
235 : void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
236 : AU.setPreservesCFG();
237 3453 : AU.addRequiredID(MachineDominatorsID);
238 3453 : AU.addRequired<MachineLoopInfo>();
239 3453 : AU.addRequired<TargetPassConfig>();
240 : MachineFunctionPass::getAnalysisUsage(AU);
241 3424 : }
242 3424 :
243 3424 : MachinePassRegistry MachineSchedRegistry::Registry;
244 :
245 : /// A dummy default scheduler factory that indicates whether the scheduler
246 : /// has been overridden on the command line.
247 3424 : static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
248 : return nullptr;
249 : }
250 :
251 : /// MachineSchedOpt allows command line selection of the scheduler.
252 : static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
253 0 : RegisterPassParser<MachineSchedRegistry>>
254 0 : MachineSchedOpt("misched",
255 : cl::init(&useDefaultMachineSched), cl::Hidden,
256 : cl::desc("Machine instruction scheduler to use"));
257 :
258 : static MachineSchedRegistry
259 : DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
260 : useDefaultMachineSched);
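
The registry above is how alternative schedulers are made selectable from the command line. As a sketch of typical usage (the factory and registration names below are hypothetical, not part of this file), a constructor is registered under a name and then chosen with -misched=<name>; this one simply reuses createGenericSchedLive(), the same fallback used by createMachineScheduler() later in this file.

// Hypothetical example only: register an alias of the generic pre-RA scheduler.
static ScheduleDAGInstrs *createGenericAliasSched(MachineSchedContext *C) {
  // A real client would typically build a ScheduleDAGMILive with its own
  // MachineSchedStrategy here instead of reusing the stock one.
  return createGenericSchedLive(C);
}

static MachineSchedRegistry
GenericAliasRegistry("generic-alias", "Alias of the generic scheduler (example).",
                     createGenericAliasSched);

// Selected with: llc -misched=generic-alias ...
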
261 :
262 : static cl::opt<bool> EnableMachineSched(
263 : "enable-misched",
264 : cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
265 : cl::Hidden);
266 :
267 : static cl::opt<bool> EnablePostRAMachineSched(
268 : "enable-post-misched",
269 : cl::desc("Enable the post-ra machine instruction scheduling pass."),
270 : cl::init(true), cl::Hidden);
271 :
272 : /// Decrement this iterator until reaching the top or a non-debug instr.
273 : static MachineBasicBlock::const_iterator
274 : priorNonDebug(MachineBasicBlock::const_iterator I,
275 : MachineBasicBlock::const_iterator Beg) {
276 : assert(I != Beg && "reached the top of the region, cannot decrement");
277 : while (--I != Beg) {
278 : if (!I->isDebugInstr())
279 : break;
280 2897911 : }
281 : return I;
282 : }
283 2981912 :
284 : /// Non-const version.
285 : static MachineBasicBlock::iterator
286 : priorNonDebug(MachineBasicBlock::iterator I,
287 2897911 : MachineBasicBlock::const_iterator Beg) {
288 : return priorNonDebug(MachineBasicBlock::const_iterator(I), Beg)
289 : .getNonConstIterator();
290 : }
291 :
292 : /// If this iterator is a debug value, increment until reaching the End or a
293 : /// non-debug instruction.
294 2897911 : static MachineBasicBlock::const_iterator
295 : nextIfDebug(MachineBasicBlock::const_iterator I,
296 : MachineBasicBlock::const_iterator End) {
297 : for(; I != End; ++I) {
298 : if (!I->isDebugInstr())
299 : break;
300 : }
301 1903830 : return I;
302 : }
303 2001338 :
304 : /// Non-const version.
305 : static MachineBasicBlock::iterator
306 : nextIfDebug(MachineBasicBlock::iterator I,
307 1903830 : MachineBasicBlock::const_iterator End) {
308 : return nextIfDebug(MachineBasicBlock::const_iterator(I), End)
309 : .getNonConstIterator();
310 : }
311 :
312 : /// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
313 : ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
314 1053111 : // Select the scheduler, or set the default.
315 : MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
316 : if (Ctor != useDefaultMachineSched)
317 : return Ctor(this);
318 :
319 167111 : // Get the default scheduler set by the target for this function.
320 : ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
321 : if (Scheduler)
322 167111 : return Scheduler;
323 17 :
324 : // Default to GenericScheduler.
325 : return createGenericSchedLive(this);
326 167094 : }
327 167094 :
328 : /// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
329 : /// the caller. We don't have a command line option to override the postRA
330 : /// scheduler. The Target must configure it.
331 16520 : ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
332 : // Get the postRA scheduler set by the target for this function.
333 : ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
334 : if (Scheduler)
335 : return Scheduler;
336 :
337 22418 : // Default to GenericScheduler.
338 : return createGenericSchedPostRA(this);
339 22418 : }
340 22418 :
341 : /// Top-level MachineScheduler pass driver.
342 : ///
343 : /// Visit blocks in function order. Divide each block into scheduling regions
344 8920 : /// and visit them bottom-up. Visiting regions bottom-up is not required, but is
345 : /// consistent with the DAG builder, which traverses the interior of the
346 : /// scheduling regions bottom-up.
347 : ///
348 : /// This design avoids exposing scheduling boundaries to the DAG builder,
349 : /// simplifying the DAG builder's support for "special" target instructions.
350 : /// At the same time the design allows target schedulers to operate across
351 : /// scheduling boundaries, for example to bundle the boundary instructions
352 : /// without reordering them. This creates complexity, because the target
353 : /// scheduler must update the RegionBegin and RegionEnd positions cached by
354 : /// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
355 : /// design would be to split blocks at scheduling boundaries, but LLVM has a
356 : /// general bias against block splitting purely for implementation simplicity.
357 : bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
358 : if (skipFunction(mf.getFunction()))
359 : return false;
360 :
361 : if (EnableMachineSched.getNumOccurrences()) {
362 : if (!EnableMachineSched)
363 197898 : return false;
364 197898 : } else if (!mf.getSubtarget().enableMachineScheduler())
365 : return false;
366 :
367 197712 : LLVM_DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
368 451 :
369 : // Initialize the context of the pass.
370 197261 : MF = &mf;
371 : MLI = &getAnalysis<MachineLoopInfo>();
372 : MDT = &getAnalysis<MachineDominatorTree>();
373 : PassConfig = &getAnalysis<TargetPassConfig>();
374 : AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
375 :
376 167111 : LIS = &getAnalysis<LiveIntervals>();
377 167111 :
378 167111 : if (VerifyScheduling) {
379 167111 : LLVM_DEBUG(LIS->dump());
380 167111 : MF->verify(this, "Before machine scheduling.");
381 : }
382 167111 : RegClassInfo->runOnMachineFunction(*MF);
383 :
384 167111 : // Instantiate the selected scheduler for this target, function, and
385 : // optimization level.
386 1 : std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
387 : scheduleRegions(*Scheduler, false);
388 167111 :
389 : LLVM_DEBUG(LIS->dump());
390 : if (VerifyScheduling)
391 : MF->verify(this, "After machine scheduling.");
392 167111 : return true;
393 167111 : }
394 :
395 : bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
396 167111 : if (skipFunction(mf.getFunction()))
397 1 : return false;
398 :
399 : if (EnablePostRAMachineSched.getNumOccurrences()) {
400 : if (!EnablePostRAMachineSched)
401 27273 : return false;
402 27273 : } else if (!mf.getSubtarget().enablePostRAScheduler()) {
403 : LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
404 : return false;
405 27266 : }
406 45 : LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
407 :
408 27221 : // Initialize the context of the pass.
409 : MF = &mf;
410 : MLI = &getAnalysis<MachineLoopInfo>();
411 : PassConfig = &getAnalysis<TargetPassConfig>();
412 :
413 : if (VerifyScheduling)
414 : MF->verify(this, "Before post machine scheduling.");
415 22418 :
416 22418 : // Instantiate the selected scheduler for this target, function, and
417 22418 : // optimization level.
418 : std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
419 22418 : scheduleRegions(*Scheduler, true);
420 0 :
421 : if (VerifyScheduling)
422 : MF->verify(this, "After post machine scheduling.");
423 : return true;
424 22418 : }
425 22418 :
426 : /// Return true if the given instruction should not be included in a scheduling
427 22418 : /// region.
428 0 : ///
429 : /// MachineScheduler does not currently support scheduling across calls. To
430 : /// handle calls, the DAG builder needs to be modified to create register
431 : /// anti/output dependencies on the registers clobbered by the call's regmask
432 : /// operand. In PreRA scheduling, the stack pointer adjustment already prevents
433 : /// scheduling across calls. In PostRA scheduling, we need the isCall to enforce
434 : /// the boundary, but there would be no benefit to postRA scheduling across
435 : /// calls this late anyway.
436 : static bool isSchedBoundary(MachineBasicBlock::iterator MI,
437 : MachineBasicBlock *MBB,
438 : MachineFunction *MF,
439 : const TargetInstrInfo *TII) {
440 : return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
441 : }
442 3923495 :
443 : /// A region of an MBB for scheduling.
444 : namespace {
445 : struct SchedRegion {
446 3923495 : /// RegionBegin is the first instruction in the scheduling region, and
447 : /// RegionEnd is either MBB->end() or the scheduling boundary after the
448 : /// last instruction in the scheduling region. These iterators cannot refer
449 : /// to instructions outside of the identified scheduling region because
450 : /// those may be reordered before scheduling this region.
451 : MachineBasicBlock::iterator RegionBegin;
452 : MachineBasicBlock::iterator RegionEnd;
453 : unsigned NumRegionInstrs;
454 :
455 : SchedRegion(MachineBasicBlock::iterator B, MachineBasicBlock::iterator E,
456 : unsigned N) :
457 : RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}
458 : };
459 : } // end anonymous namespace
460 :
461 : using MBBRegionsVector = SmallVector<SchedRegion, 16>;
462 999719 :
463 999719 : static void
464 : getSchedRegions(MachineBasicBlock *MBB,
465 : MBBRegionsVector &Regions,
466 : bool RegionsTopDown) {
467 : MachineFunction *MF = MBB->getParent();
468 : const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
469 :
470 451711 : MachineBasicBlock::iterator I = nullptr;
471 : for(MachineBasicBlock::iterator RegionEnd = MBB->end();
472 : RegionEnd != MBB->begin(); RegionEnd = I) {
473 451711 :
474 451711 : // Avoid decrementing RegionEnd for blocks with no terminator.
475 : if (RegionEnd != MBB->end() ||
476 : isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
477 451711 : --RegionEnd;
478 1451430 : }
479 :
480 : // The next region starts above the previous region. Look backward in the
481 1448090 : // instruction stream until we find the nearest boundary.
482 1896461 : unsigned NumRegionInstrs = 0;
483 : I = RegionEnd;
484 : for (;I != MBB->begin(); --I) {
485 : MachineInstr &MI = *std::prev(I);
486 : if (isSchedBoundary(&MI, &*MBB, MF, TII))
487 : break;
488 : if (!MI.isDebugInstr())
489 999719 : // MBB::size() uses instr_iterator to count. Here we need a bundle to
490 3923495 : // count as a single instruction.
491 3475124 : ++NumRegionInstrs;
492 3475124 : }
493 :
494 : Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
495 : }
496 :
497 2800923 : if (RegionsTopDown)
498 : std::reverse(Regions.begin(), Regions.end());
499 : }
500 999719 :
501 : /// Main driver for both MachineScheduler and PostMachineScheduler.
502 : void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
503 451711 : bool FixKillFlags) {
504 : // Visit all machine basic blocks.
505 451711 : //
506 : // TODO: Visit blocks in global postorder or postorder within the bottom-up
507 : // loop tree. Then we can optionally compute global RegPressure.
508 189529 : for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
509 : MBB != MBBEnd; ++MBB) {
510 :
511 : Scheduler.startBlock(&*MBB);
512 :
513 : #ifndef NDEBUG
514 189529 : if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
515 641240 : continue;
516 : if (SchedOnlyBlock.getNumOccurrences()
517 451711 : && (int)SchedOnlyBlock != MBB->getNumber())
518 : continue;
519 : #endif
520 :
521 : // Break the block into scheduling regions [I, RegionEnd). RegionEnd
522 : // points to the scheduling boundary at the bottom of the region. The DAG
523 : // does not include RegionEnd, but the region does (i.e. the next
524 : // RegionEnd is above the previous RegionBegin). If the current block has
525 : // no terminator then RegionEnd == MBB->end() for the bottom region.
526 : //
527 : // All the regions of MBB are first found and stored in MBBRegions, which
528 : // are then processed top-down when Scheduler.doMBBSchedRegionsTopDown() is true.
529 : //
530 : // The Scheduler may insert instructions during either schedule() or
531 : // exitRegion(), even for empty regions. So the local iterators 'I' and
532 : // 'RegionEnd' are invalid across these calls. Instructions must not be
533 : // added to other regions than the current one without updating MBBRegions.
534 :
535 : MBBRegionsVector MBBRegions;
536 : getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());
537 : for (MBBRegionsVector::iterator R = MBBRegions.begin();
538 : R != MBBRegions.end(); ++R) {
539 : MachineBasicBlock::iterator I = R->RegionBegin;
540 : MachineBasicBlock::iterator RegionEnd = R->RegionEnd;
541 451711 : unsigned NumRegionInstrs = R->NumRegionInstrs;
542 451711 :
543 999719 : // Notify the scheduler of the region, even if we may skip scheduling
544 1451430 : // it. Perhaps it still needs to be bundled.
545 999719 : Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
546 999719 :
547 999719 : // Skip empty scheduling regions (0 or 1 schedulable instructions).
548 : if (I == RegionEnd || I == std::prev(RegionEnd)) {
549 : // Close the current region. Bundle the terminator if needed.
550 : // This invalidates 'RegionEnd' and 'I'.
551 999719 : Scheduler.exitRegion();
552 : continue;
553 : }
554 1595743 : LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
555 : LLVM_DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB)
556 : << " " << MBB->getName() << "\n From: " << *I
557 547993 : << " To: ";
558 : if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
559 : else dbgs() << "End";
560 : dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
561 : if (DumpCriticalPathLength) {
562 : errs() << MF->getName();
563 : errs() << ":%bb. " << MBB->getNumber();
564 : errs() << " " << MBB->getName() << " \n";
565 : }
566 :
567 451726 : // Schedule a region: possibly reorder instructions.
568 0 : // This invalidates the original region iterators.
569 0 : Scheduler.schedule();
570 0 :
571 : // Close the current region.
572 : Scheduler.exitRegion();
573 : }
574 : Scheduler.finishBlock();
575 451726 : // FIXME: Ideally, no further passes should rely on kill flags. However,
576 : // thumb2 size reduction is currently an exception, so the PostMIScheduler
577 : // needs to do this.
578 451726 : if (FixKillFlags)
579 : Scheduler.fixupKills(*MBB);
580 451711 : }
581 : Scheduler.finalizeSchedule();
582 : }
583 :
584 451711 : void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
585 28811 : // unimplemented
586 : }
587 189529 :
588 189529 : #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
589 : LLVM_DUMP_METHOD void ReadyQueue::dump() const {
590 0 : dbgs() << "Queue " << Name << ": ";
591 : for (const SUnit *SU : Queue)
592 0 : dbgs() << SU->NodeNum << " ";
593 : dbgs() << "\n";
594 : }
595 : #endif
596 :
597 : //===----------------------------------------------------------------------===//
598 : // ScheduleDAGMI - Basic machine instruction scheduling. This is
599 : // independent of PreRA/PostRA scheduling and involves no extra book-keeping for
600 : // virtual registers.
601 : //===----------------------------------------------------------------------===//
602 :
603 : // Provide a vtable anchor.
604 : ScheduleDAGMI::~ScheduleDAGMI() = default;
605 :
606 : bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
607 : return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
608 : }
609 :
610 : bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
611 : if (SuccSU != &ExitSU) {
612 19971 : // Do not use WillCreateCycle, it assumes SD scheduling.
613 19971 : // If Pred is reachable from Succ, then the edge creates a cycle.
614 : if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
615 : return false;
616 146863 : Topo.AddPred(SuccSU, PredDep.getSUnit());
617 146863 : }
618 : SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
619 : // Return true regardless of whether a new edge needed to be inserted.
620 241948 : return true;
621 : }
622 120105 :
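
canAddEdge() and addEdge() above are the primitives that ScheduleDAGMutation subclasses use to add ordering edges without creating cycles; postprocessDAG() below applies each registered mutation in turn. A minimal, hypothetical mutation might look like the sketch below (the class name and the choice of node pair are illustrative only):

// Hypothetical sketch: order the first two scheduling units with an
// artificial edge, but only when doing so cannot create a cycle. Real
// mutations (e.g. memop clustering or macro fusion) pick their node pairs
// from memory-access or target-specific information instead.
namespace {
struct ExampleOrderingMutation : ScheduleDAGMutation {
  void apply(ScheduleDAGInstrs *DAG) override {
    ScheduleDAGMI *MIDAG = static_cast<ScheduleDAGMI *>(DAG);
    if (MIDAG->SUnits.size() < 2)
      return;
    SUnit *Pred = &MIDAG->SUnits[0];
    SUnit *Succ = &MIDAG->SUnits[1];
    // Only add the Pred -> Succ edge if Succ cannot already reach Pred.
    if (MIDAG->canAddEdge(Succ, Pred))
      MIDAG->addEdge(Succ, SDep(Pred, SDep::Artificial));
  }
};
} // end anonymous namespace
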
623 : /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
624 145994 : /// NumPredsLeft reaches zero, release the successor node.
625 : ///
626 145994 : /// FIXME: Adjust SuccSU height based on MinLatency.
627 : void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
628 : SUnit *SuccSU = SuccEdge->getSUnit();
629 :
630 : if (SuccEdge->isWeak()) {
631 : --SuccSU->WeakPredsLeft;
632 : if (SuccEdge->isCluster())
633 366227 : NextClusterSucc = SuccSU;
634 : return;
635 : }
636 : #ifndef NDEBUG
637 6917 : if (SuccSU->NumPredsLeft == 0) {
638 : dbgs() << "*** Scheduling failed! ***\n";
639 6849 : dumpNode(*SuccSU);
640 6917 : dbgs() << " has been released too many times!\n";
641 : llvm_unreachable(nullptr);
642 : }
643 : #endif
644 : // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
645 : // CurrCycle may have advanced since then.
646 : if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
647 : SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
648 :
649 : --SuccSU->NumPredsLeft;
650 : if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
651 : SchedImpl->releaseTopNode(SuccSU);
652 359310 : }
653 252269 :
654 : /// releaseSuccessors - Call releaseSucc on each of SU's successors.
655 359310 : void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
656 359310 : for (SDep &Succ : SU->Succs)
657 144753 : releaseSucc(SU, &Succ);
658 : }
659 :
660 : /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
661 647054 : /// NumSuccsLeft reaches zero, release the predecessor node.
662 1013281 : ///
663 366227 : /// FIXME: Adjust PredSU height based on MinLatency.
664 647054 : void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
665 : SUnit *PredSU = PredEdge->getSUnit();
666 :
667 : if (PredEdge->isWeak()) {
668 : --PredSU->WeakSuccsLeft;
669 : if (PredEdge->isCluster())
670 4191126 : NextClusterPred = PredSU;
671 : return;
672 : }
673 : #ifndef NDEBUG
674 43345 : if (PredSU->NumSuccsLeft == 0) {
675 : dbgs() << "*** Scheduling failed! ***\n";
676 41093 : dumpNode(*PredSU);
677 43345 : dbgs() << " has been released too many times!\n";
678 : llvm_unreachable(nullptr);
679 : }
680 : #endif
681 : // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
682 : // CurrCycle may have advanced since then.
683 : if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
684 : PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
685 :
686 : --PredSU->NumSuccsLeft;
687 : if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
688 : SchedImpl->releaseBottomNode(PredSU);
689 4147781 : }
690 2671933 :
691 : /// releasePredecessors - Call releasePred on each of SU's predecessors.
692 4147781 : void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
693 4147781 : for (SDep &Pred : SU->Preds)
694 2160004 : releasePred(SU, &Pred);
695 : }
696 :
697 : void ScheduleDAGMI::startBlock(MachineBasicBlock *bb) {
698 2941566 : ScheduleDAGInstrs::startBlock(bb);
699 7132692 : SchedImpl->enterMBB(bb);
700 4191126 : }
701 2941566 :
702 : void ScheduleDAGMI::finishBlock() {
703 472445 : SchedImpl->leaveMBB();
704 472445 : ScheduleDAGInstrs::finishBlock();
705 472445 : }
706 472445 :
707 : /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
708 473137 : /// crossing a scheduling boundary. [begin, end) includes all instructions in
709 473137 : /// the region, including the boundary itself and single-instruction regions
710 473137 : /// that don't get scheduled.
711 473137 : void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
712 : MachineBasicBlock::iterator begin,
713 : MachineBasicBlock::iterator end,
714 : unsigned regioninstrs)
715 : {
716 : ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
717 1021179 :
718 : SchedImpl->initPolicy(begin, end, regioninstrs);
719 : }
720 :
721 : /// This is normally called from the main scheduler loop but may also be invoked
722 1021179 : /// by the scheduling strategy to perform additional code motion.
723 : void ScheduleDAGMI::moveInstruction(
724 1021179 : MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {
725 1021179 : // Advance RegionBegin if the first instruction moves down.
726 : if (&*RegionBegin == MI)
727 : ++RegionBegin;
728 :
729 215569 : // Update the instruction stream.
730 : BB->splice(InsertPos, BB, MI);
731 :
732 215569 : // Update LiveIntervals
733 : if (LIS)
734 : LIS->handleMove(*MI, /*UpdateFlags=*/true);
735 :
736 431138 : // Recede RegionBegin if an instruction moves above the first.
737 : if (RegionBegin == InsertPos)
738 : RegionBegin = MI;
739 215569 : }
740 205042 :
741 : bool ScheduleDAGMI::checkSchedLimit() {
742 : #ifndef NDEBUG
743 215569 : if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
744 2216 : CurrentTop = CurrentBottom;
745 215569 : return false;
746 : }
747 2684496 : ++NumInstrsScheduled;
748 : #endif
749 : return true;
750 : }
751 :
752 : /// Per-region scheduling driver, called back from
753 : /// MachineScheduler::runOnMachineFunction. This is a simplified driver that
754 : /// does not consider liveness or register pressure. It is useful for PostRA
755 2684496 : /// scheduling and potentially other custom schedulers.
756 : void ScheduleDAGMI::schedule() {
757 : LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
758 : LLVM_DEBUG(SchedImpl->dumpPolicy());
759 :
760 : // Build the DAG.
761 : buildSchedGraph(AA);
762 19370 :
763 : Topo.InitDAGTopologicalSorting();
764 :
765 : postprocessDAG();
766 :
767 19370 : SmallVector<SUnit*, 8> TopRoots, BotRoots;
768 : findRootsAndBiasEdges(TopRoots, BotRoots);
769 19370 :
770 : LLVM_DEBUG(dump());
771 19370 : if (PrintDAGs) dump();
772 : if (ViewMISchedDAGs) viewGraph();
773 :
774 19370 : // Initialize the strategy before modifying the DAG.
775 : // This may initialize a DFSResult to be used for queue priority.
776 : SchedImpl->initialize(this);
777 :
778 : // Initialize ready queues now that the DAG and priority data are finalized.
779 : initQueues(TopRoots, BotRoots);
780 :
781 : bool IsTopNode = false;
782 19370 : while (true) {
783 : LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
784 : SUnit *SU = SchedImpl->pickNode(IsTopNode);
785 19370 : if (!SU) break;
786 :
787 19370 : assert(!SU->isScheduled && "Node already scheduled");
788 : if (!checkSchedLimit())
789 : break;
790 113495 :
791 113495 : MachineInstr *MI = SU->getInstr();
792 : if (IsTopNode) {
793 : assert(SU->isTopReady() && "node still has unscheduled dependencies");
794 94125 : if (&*CurrentTop == MI)
795 : CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
796 : else
797 94125 : moveInstruction(MI, CurrentTop);
798 94125 : } else {
799 : assert(SU->isBottomReady() && "node still has unscheduled dependencies");
800 94125 : MachineBasicBlock::iterator priorII =
801 83598 : priorNonDebug(CurrentBottom, CurrentTop);
802 : if (&*priorII == MI)
803 10527 : CurrentBottom = priorII;
804 : else {
805 : if (&*CurrentTop == MI)
806 : CurrentTop = nextIfDebug(++CurrentTop, priorII);
807 0 : moveInstruction(MI, CurrentBottom);
808 0 : CurrentBottom = MI;
809 0 : }
810 : }
811 0 : // Notify the scheduling strategy before updating the DAG.
812 0 : // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
813 0 : // runs, it can then use the accurate ReadyCycle time to determine whether
814 0 : // newly released nodes can move to the readyQ.
815 : SchedImpl->schedNode(SU, IsTopNode);
816 :
817 : updateQueues(SU, IsTopNode);
818 : }
819 : assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
820 :
821 94125 : placeDebugValues();
822 :
823 94125 : LLVM_DEBUG({
824 94125 : dbgs() << "*** Final schedule for "
825 : << printMBBReference(*begin()->getParent()) << " ***\n";
826 : dumpSchedule();
827 19370 : dbgs() << '\n';
828 : });
829 : }
830 :
831 : /// Apply each ScheduleDAGMutation step in order.
832 : void ScheduleDAGMI::postprocessDAG() {
833 : for (auto &m : Mutations)
834 : m->apply(this);
835 19370 : }
836 :
837 : void ScheduleDAGMI::
838 452060 : findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
839 1359734 : SmallVectorImpl<SUnit*> &BotRoots) {
840 907674 : for (SUnit &SU : SUnits) {
841 452060 : assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");
842 :
843 452070 : // Order predecessors so DFSResult follows the critical path.
844 : SU.biasCriticalPath();
845 :
846 3139006 : // A SUnit is ready to top schedule if it has no predecessors.
847 : if (!SU.NumPredsLeft)
848 : TopRoots.push_back(&SU);
849 : // A SUnit is ready to bottom schedule if it has no successors.
850 2686936 : if (!SU.NumSuccsLeft)
851 : BotRoots.push_back(&SU);
852 : }
853 2686936 : ExitSU.biasCriticalPath();
854 1182130 : }
855 :
856 2686936 : /// Identify DAG roots and setup scheduler queues.
857 429784 : void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
858 : ArrayRef<SUnit*> BotRoots) {
859 452070 : NextClusterSucc = nullptr;
860 452070 : NextClusterPred = nullptr;
861 :
862 : // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
863 452062 : //
864 : // Nodes with unreleased weak edges can still be roots.
865 452062 : // Release top roots in forward order.
866 452062 : for (SUnit *SU : TopRoots)
867 : SchedImpl->releaseTopNode(SU);
868 :
869 : // Release bottom roots in reverse order so the higher priority nodes appear
870 : // first. This is more natural and slightly more efficient.
871 : for (SmallVectorImpl<SUnit*>::const_reverse_iterator
872 1634172 : I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
873 1182110 : SchedImpl->releaseBottomNode(*I);
874 : }
875 :
876 : releaseSuccessors(&EntrySU);
877 : releasePredecessors(&ExitSU);
878 881838 :
879 429776 : SchedImpl->registerRoots();
880 :
881 : // Advance past initial DebugValues.
882 452062 : CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
883 452062 : CurrentBottom = RegionEnd;
884 : }
885 452062 :
886 : /// Update scheduler queues after scheduling an instruction.
887 : void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
888 452062 : // Release dependent instructions for scheduling.
889 452062 : if (IsTopNode)
890 452062 : releaseSuccessors(SU);
891 : else
892 : releasePredecessors(SU);
893 2684496 :
894 : SU->isScheduled = true;
895 2684496 : }
896 194992 :
897 : /// Reinsert any remaining debug_values, just like the PostRA scheduler.
898 2489504 : void ScheduleDAGMI::placeDebugValues() {
899 : // If first instruction was a DBG_VALUE then put it back.
900 2684496 : if (FirstDbgValue) {
901 2684496 : BB->splice(RegionBegin, BB, FirstDbgValue);
902 : RegionBegin = FirstDbgValue;
903 : }
904 452081 :
905 : for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
906 452081 : DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
907 22164 : std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
908 11082 : MachineInstr *DbgValue = P.first;
909 : MachineBasicBlock::iterator OrigPrevMI = P.second;
910 : if (&*RegionBegin == DbgValue)
911 : ++RegionBegin;
912 563427 : BB->splice(++OrigPrevMI, BB, DbgValue);
913 111346 : if (OrigPrevMI == std::prev(RegionEnd))
914 : RegionEnd = DbgValue;
915 : }
916 111346 : DbgValues.clear();
917 : FirstDbgValue = nullptr;
918 222692 : }
919 111346 :
920 4778 : #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
921 : LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
922 : for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
923 452081 : if (SUnit *SU = getSUnit(&(*MI)))
924 452081 : dumpNode(*SU);
925 : else
926 : dbgs() << "Missing SUnit\n";
927 : }
928 : }
929 : #endif
930 :
931 : //===----------------------------------------------------------------------===//
932 : // ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
933 : // preservation.
934 : //===----------------------------------------------------------------------===//
935 :
936 : ScheduleDAGMILive::~ScheduleDAGMILive() {
937 : delete DFSResult;
938 : }
939 :
940 : void ScheduleDAGMILive::collectVRegUses(SUnit &SU) {
941 : const MachineInstr &MI = *SU.getInstr();
942 812752 : for (const MachineOperand &MO : MI.operands()) {
943 167111 : if (!MO.isReg())
944 311419 : continue;
945 144308 : if (!MO.readsReg())
946 : continue;
947 144308 : if (TrackLaneMasks && !MO.isUse())
948 668444 : continue;
949 167111 :
950 167111 : unsigned Reg = MO.getReg();
951 : if (!TargetRegisterInfo::isVirtualRegister(Reg))
952 1536618 : continue;
953 1536618 :
954 9002609 : // Ignore re-defs.
955 7465991 : if (TrackLaneMasks) {
956 : bool FoundDef = false;
957 : for (const MachineOperand &MO2 : MI.operands()) {
958 : if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) {
959 3546597 : FoundDef = true;
960 : break;
961 : }
962 3425453 : }
963 3425453 : if (FoundDef)
964 : continue;
965 : }
966 :
967 1609173 : // Record this local VReg use.
968 : VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);
969 2155029 : for (; UI != VRegUses.end(); ++UI) {
970 1806181 : if (UI->SU == &SU)
971 : break;
972 : }
973 : if (UI == VRegUses.end())
974 : VRegUses.insert(VReg2SUnit(Reg, LaneBitmask::getNone(), &SU));
975 354591 : }
976 : }
977 :
978 : /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
979 : /// crossing a scheduling boundary. [begin, end) includes all instructions in
980 1603430 : /// the region, including the boundary itself and single-instruction regions
981 : /// that don't get scheduled.
982 14779093 : void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
983 : MachineBasicBlock::iterator begin,
984 : MachineBasicBlock::iterator end,
985 : unsigned regioninstrs)
986 1581057 : {
987 : // ScheduleDAGMI initializes SchedImpl's per-region policy.
988 1536618 : ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
989 :
990 : // For convenience remember the end of the liveness region.
991 : LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);
992 :
993 : SUPressureDiffs.clear();
994 979712 :
995 : ShouldTrackPressure = SchedImpl->shouldTrackPressure();
996 : ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
997 :
998 : assert((!ShouldTrackLaneMasks || ShouldTrackPressure) &&
999 : "ShouldTrackLaneMasks requires ShouldTrackPressure");
1000 979712 : }
1001 :
1002 : // Setup the register pressure trackers for the top scheduled top and bottom
1003 1919806 : // scheduled regions.
1004 : void ScheduleDAGMILive::initRegPressure() {
1005 : VRegUses.clear();
1006 : VRegUses.setUniverse(MRI.getNumVirtRegs());
1007 979712 : for (SUnit &SU : SUnits)
1008 979712 : collectVRegUses(SU);
1009 :
1010 : TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin,
1011 : ShouldTrackLaneMasks, false);
1012 979712 : BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
1013 : ShouldTrackLaneMasks, false);
1014 :
1015 : // Close the RPTracker to finalize live ins.
1016 124424 : RPTracker.closeRegion();
1017 :
1018 248848 : LLVM_DEBUG(RPTracker.dump());
1019 1661042 :
1020 1536618 : // Initialize the live ins and live outs.
1021 : TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
1022 124424 : BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
1023 124424 :
1024 124424 : // Close one end of the tracker so we can call
1025 124424 : // getMaxUpward/DownwardPressureDelta before advancing across any
1026 : // instructions. This converts currently live regs into live ins/outs.
1027 : TopRPTracker.closeTop();
1028 124424 : BotRPTracker.closeBottom();
1029 :
1030 : BotRPTracker.initLiveThru(RPTracker);
1031 : if (!BotRPTracker.getLiveThru().empty()) {
1032 : TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());
1033 248848 : LLVM_DEBUG(dbgs() << "Live Thru: ";
1034 248848 : dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
1035 : };
1036 :
1037 : // For each live out vreg reduce the pressure change associated with other
1038 : // uses of the same vreg below the live-out reaching def.
1039 124424 : updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);
1040 124424 :
1041 : // Account for liveness generated by the region boundary.
1042 124424 : if (LiveRegionEnd != RegionEnd) {
1043 124424 : SmallVector<RegisterMaskPair, 8> LiveUses;
1044 : BotRPTracker.recede(&LiveUses);
1045 : updatePressureDiffs(LiveUses);
1046 : }
1047 :
1048 : LLVM_DEBUG(dbgs() << "Top Pressure:\n";
1049 : dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
1050 : dbgs() << "Bottom Pressure:\n";
1051 248848 : dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI););
1052 :
1053 : assert((BotRPTracker.getPos() == RegionEnd ||
1054 124424 : (RegionEnd->isDebugInstr() &&
1055 : BotRPTracker.getPos() == priorNonDebug(RegionEnd, RegionBegin))) &&
1056 113476 : "Can't find the region bottom");
1057 113476 :
1058 : // Cache the list of excess pressure sets in this region. This will also track
1059 : // the max pressure in the scheduled code for these sets.
1060 : RegionCriticalPSets.clear();
1061 : const std::vector<unsigned> &RegionPressure =
1062 : RPTracker.getPressure().MaxSetPressure;
1063 : for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
1064 : unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
1065 : if (RegionPressure[i] > Limit) {
1066 : LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limit
1067 : << " Actual " << RegionPressure[i] << "\n");
1068 : RegionCriticalPSets.push_back(PressureChange(i));
1069 : }
1070 : }
1071 : LLVM_DEBUG(dbgs() << "Excess PSets: ";
1072 124424 : for (const PressureChange &RCPS
1073 : : RegionCriticalPSets) dbgs()
1074 124424 : << TRI->getRegPressureSetName(RCPS.getPSet()) << " ";
1075 3811047 : dbgs() << "\n");
1076 3562199 : }
1077 7124398 :
1078 : void ScheduleDAGMILive::
1079 : updateScheduledPressure(const SUnit *SU,
1080 4868 : const std::vector<unsigned> &NewMaxPressure) {
1081 : const PressureDiff &PDiff = getPressureDiff(SU);
1082 : unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
1083 : for (const PressureChange &PC : PDiff) {
1084 : if (!PC.isValid())
1085 : break;
1086 : unsigned ID = PC.getPSet();
1087 : while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
1088 124424 : ++CritIdx;
1089 : if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
1090 1536618 : if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
1091 : && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
1092 : RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
1093 1536618 : }
1094 3073236 : unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
1095 4437033 : if (NewMaxPressure[ID] >= Limit - 2) {
1096 4378277 : LLVM_DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": "
1097 : << NewMaxPressure[ID]
1098 : << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ")
1099 2952903 : << Limit << "(+ " << BotRPTracker.getLiveThru()[ID]
1100 52488 : << " livethru)\n");
1101 2900415 : }
1102 279626 : }
1103 139813 : }
1104 :
1105 : /// Update the PressureDiff array for liveness after scheduling this
1106 2900415 : /// instruction.
1107 : void ScheduleDAGMILive::updatePressureDiffs(
1108 : ArrayRef<RegisterMaskPair> LiveUses) {
1109 : for (const RegisterMaskPair &P : LiveUses) {
1110 : unsigned Reg = P.RegUnit;
1111 : /// FIXME: Currently assuming single-use physregs.
1112 : if (!TRI->isVirtualRegister(Reg))
1113 : continue;
1114 :
1115 1536618 : if (ShouldTrackLaneMasks) {
1116 : // If the register has just become live then other uses won't change
1117 : // this fact anymore => decrement pressure.
1118 : // If the register has just become dead then other uses make it come
1119 1687613 : // back to life => increment pressure.
1120 : bool Decrement = P.LaneMask.any();
1121 3192715 :
1122 1505102 : for (const VReg2SUnit &V2SU
1123 : : make_range(VRegUses.find(Reg), VRegUses.end())) {
1124 1505102 : SUnit &SU = *V2SU.SU;
1125 : if (SU.isScheduled || &SU == &ExitSU)
1126 : continue;
1127 1213583 :
1128 : PressureDiff &PDiff = getPressureDiff(&SU);
1129 : PDiff.addPressureChange(Reg, Decrement, &MRI);
1130 : LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU.NodeNum << ") "
1131 : << printReg(Reg, TRI) << ':'
1132 362864 : << PrintLaneMask(P.LaneMask) << ' ' << *SU.getInstr();
1133 : dbgs() << " to "; PDiff.dump(*TRI););
1134 : }
1135 362864 : } else {
1136 560019 : assert(P.LaneMask.any());
1137 560019 : LLVM_DEBUG(dbgs() << " LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n");
1138 : // This may be called before CurrentBottom has been initialized. However,
1139 : // BotRPTracker must have a valid position. We want the value live into the
1140 318726 : // instruction or live out of the block, so ask for the previous
1141 318726 : // instruction's live-out.
1142 : const LiveInterval &LI = LIS->getInterval(Reg);
1143 : VNInfo *VNI;
1144 : MachineBasicBlock::const_iterator I =
1145 : nextIfDebug(BotRPTracker.getPos(), BB->end());
1146 : if (I == BB->end())
1147 : VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1148 : else {
1149 : LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I));
1150 : VNI = LRQ.valueIn();
1151 : }
1152 : // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
1153 : assert(VNI && "No live value at use.");
1154 850719 : for (const VReg2SUnit &V2SU
1155 : : make_range(VRegUses.find(Reg), VRegUses.end())) {
1156 : SUnit *SU = V2SU.SU;
1157 1701438 : // If this use comes before the reaching def, it cannot be a last use,
1158 1701438 : // so decrease its pressure change.
1159 156904 : if (!SU->isScheduled && SU != &ExitSU) {
1160 : LiveQueryResult LRQ =
1161 772267 : LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1162 772267 : if (LRQ.valueIn() == VNI) {
1163 : PressureDiff &PDiff = getPressureDiff(SU);
1164 : PDiff.addPressureChange(Reg, true, &MRI);
1165 : LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "
1166 : << *SU->getInstr();
1167 850719 : dbgs() << " to "; PDiff.dump(*TRI););
1168 1858852 : }
1169 : }
1170 : }
1171 1858852 : }
1172 : }
1173 1558529 : }
1174 1558529 :
1175 1231535 : void ScheduleDAGMILive::dump() const {
1176 1231535 : #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1177 : if (EntrySU.getInstr() != nullptr)
1178 : dumpNodeAll(EntrySU);
1179 : for (const SUnit &SU : SUnits) {
1180 : dumpNodeAll(SU);
1181 : if (ShouldTrackPressure) {
1182 : dbgs() << " Pressure Diff : ";
1183 : getPressureDiff(&SU).dump(*TRI);
1184 : }
1185 1687613 : dbgs() << " Single Issue : ";
1186 : if (SchedModel.mustBeginGroup(SU.getInstr()) &&
1187 0 : SchedModel.mustEndGroup(SU.getInstr()))
1188 : dbgs() << "true;";
1189 : else
1190 : dbgs() << "false;";
1191 : dbgs() << '\n';
1192 : }
1193 : if (ExitSU.getInstr() != nullptr)
1194 : dumpNodeAll(ExitSU);
1195 : #endif
1196 : }
1197 :
1198 : /// schedule - Called back from MachineScheduler::runOnMachineFunction
1199 : /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
1200 : /// only includes instructions that have DAG nodes, not scheduling boundaries.
1201 : ///
1202 : /// This is a skeletal driver, with all the functionality pushed into helpers,
1203 : /// so that it can be easily extended by experimental schedulers. Generally,
1204 : /// implementing MachineSchedStrategy should be sufficient to implement a new
1205 : /// scheduling algorithm. However, if a scheduler further subclasses
1206 : /// ScheduleDAGMILive then it will want to override this virtual method in order
1207 : /// to update any specialized state.
1208 0 : void ScheduleDAGMILive::schedule() {
1209 : LLVM_DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
1210 : LLVM_DEBUG(SchedImpl->dumpPolicy());
1211 : buildDAGWithRegPressure();
1212 :
1213 : Topo.InitDAGTopologicalSorting();
1214 :
1215 : postprocessDAG();
1216 :
1217 : SmallVector<SUnit*, 8> TopRoots, BotRoots;
1218 : findRootsAndBiasEdges(TopRoots, BotRoots);
1219 :
1220 427711 : // Initialize the strategy before modifying the DAG.
1221 : // This may initialize a DFSResult to be used for queue priority.
1222 : SchedImpl->initialize(this);
1223 427711 :
1224 : LLVM_DEBUG(dump());
1225 427711 : if (PrintDAGs) dump();
1226 : if (ViewMISchedDAGs) viewGraph();
1227 427711 :
1228 : // Initialize ready queues now that the DAG and priority data are finalized.
1229 : initQueues(TopRoots, BotRoots);
1230 427711 :
1231 : bool IsTopNode = false;
1232 : while (true) {
1233 : LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
1234 427711 : SUnit *SU = SchedImpl->pickNode(IsTopNode);
1235 : if (!SU) break;
1236 :
1237 : assert(!SU->isScheduled && "Node already scheduled");
1238 : if (!checkSchedLimit())
1239 : break;
1240 :
1241 427711 : scheduleMI(SU, IsTopNode);
1242 :
1243 427711 : if (DFSResult) {
1244 : unsigned SubtreeID = DFSResult->getSubtreeID(SU);
1245 : if (!ScheduledTrees.test(SubtreeID)) {
1246 2988665 : ScheduledTrees.set(SubtreeID);
1247 2988665 : DFSResult->scheduleTree(SubtreeID);
1248 : SchedImpl->scheduleTree(SubtreeID);
1249 : }
1250 2560954 : }
1251 :
1252 : // Notify the scheduling strategy after updating the DAG.
1253 2560954 : SchedImpl->schedNode(SU, IsTopNode);
1254 :
1255 2560954 : updateQueues(SU, IsTopNode);
1256 : }
1257 176 : assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
1258 :
1259 38 : placeDebugValues();
1260 38 :
1261 : LLVM_DEBUG({
1262 : dbgs() << "*** Final schedule for "
1263 : << printMBBReference(*begin()->getParent()) << " ***\n";
1264 : dumpSchedule();
1265 2560954 : dbgs() << '\n';
1266 : });
1267 2560954 : }
1268 2560954 :
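
The comment above ScheduleDAGMILive::schedule() notes that implementing MachineSchedStrategy is normally all a new scheduler needs. A minimal, hypothetical strategy is sketched below: it schedules bottom-up in whatever order nodes become ready, which is always a legal order because the driver only hands a node to releaseBottomNode() once all of its successors have been scheduled. All names here are illustrative and not part of this file.

// Hypothetical sketch: a FIFO bottom-up strategy, similar in spirit to the
// debug-only "shuffle" strategy used for testing.
namespace {
class ExampleBottomUpStrategy : public MachineSchedStrategy {
  std::vector<SUnit *> ReadyQ;

public:
  void initialize(ScheduleDAGMI *DAG) override { ReadyQ.clear(); }

  SUnit *pickNode(bool &IsTopNode) override {
    IsTopNode = false;            // always schedule from the bottom
    if (ReadyQ.empty())
      return nullptr;             // tells the driver the region is done
    SUnit *SU = ReadyQ.back();
    ReadyQ.pop_back();
    return SU;
  }

  void schedNode(SUnit *SU, bool IsTopNode) override {}
  void releaseTopNode(SUnit *SU) override {}
  void releaseBottomNode(SUnit *SU) override { ReadyQ.push_back(SU); }
};
} // end anonymous namespace

// Such a strategy could be wired up through the registry sketched earlier, or
// returned from a target's createMachineScheduler() hook:
static ScheduleDAGInstrs *createExampleBottomUpSched(MachineSchedContext *C) {
  return new ScheduleDAGMILive(C, llvm::make_unique<ExampleBottomUpStrategy>());
}
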
1269 : /// Build the DAG and setup three register pressure trackers.
1270 : void ScheduleDAGMILive::buildDAGWithRegPressure() {
1271 427711 : if (!ShouldTrackPressure) {
1272 : RPTracker.reset();
1273 : RegionCriticalPSets.clear();
1274 : buildSchedGraph(AA);
1275 : return;
1276 : }
1277 :
1278 : // Initialize the register pressure tracker used by buildSchedGraph.
1279 427711 : RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
1280 : ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
1281 :
1282 : // Account for liveness generated by the region boundary.
1283 432692 : if (LiveRegionEnd != RegionEnd)
1284 308268 : RPTracker.recede();
1285 :
1286 308268 : // Build the DAG, and compute current register pressure.
1287 308268 : buildSchedGraph(AA, &RPTracker, &SUPressureDiffs, LIS, ShouldTrackLaneMasks);
1288 :
1289 : // Initialize top/bottom trackers after computing region pressure.
1290 : initRegPressure();
1291 124424 : }
1292 124424 :
1293 : void ScheduleDAGMILive::computeDFSResult() {
1294 : if (!DFSResult)
1295 124424 : DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
1296 113476 : DFSResult->clear();
1297 : ScheduledTrees.clear();
1298 : DFSResult->resize(SUnits.size());
1299 124424 : DFSResult->compute(SUnits);
1300 : ScheduledTrees.resize(DFSResult->getNumSubtrees());
1301 : }
1302 124424 :
1303 : /// Compute the max cyclic critical path through the DAG. The scheduling DAG
1304 : /// only provides the critical path for single block loops. To handle loops that
1305 10 : /// span blocks, we could use the vreg path latencies provided by
1306 10 : /// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
1307 8 : /// available for use in the scheduler.
1308 10 : ///
1309 : /// The cyclic path estimation identifies a def-use pair that crosses the back
1310 10 : /// edge and considers the depth and height of the nodes. For example, consider
1311 20 : /// the following instruction sequence where each instruction has unit latency
1312 : /// and defines an eponymous virtual register:
1313 10 : ///
1314 : /// a->b(a,c)->c(b)->d(c)->exit
1315 : ///
1316 : /// The cyclic critical path is two cycles: b->c->b
1317 : /// The acyclic critical path is four cycles: a->b->c->d->exit
1318 : /// LiveOutHeight = height(c) = len(c->d->exit) = 2
1319 : /// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
1320 : /// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
1321 : /// LiveInDepth = depth(b) = len(a->b) = 1
1322 : ///
1323 : /// LiveOutDepth - LiveInDepth = 3 - 1 = 2
1324 : /// LiveInHeight - LiveOutHeight = 4 - 2 = 2
1325 : /// CyclicCriticalPath = min(2, 2) = 2
1326 : ///
1327 : /// This could be relevant to PostRA scheduling, but is currently implemented
1328 : /// assuming LiveIntervals.
1329 : unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
1330 : // This only applies to single-block loops.
1331 : if (!BB->isSuccessor(BB))
1332 : return 0;
1333 :
1334 : unsigned MaxCyclicLatency = 0;
1335 : // Visit each live out vreg def to find def/use pairs that cross iterations.
1336 : for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {
1337 : unsigned Reg = P.RegUnit;
1338 : if (!TRI->isVirtualRegister(Reg))
1339 : continue;
1340 : const LiveInterval &LI = LIS->getInterval(Reg);
1341 389637 : const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1342 : if (!DefVNI)
1343 389637 : continue;
1344 :
1345 : MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
1346 : const SUnit *DefSU = getSUnit(DefMI);
1347 : if (!DefSU)
1348 6938 : continue;
1349 4133 :
1350 4133 : unsigned LiveOutHeight = DefSU->getHeight();
1351 : unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
1352 4127 : // Visit all local users of the vreg def.
1353 8254 : for (const VReg2SUnit &V2SU
1354 4127 : : make_range(VRegUses.find(Reg), VRegUses.end())) {
1355 : SUnit *SU = V2SU.SU;
1356 : if (SU == &ExitSU)
1357 : continue;
1358 :
1359 1694 : // Only consider uses of the phi.
1360 2290 : LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1361 : if (!LRQ.valueIn()->isPHIDef())
1362 : continue;
1363 1694 :
1364 : // Assume that a path spanning two iterations is a cycle, which could
1365 : // overestimate in strange cases. This allows cyclic latency to be
1366 1694 : // estimated as the minimum slack of the vreg's depth or height.
1367 6417 : unsigned CyclicLatency = 0;
1368 6417 : if (LiveOutDepth > SU->getDepth())
1369 687 : CyclicLatency = LiveOutDepth - SU->getDepth();
1370 :
1371 : unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
1372 6417 : if (LiveInHeight > LiveOutHeight) {
1373 12834 : if (LiveInHeight - LiveOutHeight < CyclicLatency)
1374 : CyclicLatency = LiveInHeight - LiveOutHeight;
1375 : } else
1376 : CyclicLatency = 0;
1377 :
1378 : LLVM_DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
1379 : << SU->NodeNum << ") = " << CyclicLatency << "c\n");
1380 5730 : if (CyclicLatency > MaxCyclicLatency)
1381 5717 : MaxCyclicLatency = CyclicLatency;
1382 : }
1383 5730 : }
1384 5730 : LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
1385 5719 : return MaxCyclicLatency;
1386 : }
1387 :
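
Restating the arithmetic from the comment above computeCyclicCriticalPath() as a formula (a summary of the loop above, with DefSU the live-out def and SU a cross-iteration use of the same vreg):

\[
\mathrm{CyclicLatency} = \min\bigl(\mathrm{LiveOutDepth} - \mathrm{depth}(SU),\;
                                   \mathrm{LiveInHeight} - \mathrm{LiveOutHeight}\bigr)
\]

where LiveOutDepth = depth(DefSU) + latency(DefSU), LiveOutHeight = height(DefSU), and LiveInHeight = height(SU) + latency(DefSU). Either difference being non-positive clamps CyclicLatency to 0, and the function returns the maximum CyclicLatency over all such def/use pairs.
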
1388 : /// Release ExitSU predecessors and setup scheduler queues. Re-position
1389 : /// the Top RP tracker in case the region beginning has changed.
1390 : void ScheduleDAGMILive::initQueues(ArrayRef<SUnit*> TopRoots,
1391 : ArrayRef<SUnit*> BotRoots) {
1392 5730 : ScheduleDAGMI::initQueues(TopRoots, BotRoots);
1393 : if (ShouldTrackPressure) {
1394 : assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
1395 : TopRPTracker.setPos(CurrentTop);
1396 : }
1397 : }
1398 :
1399 : /// Move an instruction and update register pressure.
1400 : void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
1401 : // Move the instruction to its new location in the instruction stream.
1402 432692 : MachineInstr *MI = SU->getInstr();
1403 :
1404 432692 : if (IsTopNode) {
1405 432692 : assert(SU->isTopReady() && "node still has unscheduled dependencies");
1406 : if (&*CurrentTop == MI)
1407 : CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
1408 : else {
1409 432692 : moveInstruction(MI, CurrentTop);
1410 : TopRPTracker.setPos(MI);
1411 : }
1412 2590387 :
1413 : if (ShouldTrackPressure) {
1414 2590387 : // Update top scheduled pressure.
1415 : RegisterOperands RegOpers;
1416 2590387 : RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1417 : if (ShouldTrackLaneMasks) {
1418 100883 : // Adjust liveness and add missing dead+read-undef flags.
1419 85524 : SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1420 : RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1421 15359 : } else {
1422 : // Adjust for missing dead-def flags.
1423 : RegOpers.detectDeadDefs(*MI, *LIS);
1424 : }
1425 100883 :
1426 : TopRPTracker.advance(RegOpers);
1427 86905 : assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
1428 86905 : LLVM_DEBUG(dbgs() << "Top Pressure:\n"; dumpRegSetPressure(
1429 86905 : TopRPTracker.getRegSetPressureAtPos(), TRI););
1430 :
1431 69909 : updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
1432 69909 : }
1433 : } else {
1434 : assert(SU->isBottomReady() && "node still has unscheduled dependencies");
1435 16996 : MachineBasicBlock::iterator priorII =
1436 : priorNonDebug(CurrentBottom, CurrentTop);
1437 : if (&*priorII == MI)
1438 86905 : CurrentBottom = priorII;
1439 : else {
1440 : if (&*CurrentTop == MI) {
1441 : CurrentTop = nextIfDebug(++CurrentTop, priorII);
1442 : TopRPTracker.setPos(CurrentTop);
1443 86905 : }
1444 : moveInstruction(MI, CurrentBottom);
1445 : CurrentBottom = MI;
1446 : BotRPTracker.setPos(CurrentBottom);
1447 : }
1448 2489504 : if (ShouldTrackPressure) {
1449 2489504 : RegisterOperands RegOpers;
1450 2304867 : RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1451 : if (ShouldTrackLaneMasks) {
1452 184637 : // Adjust liveness and add missing dead+read-undef flags.
1453 23041 : SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1454 : RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1455 : } else {
1456 184637 : // Adjust for missing dead-def flags.
1457 184637 : RegOpers.detectDeadDefs(*MI, *LIS);
1458 : }
1459 :
1460 2489504 : if (BotRPTracker.getPos() != CurrentBottom)
1461 1449713 : BotRPTracker.recedeSkipDebugValues();
1462 1449713 : SmallVector<RegisterMaskPair, 8> LiveUses;
1463 1449713 : BotRPTracker.recede(RegOpers, &LiveUses);
1464 : assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
1465 271527 : LLVM_DEBUG(dbgs() << "Bottom Pressure:\n"; dumpRegSetPressure(
1466 271527 : BotRPTracker.getRegSetPressureAtPos(), TRI););
1467 :
1468 : updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
1469 1178186 : updatePressureDiffs(LiveUses);
1470 : }
1471 : }
1472 1449713 : }
1473 1294584 :
1474 : //===----------------------------------------------------------------------===//
1475 1449713 : // BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
1476 : //===----------------------------------------------------------------------===//
1477 :
1478 : namespace {
1479 :
1480 1449713 : /// Post-process the DAG to create cluster edges between neighboring
1481 1449713 : /// loads or between neighboring stores.
1482 : class BaseMemOpClusterMutation : public ScheduleDAGMutation {
1483 : struct MemOpInfo {
1484 2590387 : SUnit *SU;
1485 : unsigned BaseReg;
1486 : int64_t Offset;
1487 :
1488 : MemOpInfo(SUnit *su, unsigned reg, int64_t ofs)
1489 : : SU(su), BaseReg(reg), Offset(ofs) {}
1490 :
1491 : bool operator<(const MemOpInfo&RHS) const {
1492 : return std::tie(BaseReg, Offset, SU->NodeNum) <
1493 : std::tie(RHS.BaseReg, RHS.Offset, RHS.SU->NodeNum);
1494 : }
1495 : };
1496 :
1497 : const TargetInstrInfo *TII;
1498 : const TargetRegisterInfo *TRI;
1499 : bool IsLoad;
1500 :
1501 0 : public:
1502 : BaseMemOpClusterMutation(const TargetInstrInfo *tii,
1503 : const TargetRegisterInfo *tri, bool IsLoad)
1504 0 : : TII(tii), TRI(tri), IsLoad(IsLoad) {}
1505 0 :
1506 : void apply(ScheduleDAGInstrs *DAGInstrs) override;
1507 :
1508 : protected:
1509 : void clusterNeighboringMemOps(ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG);
1510 : };
1511 :
1512 : class StoreClusterMutation : public BaseMemOpClusterMutation {
1513 : public:
1514 : StoreClusterMutation(const TargetInstrInfo *tii,
1515 : const TargetRegisterInfo *tri)
1516 66526 : : BaseMemOpClusterMutation(tii, tri, false) {}
1517 : };
1518 :
1519 : class LoadClusterMutation : public BaseMemOpClusterMutation {
1520 : public:
1521 : LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri)
1522 : : BaseMemOpClusterMutation(tii, tri, true) {}
1523 : };
1524 0 :
1525 : } // end anonymous namespace
1526 :
1527 : namespace llvm {
1528 33263 :
1529 : std::unique_ptr<ScheduleDAGMutation>
1530 : createLoadClusterDAGMutation(const TargetInstrInfo *TII,
1531 0 : const TargetRegisterInfo *TRI) {
1532 : return EnableMemOpCluster ? llvm::make_unique<LoadClusterMutation>(TII, TRI)
1533 : : nullptr;
1534 33263 : }
1535 :
1536 : std::unique_ptr<ScheduleDAGMutation>
1537 : createStoreClusterDAGMutation(const TargetInstrInfo *TII,
1538 : const TargetRegisterInfo *TRI) {
1539 : return EnableMemOpCluster ? llvm::make_unique<StoreClusterMutation>(TII, TRI)
1540 : : nullptr;
1541 : }
1542 33263 :
1543 : } // end namespace llvm
1544 33263 :
1545 33263 : void BaseMemOpClusterMutation::clusterNeighboringMemOps(
1546 : ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG) {
1547 : SmallVector<MemOpInfo, 32> MemOpRecords;
1548 : for (SUnit *SU : MemOps) {
1549 33263 : unsigned BaseReg;
1550 : int64_t Offset;
1551 33263 : if (TII->getMemOpBaseRegImmOfs(*SU->getInstr(), BaseReg, Offset, TRI))
1552 33263 : MemOpRecords.push_back(MemOpInfo(SU, BaseReg, Offset));
1553 : }
1554 : if (MemOpRecords.size() < 2)
1555 : return;
1556 :
1557 0 : llvm::sort(MemOpRecords);
1558 : unsigned ClusterLength = 1;
1559 0 : for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
1560 0 : SUnit *SUa = MemOpRecords[Idx].SU;
1561 : SUnit *SUb = MemOpRecords[Idx+1].SU;
1562 : if (TII->shouldClusterMemOps(*SUa->getInstr(), MemOpRecords[Idx].BaseReg,
1563 0 : *SUb->getInstr(), MemOpRecords[Idx+1].BaseReg,
1564 0 : ClusterLength) &&
1565 : DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
1566 0 : LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
1567 0 : << SUb->NodeNum << ")\n");
1568 : // Copy successor edges from SUa to SUb. Interleaving computation
1569 0 : // dependent on SUa can prevent load combining due to register reuse.
1570 : // Predecessor edges do not need to be copied from SUb to SUa since nearby
1571 0 : // loads should have effectively the same inputs.
1572 0 : for (const SDep &Succ : SUa->Succs) {
1573 0 : if (Succ.getSUnit() == SUb)
1574 0 : continue;
1575 0 : LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum
1576 0 : << ")\n");
1577 0 : DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
1578 : }
1579 : ++ClusterLength;
1580 : } else
1581 : ClusterLength = 1;
1582 : }
1583 : }
1584 0 :
1585 0 : /// Callback from DAG postProcessing to create cluster edges for loads.
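// A simplified, self-contained sketch of the sort-then-walk-neighbors idea
// used by clusterNeighboringMemOps above. The "same base register and offsets
// within MaxDist" rule below merely stands in for the target-specific
// TII->shouldClusterMemOps decision; all names here are illustrative.
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <tuple>
#include <vector>

struct SimpleMemOp {
  unsigned NodeNum;
  unsigned BaseReg;
  int64_t Offset;
};

// Count how many adjacent pairs of the sorted records would be clustered.
static unsigned countClusterPairs(std::vector<SimpleMemOp> Ops, int64_t MaxDist) {
  std::sort(Ops.begin(), Ops.end(), [](const SimpleMemOp &A, const SimpleMemOp &B) {
    return std::tie(A.BaseReg, A.Offset, A.NodeNum) <
           std::tie(B.BaseReg, B.Offset, B.NodeNum);
  });
  unsigned Pairs = 0;
  for (std::size_t I = 0; I + 1 < Ops.size(); ++I)
    if (Ops[I].BaseReg == Ops[I + 1].BaseReg &&
        Ops[I + 1].Offset - Ops[I].Offset <= MaxDist)
      ++Pairs;
  return Pairs;
}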
1586 0 : void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
1587 : ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
1588 :
1589 0 : // Map DAG NodeNum to store chain ID.
1590 : DenseMap<unsigned, unsigned> StoreChainIDs;
1591 0 : // Map each store chain to a set of dependent MemOps.
1592 : SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
1593 : for (SUnit &SU : DAG->SUnits) {
1594 : if ((IsLoad && !SU.getInstr()->mayLoad()) ||
1595 : (!IsLoad && !SU.getInstr()->mayStore()))
1596 : continue;
1597 :
1598 72604 : unsigned ChainPredID = DAG->SUnits.size();
1599 : for (const SDep &Pred : SU.Preds) {
1600 : if (Pred.isCtrl()) {
1601 : ChainPredID = Pred.getSUnit()->NodeNum;
1602 : break;
1603 : }
1604 72604 : }
1605 910148 : // Check if this chain-like pred has been seen
1606 837544 : // before. ChainPredID==MaxNodeID at the top of the schedule.
1607 477159 : unsigned NumChains = StoreChainDependents.size();
1608 735426 : std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
1609 : StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
1610 204236 : if (Result.second)
1611 279140 : StoreChainDependents.resize(NumChains + 1);
1612 213983 : StoreChainDependents[Result.first->second].push_back(&SU);
1613 36961 : }
1614 36961 :
1615 : // Iterate over the store chains.
1616 : for (auto &SCD : StoreChainDependents)
1617 : clusterNeighboringMemOps(SCD, DAG);
1618 : }
1619 102118 :
1620 : //===----------------------------------------------------------------------===//
1621 102118 : // CopyConstrain - DAG post-processing to encourage copy elimination.
1622 102118 : //===----------------------------------------------------------------------===//
1623 60308 :
1624 204236 : namespace {
1625 :
1626 : /// Post-process the DAG to create weak edges from all uses of a copy to
1627 : /// the one use that defines the copy's source vreg, most likely an induction
1628 132912 : /// variable increment.
1629 60308 : class CopyConstrain : public ScheduleDAGMutation {
1630 72604 : // Transient state.
1631 : SlotIndex RegionBeginIdx;
1632 :
1633 : // RegionEndIdx is the slot index of the last non-debug instruction in the
1634 : // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
1635 : SlotIndex RegionEndIdx;
1636 :
1637 : public:
1638 : CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
1639 :
1640 : void apply(ScheduleDAGInstrs *DAGInstrs) override;
1641 0 :
1642 : protected:
1643 : void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
1644 : };
1645 :
1646 : } // end anonymous namespace
1647 :
1648 : namespace llvm {
1649 :
1650 145282 : std::unique_ptr<ScheduleDAGMutation>
1651 : createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
1652 : const TargetRegisterInfo *TRI) {
1653 : return llvm::make_unique<CopyConstrain>(TII, TRI);
1654 : }
1655 :
1656 : } // end namespace llvm
1657 :
1658 : /// constrainLocalCopy handles two possibilities:
1659 : /// 1) Local src:
1660 : /// I0: = dst
1661 : /// I1: src = ...
1662 : /// I2: = dst
1663 145282 : /// I3: dst = src (copy)
1664 : /// (create pred->succ edges I0->I1, I2->I1)
1665 145282 : ///
1666 : /// 2) Local copy:
1667 : /// I0: dst = src (copy)
1668 : /// I1: = dst
1669 : /// I2: src = ...
1670 : /// I3: = dst
1671 : /// (create pred->succ edges I1->I2, I3->I2)
1672 : ///
1673 : /// Although the MachineScheduler is currently constrained to single blocks,
1674 : /// this algorithm should handle extended blocks. An EBB is a set of
1675 : /// contiguously numbered blocks such that the previous block in the EBB is
1676 : /// always the single predecessor.
1677 : void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
1678 : LiveIntervals *LIS = DAG->getLIS();
1679 : MachineInstr *Copy = CopySU->getInstr();
1680 :
1681 : // Check for pure vreg copies.
1682 : const MachineOperand &SrcOp = Copy->getOperand(1);
1683 : unsigned SrcReg = SrcOp.getReg();
1684 : if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || !SrcOp.readsReg())
1685 : return;
1686 :
1687 : const MachineOperand &DstOp = Copy->getOperand(0);
1688 : unsigned DstReg = DstOp.getReg();
1689 0 : if (!TargetRegisterInfo::isVirtualRegister(DstReg) || DstOp.isDead())
1690 0 : return;
1691 0 :
1692 : // Check if either the dest or source is local. If it's live across a back
1693 : // edge, it's not local. Note that if both vregs are live across the back
1694 0 : // edge, we cannot successfully contrain the copy without cyclic scheduling.
1695 0 : // If both the copy's source and dest are local live intervals, then we
1696 0 : // should treat the dest as the global for the purpose of adding
1697 0 : // constraints. This adds edges from source's other uses to the copy.
1698 : unsigned LocalReg = SrcReg;
1699 : unsigned GlobalReg = DstReg;
1700 0 : LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
1701 0 : if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
1702 0 : LocalReg = DstReg;
1703 : GlobalReg = SrcReg;
1704 : LocalLI = &LIS->getInterval(LocalReg);
1705 : if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
1706 : return;
1707 : }
1708 : LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
1709 :
1710 : // Find the global segment after the start of the local LI.
1711 : LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
1712 0 : // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
1713 0 : // local live range. We could create edges from other global uses to the local
1714 : // start, but the coalescer should have already eliminated these cases, so
1715 : // don't bother dealing with it.
1716 0 : if (GlobalSegment == GlobalLI->end())
1717 0 : return;
1718 0 :
1719 : // If GlobalSegment is killed at the LocalLI->start, the call to find()
1720 0 : // returned the next global segment. But if GlobalSegment overlaps with
1721 : // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
1722 : // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
1723 0 : if (GlobalSegment->contains(LocalLI->beginIndex()))
1724 : ++GlobalSegment;
1725 :
1726 : if (GlobalSegment == GlobalLI->end())
1727 : return;
1728 0 :
1729 0 : // Check if GlobalLI contains a hole in the vicinity of LocalLI.
1730 : if (GlobalSegment != GlobalLI->begin()) {
1731 : // Two address defs have no hole.
1732 : if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
1733 : GlobalSegment->start)) {
1734 : return;
1735 0 : }
1736 0 : // If the prior global segment may be defined by the same two-address
1737 : // instruction that also defines LocalLI, then can't make a hole here.
1738 0 : if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
1739 0 : LocalLI->beginIndex())) {
1740 : return;
1741 : }
1742 0 : // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
1743 : // it would be a disconnected component in the live range.
1744 0 : assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
1745 : "Disconnected LRG within the scheduling region.");
1746 0 : }
1747 : MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
1748 : if (!GlobalDef)
1749 : return;
1750 0 :
1751 : SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
1752 0 : if (!GlobalSU)
1753 : return;
1754 :
1755 : // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
1756 : // constraining the uses of the last local def to precede GlobalDef.
1757 : SmallVector<SUnit*,8> LocalUses;
1758 : const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
1759 : MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
1760 0 : SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
1761 0 : for (const SDep &Succ : LastLocalSU->Succs) {
1762 : if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)
1763 : continue;
1764 0 : if (Succ.getSUnit() == GlobalSU)
1765 0 : continue;
1766 : if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))
1767 : return;
1768 : LocalUses.push_back(Succ.getSUnit());
1769 : }
1770 0 : // Open the top of the GlobalLI hole by constraining any earlier global uses
1771 : // to precede the start of LocalLI.
1772 : SmallVector<SUnit*,8> GlobalUses;
1773 0 : MachineInstr *FirstLocalDef =
1774 0 : LIS->getInstructionFromIndex(LocalLI->beginIndex());
1775 0 : SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
1776 0 : for (const SDep &Pred : GlobalSU->Preds) {
1777 0 : if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)
1778 0 : continue;
1779 0 : if (Pred.getSUnit() == FirstLocalSU)
1780 0 : continue;
1781 : if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))
1782 : return;
1783 : GlobalUses.push_back(Pred.getSUnit());
1784 : }
1785 : LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
1786 : // Add the weak edges.
1787 : for (SmallVectorImpl<SUnit*>::const_iterator
1788 0 : I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) {
1789 0 : LLVM_DEBUG(dbgs() << " Local use SU(" << (*I)->NodeNum << ") -> SU("
1790 0 : << GlobalSU->NodeNum << ")\n");
1791 0 : DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));
1792 0 : }
1793 0 : for (SmallVectorImpl<SUnit*>::const_iterator
1794 0 : I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) {
1795 0 : LLVM_DEBUG(dbgs() << " Global use SU(" << (*I)->NodeNum << ") -> SU("
1796 : << FirstLocalSU->NodeNum << ")\n");
1797 : DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));
1798 : }
1799 0 : }
1800 0 :
1801 : /// Callback from DAG postProcessing to create weak edges to encourage
1802 : /// copy elimination.
1803 0 : void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
1804 : ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
1805 0 : assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
1806 0 :
1807 : MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
1808 : if (FirstPos == DAG->end())
1809 0 : return;
1810 : RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
1811 : RegionEndIdx = DAG->getLIS()->getInstructionIndex(
1812 : *priorNonDebug(DAG->end(), DAG->begin()));
1813 :
1814 : for (SUnit &SU : DAG->SUnits) {
1815 408886 : if (!SU.getInstr()->isCopy())
1816 : continue;
1817 :
1818 : constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG));
1819 408886 : }
1820 408886 : }
1821 :
1822 408407 : //===----------------------------------------------------------------------===//
1823 : // MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
1824 816814 : // and possibly other custom schedulers.
1825 : //===----------------------------------------------------------------------===//
1826 2602894 :
1827 4388974 : static const unsigned InvalidCycle = ~0U;
1828 :
1829 : SchedBoundary::~SchedBoundary() { delete HazardRec; }
1830 584643 :
1831 : /// Given a Count of resource usage and a Latency value, return true if a
1832 : /// SchedBoundary becomes resource limited.
1833 : static bool checkResourceLimit(unsigned LFactor, unsigned Count,
1834 : unsigned Latency) {
1835 : return (int)(Count - (Latency * LFactor)) > (int)LFactor;
1836 : }
1837 :
1838 : void SchedBoundary::reset() {
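// Worked illustration of the comparison above, using made-up numbers: with a
// latency factor of 2, a zone that has consumed Count = 10 units of its
// critical resource against a scheduled latency of 3 cycles is resource
// limited (10 - 3*2 = 4 > 2), while Count = 7 is not (7 - 6 = 1 <= 2).
static_assert((int)(10u - (3u * 2u)) > (int)2u, "resource limited example");
static_assert(!((int)(7u - (3u * 2u)) > (int)2u), "latency limited example");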
1839 : // A new HazardRec is created for each DAG and owned by SchedBoundary.
1840 : // Destroying and reconstructing it is very expensive though. So keep
1841 685000 : // invalid, placeholder HazardRecs.
1842 : if (HazardRec && HazardRec->isEnabled()) {
1843 : delete HazardRec;
1844 : HazardRec = nullptr;
1845 : }
1846 : Available.clear();
1847 3897554 : Pending.clear();
1848 : CheckPending = false;
1849 : CurrCycle = 0;
1850 1210938 : CurrMOps = 0;
1851 : MinReadyCycle = std::numeric_limits<unsigned>::max();
1852 : ExpectedLatency = 0;
1853 : DependentLatency = 0;
1854 1210938 : RetiredMOps = 0;
1855 7970 : MaxExecutedResCount = 0;
1856 7970 : ZoneCritResIdx = 0;
1857 : IsResourceLimited = false;
1858 : ReservedCycles.clear();
1859 : #ifndef NDEBUG
1860 1210938 : // Track the maximum number of stall cycles that could arise either from the
1861 1210938 : // latency of a DAG edge or the number of cycles that a processor resource is
1862 1210938 : // reserved (SchedBoundary::ReservedCycles).
1863 1210938 : MaxObservedStall = 0;
1864 1210938 : #endif
1865 1210938 : // Reserve a zero-count for invalid CritResIdx.
1866 1210938 : ExecutedResCounts.resize(1);
1867 1210938 : assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
1868 1210938 : }
1869 1210938 :
1870 : void SchedRemainder::
1871 : init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
1872 : reset();
1873 : if (!SchedModel->hasInstrSchedModel())
1874 : return;
1875 : RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
1876 : for (SUnit &SU : DAG->SUnits) {
1877 : const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);
1878 1210938 : RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)
1879 : * SchedModel->getMicroOpFactor();
1880 1210938 : for (TargetSchedModel::ProcResIter
1881 : PI = SchedModel->getWriteProcResBegin(SC),
1882 443077 : PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1883 : unsigned PIdx = PI->ProcResourceIdx;
1884 : unsigned Factor = SchedModel->getResourceFactor(PIdx);
1885 443077 : RemainingCounts[PIdx] += (Factor * PI->Cycles);
1886 : }
1887 375370 : }
1888 1275815 : }
1889 1088130 :
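// A small stand-alone sketch of the scaled bookkeeping above: per-resource work
// is accumulated in "factor" units so heterogeneous resources are comparable.
// The write-entry struct and resource table below are hypothetical.
#include <vector>

struct SketchProcWrite {
  unsigned ProcResourceIdx;
  unsigned Cycles;
};

static void addRemainingCounts(std::vector<unsigned> &RemainingCounts,
                               const std::vector<unsigned> &ResourceFactor,
                               const std::vector<SketchProcWrite> &Writes) {
  for (const SketchProcWrite &W : Writes)
    // Mirrors RemainingCounts[PIdx] += Factor * Cycles in the loop above.
    RemainingCounts[W.ProcResourceIdx] += ResourceFactor[W.ProcResourceIdx] * W.Cycles;
}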
1890 1088130 : void SchedBoundary::
1891 1088130 : init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
1892 2118523 : reset();
1893 1088130 : DAG = dag;
1894 3206653 : SchedModel = smodel;
1895 2118523 : Rem = rem;
1896 : if (SchedModel->hasInstrSchedModel()) {
1897 2118523 : ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds());
1898 : ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle);
1899 : }
1900 : }
1901 :
1902 868438 : /// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
1903 : /// these "soft stalls" differently than the hard stall cycles based on CPU
1904 868438 : /// resources and computed by checkHazard(). A fully in-order model
1905 868438 : /// (MicroOpBufferSize==0) will not make use of this since instructions are not
1906 868438 : /// available for scheduling until they are ready. However, a weaker in-order
1907 868438 : /// model may use this for heuristics. For example, if a processor has in-order
1908 868438 : /// behavior when reading certain resources, this may come into play.
1909 746070 : unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
1910 746070 : if (!SU->isUnbuffered)
1911 : return 0;
1912 868438 :
1913 : unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
1914 : if (ReadyCycle > CurrCycle)
1915 : return ReadyCycle - CurrCycle;
1916 : return 0;
1917 : }
1918 :
1919 : /// Compute the next cycle at which the given processor resource can be
1920 : /// scheduled.
1921 22732034 : unsigned SchedBoundary::
1922 22732034 : getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
1923 : unsigned NextUnreserved = ReservedCycles[PIdx];
1924 : // If this resource has never been used, always return cycle zero.
1925 2781651 : if (NextUnreserved == InvalidCycle)
1926 2781651 : return 0;
1927 79489 : // For bottom-up scheduling add the cycles needed for the current operation.
1928 : if (!isTop())
1929 : NextUnreserved += Cycles;
1930 : return NextUnreserved;
1931 : }
1932 :
1933 2121405 : /// Does this SU have a hazard within the current instruction group.
1934 : ///
1935 2121405 : /// The scheduler supports two modes of hazard recognition. The first is the
1936 : /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
1937 2121405 : /// supports highly complicated in-order reservation tables
1938 : /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
1939 : ///
1940 2232 : /// The second is a streamlined mechanism that checks for hazards based on
1941 944 : /// simple counters that the scheduler itself maintains. It explicitly checks
1942 : /// for instruction dispatch limitations, including the number of micro-ops that
1943 : /// can dispatch per cycle.
1944 : ///
1945 : /// TODO: Also check whether the SU must start a new group.
1946 : bool SchedBoundary::checkHazard(SUnit *SU) {
1947 : if (HazardRec->isEnabled()
1948 : && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
1949 : return true;
1950 : }
1951 :
1952 : unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
1953 : if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
1954 : LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
1955 : << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
1956 : return true;
1957 : }
1958 11558048 :
1959 11558048 : if (CurrMOps > 0 &&
1960 11558048 : ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
1961 : (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
1962 : LLVM_DEBUG(dbgs() << " hazard: SU(" << SU->NodeNum << ") must "
1963 : << (isTop() ? "begin" : "end") << " group\n");
1964 11533520 : return true;
1965 11533520 : }
1966 :
1967 : if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
1968 : const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
1969 : for (const MCWriteProcResEntry &PE :
1970 : make_range(SchedModel->getWriteProcResBegin(SC),
1971 11435096 : SchedModel->getWriteProcResEnd(SC))) {
1972 8497979 : unsigned ResIdx = PE.ProcResourceIdx;
1973 8444773 : unsigned Cycles = PE.Cycles;
1974 : unsigned NRCycle = getNextResourceCycle(ResIdx, Cycles);
1975 : if (NRCycle > CurrCycle) {
1976 287 : #ifndef NDEBUG
1977 : MaxObservedStall = std::max(Cycles, MaxObservedStall);
1978 : #endif
1979 11434809 : LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") "
1980 1891 : << SchedModel->getResourceName(ResIdx) << "="
1981 1927 : << NRCycle << "c\n");
1982 : return true;
1983 3818 : }
1984 2521 : }
1985 2521 : }
1986 2521 : return false;
1987 2521 : }
1988 :
1989 : // Find the unscheduled node in ReadySUs with the highest latency.
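// The dispatch-limit part of checkHazard above, reduced to its arithmetic: a
// node with UOps micro-ops is a hazard for the current group when the group is
// non-empty and adding it would exceed the issue width. Values are examples.
constexpr bool wouldOverflowIssueWidth(unsigned CurrMOps, unsigned UOps,
                                       unsigned IssueWidth) {
  return CurrMOps > 0 && CurrMOps + UOps > IssueWidth;
}
static_assert(wouldOverflowIssueWidth(3, 2, 4), "3 + 2 micro-ops overflow a width of 4");
static_assert(!wouldOverflowIssueWidth(0, 6, 4), "an empty group always accepts the node");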
1990 : unsigned SchedBoundary::
1991 : findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
1992 : SUnit *LateSU = nullptr;
1993 : unsigned RemLatency = 0;
1994 : for (SUnit *SU : ReadySUs) {
1995 : unsigned L = getUnscheduledLatency(SU);
1996 : if (L > RemLatency) {
1997 : RemLatency = L;
1998 : LateSU = SU;
1999 : }
2000 : }
2001 : if (LateSU) {
2002 1013716 : LLVM_DEBUG(dbgs() << Available.getName() << " RemLatency SU("
2003 : << LateSU->NodeNum << ") " << RemLatency << "c\n");
2004 : }
2005 : return RemLatency;
2006 5873676 : }
2007 4859960 :
2008 4859960 : // Count resources in this zone and the remaining unscheduled
2009 : // instruction. Return the max count, scaled. Set OtherCritIdx to the critical
2010 : // resource index, or zero if the zone is issue limited.
2011 : unsigned SchedBoundary::
2012 : getOtherResourceCount(unsigned &OtherCritIdx) {
2013 : OtherCritIdx = 0;
2014 : if (!SchedModel->hasInstrSchedModel())
2015 : return 0;
2016 :
2017 1013716 : unsigned OtherCritCount = Rem->RemIssueCount
2018 : + (RetiredMOps * SchedModel->getMicroOpFactor());
2019 : LLVM_DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: "
2020 : << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
2021 : for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
2022 : PIdx != PEnd; ++PIdx) {
2023 638914 : unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
2024 : if (OtherCount > OtherCritCount) {
2025 638914 : OtherCritCount = OtherCount;
2026 638914 : OtherCritIdx = PIdx;
2027 : }
2028 : }
2029 443466 : if (OtherCritIdx) {
2030 443466 : LLVM_DEBUG(
2031 : dbgs() << " " << Available.getName() << " + Remain CritRes: "
2032 : << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
2033 2832242 : << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
2034 3275708 : }
2035 2832242 : return OtherCritCount;
2036 2832242 : }
2037 :
2038 16902 : void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
2039 : assert(SU->getInstr() && "Scheduled SUnit must have instr");
2040 :
2041 : #ifndef NDEBUG
2042 : // ReadyCycle was been bumped up to the CurrCycle when this node was
2043 : // scheduled, but CurrCycle may have been eagerly advanced immediately after
2044 : // scheduling, so may now be greater than ReadyCycle.
2045 : if (ReadyCycle > CurrCycle)
2046 : MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
2047 443466 : #endif
2048 :
2049 : if (ReadyCycle < MinReadyCycle)
2050 3733002 : MinReadyCycle = ReadyCycle;
2051 :
2052 : // Check for interlocks first. For the purpose of other heuristics, an
2053 : // instruction that cannot issue appears as if it's not in the ReadyQueue.
2054 : bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2055 : if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU) ||
2056 : Available.size() >= ReadyListLimit)
2057 : Pending.push(SU);
2058 : else
2059 : Available.push(SU);
2060 : }
2061 3733002 :
2062 876399 : /// Move the boundary of scheduled code by one cycle.
2063 : void SchedBoundary::bumpCycle(unsigned NextCycle) {
2064 : if (SchedModel->getMicroOpBufferSize() == 0) {
2065 : assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
2066 3733002 : "MinReadyCycle uninitialized");
2067 3733002 : if (MinReadyCycle > NextCycle)
2068 : NextCycle = MinReadyCycle;
2069 178750 : }
2070 : // Update the current micro-ops, which will issue in the next cycle.
2071 3554252 : unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
2072 3733002 : CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
2073 :
2074 : // Decrement DependentLatency based on the next cycle.
2075 887518 : if ((NextCycle - CurrCycle) > DependentLatency)
2076 887518 : DependentLatency = 0;
2077 : else
2078 : DependentLatency -= (NextCycle - CurrCycle);
2079 277038 :
2080 : if (!HazardRec->isEnabled()) {
2081 : // Bypass HazardRec virtual calls.
2082 : CurrCycle = NextCycle;
2083 887518 : } else {
2084 887518 : // Bypass getHazardType calls in case of long latency.
2085 : for (; CurrCycle != NextCycle; ++CurrCycle) {
2086 : if (isTop())
2087 887518 : HazardRec->AdvanceCycle();
2088 133628 : else
2089 : HazardRec->RecedeCycle();
2090 753890 : }
2091 : }
2092 887518 : CheckPending = true;
2093 : IsResourceLimited =
2094 814961 : checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2095 : getScheduledLatency());
2096 :
2097 170308 : LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName()
2098 97751 : << '\n');
2099 44226 : }
2100 :
2101 53525 : void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
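// The CurrMOps bookkeeping of bumpCycle in isolation: each skipped cycle
// retires one issue-width worth of micro-ops and the counter saturates at
// zero. A stand-alone sketch with example numbers, not the member itself.
constexpr unsigned retireMOps(unsigned CurrMOps, unsigned IssueWidth,
                              unsigned CurrCycle, unsigned NextCycle) {
  return CurrMOps <= IssueWidth * (NextCycle - CurrCycle)
             ? 0
             : CurrMOps - IssueWidth * (NextCycle - CurrCycle);
}
static_assert(retireMOps(5, 4, 10, 11) == 1, "one micro-op left after one cycle");
static_assert(retireMOps(5, 4, 10, 12) == 0, "two skipped cycles drain the group");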
2102 : ExecutedResCounts[PIdx] += Count;
2103 : if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
2104 887518 : MaxExecutedResCount = ExecutedResCounts[PIdx];
2105 887518 : }
2106 887518 :
2107 : /// Add the given processor resource to this scheduled zone.
2108 : ///
2109 : /// \param Cycles indicates the number of consecutive (non-pipelined) cycles
2110 : /// during which this resource is consumed.
2111 887518 : ///
2112 : /// \return the next cycle at which the instruction may execute without
2113 2118523 : /// oversubscribing resources.
2114 2118523 : unsigned SchedBoundary::
2115 2118523 : countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
2116 687938 : unsigned Factor = SchedModel->getResourceFactor(PIdx);
2117 2118523 : unsigned Count = Factor * Cycles;
2118 : LLVM_DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) << " +"
2119 : << Cycles << "x" << Factor << "u\n");
2120 :
2121 : // Update Executed resources counts.
2122 : incExecutedResources(PIdx, Count);
2123 : assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
2124 : Rem->RemainingCounts[PIdx] -= Count;
2125 :
2126 2118523 : // Check if this resource exceeds the current critical resource. If so, it
2127 : // becomes the critical resource.
2128 2118523 : if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
2129 2118523 : ZoneCritResIdx = PIdx;
2130 : LLVM_DEBUG(dbgs() << " *** Critical resource "
2131 : << SchedModel->getResourceName(PIdx) << ": "
2132 : << getResourceCount(PIdx) / SchedModel->getLatencyFactor()
2133 : << "c\n");
2134 2118523 : }
2135 : // For reserved resources, record the highest cycle using the resource.
2136 2118523 : unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles);
2137 : if (NextAvailable > CurrCycle) {
2138 : LLVM_DEBUG(dbgs() << " Resource conflict: "
2139 : << SchedModel->getProcResource(PIdx)->Name
2140 4050755 : << " reserved until @" << NextAvailable << "\n");
2141 290115 : }
2142 : return NextAvailable;
2143 : }
2144 :
2145 : /// Move the boundary of scheduled code by one SUnit.
2146 : void SchedBoundary::bumpNode(SUnit *SU) {
2147 : // Update the reservation table.
2148 2118523 : if (HazardRec->isEnabled()) {
2149 : if (!isTop() && SU->isCall) {
2150 : // Calls are scheduled with their preceding instructions. For bottom-up
2151 : // scheduling, clear the pipeline state before emitting.
2152 : HazardRec->Reset();
2153 : }
2154 2118523 : HazardRec->EmitInstruction(SU);
2155 : }
2156 : // checkHazard should prevent scheduling multiple instructions per cycle that
2157 : // exceed the issue width.
2158 2591852 : const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2159 : unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
2160 2591852 : assert(
2161 90705 : (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
2162 : "Cannot schedule this instruction's MicroOps in the current cycle.");
2163 :
2164 0 : unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2165 : LLVM_DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n");
2166 90705 :
2167 : unsigned NextCycle = CurrCycle;
2168 : switch (SchedModel->getMicroOpBufferSize()) {
2169 : case 0:
2170 2591852 : assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
2171 2591852 : break;
2172 : case 1:
2173 : if (ReadyCycle > NextCycle) {
2174 : NextCycle = ReadyCycle;
2175 : LLVM_DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n");
2176 2591852 : }
2177 : break;
2178 : default:
2179 2591852 : // We don't currently model the OOO reorder buffer, so consider all
2180 2591852 : // scheduled MOps to be "retired". We do loosely model in-order resource
2181 : // latency. If this instruction uses an in-order resource, account for any
2182 : // likely stall cycles.
2183 : if (SU->isUnbuffered && ReadyCycle > NextCycle)
2184 268267 : NextCycle = ReadyCycle;
2185 268267 : break;
2186 : }
2187 : RetiredMOps += IncMOps;
2188 :
2189 : // Update resource counts and critical resource.
2190 1995601 : if (SchedModel->hasInstrSchedModel()) {
2191 : unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
2192 : assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
2193 : Rem->RemIssueCount -= DecRemIssue;
2194 : if (ZoneCritResIdx) {
2195 1995601 : // Scale scheduled micro-ops for comparing with the critical resource.
2196 : unsigned ScaledMOps =
2197 : RetiredMOps * SchedModel->getMicroOpFactor();
2198 :
2199 2591852 : // If scaled micro-ops are now more than the previous critical resource by
2200 : // a full cycle, then micro-ops issue becomes critical.
2201 : if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
2202 2591852 : >= (int)SchedModel->getLatencyFactor()) {
2203 1088130 : ZoneCritResIdx = 0;
2204 : LLVM_DEBUG(dbgs() << " *** Critical resource NumMicroOps: "
2205 1088130 : << ScaledMOps / SchedModel->getLatencyFactor()
2206 1088130 : << "c\n");
2207 : }
2208 : }
2209 522272 : for (TargetSchedModel::ProcResIter
2210 : PI = SchedModel->getWriteProcResBegin(SC),
2211 : PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2212 : unsigned RCycle =
2213 1044544 : countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle);
2214 522272 : if (RCycle > NextCycle)
2215 1649 : NextCycle = RCycle;
2216 : }
2217 : if (SU->hasReservedResource) {
2218 : // For reserved resources, record the highest cycle using the resource.
2219 : // For top-down scheduling, this is the cycle in which we schedule this
2220 : // instruction plus the number of cycles the operation reserves the
2221 2118523 : // resource. For bottom-up, it is simply the instruction's cycle.
2222 1088130 : for (TargetSchedModel::ProcResIter
2223 3206653 : PI = SchedModel->getWriteProcResBegin(SC),
2224 : PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2225 2118523 : unsigned PIdx = PI->ProcResourceIdx;
2226 2118523 : if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
2227 : if (isTop()) {
2228 : ReservedCycles[PIdx] =
2229 1088130 : std::max(getNextResourceCycle(PIdx, 0), NextCycle + PI->Cycles);
2230 : }
2231 : else
2232 : ReservedCycles[PIdx] = NextCycle;
2233 : }
2234 809 : }
2235 661 : }
2236 1470 : }
2237 809 : // Update ExpectedLatency and DependentLatency.
2238 1618 : unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
2239 661 : unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
2240 361 : if (SU->getDepth() > TopLatency) {
2241 722 : TopLatency = SU->getDepth();
2242 : LLVM_DEBUG(dbgs() << " " << Available.getName() << " TopLatency SU("
2243 : << SU->NodeNum << ") " << TopLatency << "c\n");
2244 300 : }
2245 : if (SU->getHeight() > BotLatency) {
2246 : BotLatency = SU->getHeight();
2247 : LLVM_DEBUG(dbgs() << " " << Available.getName() << " BotLatency SU("
2248 : << SU->NodeNum << ") " << BotLatency << "c\n");
2249 : }
2250 2591852 : // If we stall for any reason, bump the cycle.
2251 2591852 : if (NextCycle > CurrCycle)
2252 2591852 : bumpCycle(NextCycle);
2253 504026 : else
2254 : // After updating ZoneCritResIdx and ExpectedLatency, check if we're
2255 : // resource limited. If a stall occurred, bumpCycle does this.
2256 : IsResourceLimited =
2257 2591852 : checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2258 928748 : getScheduledLatency());
2259 :
2260 : // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
2261 : // resets CurrMOps. Loop to handle instructions with more MOps than issue in
2262 : // one cycle. Since we commonly reach the max MOps here, opportunistically
2263 2591852 : // bump the cycle to avoid uselessly checking everything in the readyQ.
2264 25280 : CurrMOps += IncMOps;
2265 :
2266 : // Bump the cycle count for issue group constraints.
2267 : // This must be done after NextCycle has been adjust for all other stalls.
2268 2566572 : // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
2269 2566572 : // currCycle to X.
2270 : if ((isTop() && SchedModel->mustEndGroup(SU->getInstr())) ||
2271 : (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
2272 : LLVM_DEBUG(dbgs() << " Bump cycle to " << (isTop() ? "end" : "begin")
2273 : << " group\n");
2274 : bumpCycle(++NextCycle);
2275 : }
2276 2591852 :
2277 : while (CurrMOps >= SchedModel->getIssueWidth()) {
2278 : LLVM_DEBUG(dbgs() << " *** Max MOps " << CurrMOps << " at cycle "
2279 : << CurrCycle << '\n');
2280 : bumpCycle(++NextCycle);
2281 : }
2282 2591852 : LLVM_DEBUG(dumpScheduledState());
2283 2410567 : }
2284 :
2285 : /// Release pending ready nodes in to the available queue. This makes them
2286 332 : /// visible to heuristics.
2287 : void SchedBoundary::releasePending() {
2288 : // If the available queue is empty, it is safe to reset MinReadyCycle.
2289 3295746 : if (Available.empty())
2290 : MinReadyCycle = std::numeric_limits<unsigned>::max();
2291 :
2292 703894 : // Check to see if any of the pending instructions are ready to issue. If
2293 : // so, add them to the available queue.
2294 : bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2295 2591852 : for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
2296 : SUnit *SU = *(Pending.begin()+i);
2297 : unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
2298 :
2299 767468 : if (ReadyCycle < MinReadyCycle)
2300 : MinReadyCycle = ReadyCycle;
2301 767468 :
2302 181432 : if (!IsBuffered && ReadyCycle > CurrCycle)
2303 : continue;
2304 :
2305 : if (checkHazard(SU))
2306 767468 : continue;
2307 1104626 :
2308 340524 : if (Available.size() >= ReadyListLimit)
2309 340524 : break;
2310 :
2311 340524 : Available.push(SU);
2312 190208 : Pending.remove(Pending.begin()+i);
2313 : --i; --e;
2314 340524 : }
2315 : CheckPending = false;
2316 : }
2317 258649 :
2318 : /// Remove SU from the ready set for this boundary.
2319 : void SchedBoundary::removeReady(SUnit *SU) {
2320 250626 : if (Available.isInQueue(SU))
2321 : Available.remove(Available.find(SU));
2322 : else {
2323 247260 : assert(Pending.isInQueue(SU) && "bad ready count");
2324 : Pending.remove(Pending.find(SU));
2325 247260 : }
2326 : }
2327 767468 :
2328 767468 : /// If this queue only has one ready candidate, return it. As a side effect,
2329 : /// defer any nodes that now hit a hazard, and advance the cycle until at least
2330 : /// one node is ready. If multiple instructions are ready, return NULL.
2331 3732992 : SUnit *SchedBoundary::pickOnlyChoice() {
2332 7465984 : if (CheckPending)
2333 3720406 : releasePending();
2334 :
2335 : if (CurrMOps > 0) {
2336 12586 : // Defer any ready instrs that now have a hazard.
2337 : for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
2338 3732992 : if (checkHazard(*I)) {
2339 : Pending.push(*I);
2340 : I = Available.remove(I);
2341 : continue;
2342 : }
2343 2921829 : ++I;
2344 2921829 : }
2345 609456 : }
2346 : for (unsigned i = 0; Available.empty(); ++i) {
2347 2921829 : // FIXME: Re-enable assert once PR20057 is resolved.
2348 : // assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
2349 8912445 : // "permanent hazard");
2350 7686454 : (void)i;
2351 81099 : bumpCycle(CurrCycle + 1);
2352 : releasePending();
2353 81099 : }
2354 :
2355 : LLVM_DEBUG(Pending.dump());
2356 : LLVM_DEBUG(Available.dump());
2357 :
2358 3079841 : if (Available.size() == 1)
2359 : return *Available.begin();
2360 : return nullptr;
2361 : }
2362 :
2363 158012 : #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2364 158012 : // This is useful information to dump after bumpNode.
2365 : // Note that the Queue contents are more useful before pickNodeFromQueue.
2366 : LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const {
2367 : unsigned ResFactor;
2368 : unsigned ResCount;
2369 : if (ZoneCritResIdx) {
2370 2921829 : ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
2371 1145662 : ResCount = getResourceCount(ZoneCritResIdx);
2372 : } else {
2373 : ResFactor = SchedModel->getMicroOpFactor();
2374 : ResCount = RetiredMOps * ResFactor;
2375 : }
2376 : unsigned LFactor = SchedModel->getLatencyFactor();
2377 : dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
2378 : << " Retired: " << RetiredMOps;
2379 : dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c";
2380 : dbgs() << "\n Critical: " << ResCount / LFactor << "c, "
2381 : << ResCount / ResFactor << " "
2382 : << SchedModel->getResourceName(ZoneCritResIdx)
2383 : << "\n ExpectedLatency: " << ExpectedLatency << "c\n"
2384 : << (IsResourceLimited ? " - Resource" : " - Latency")
2385 : << " limited.\n";
2386 : }
2387 : #endif
2388 :
2389 : //===----------------------------------------------------------------------===//
2390 : // GenericScheduler - Generic implementation of MachineSchedStrategy.
2391 : //===----------------------------------------------------------------------===//
2392 :
2393 : void GenericSchedulerBase::SchedCandidate::
2394 : initResourceDelta(const ScheduleDAGMI *DAG,
2395 : const TargetSchedModel *SchedModel) {
2396 : if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
2397 : return;
2398 :
2399 : const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2400 : for (TargetSchedModel::ProcResIter
2401 : PI = SchedModel->getWriteProcResBegin(SC),
2402 : PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2403 : if (PI->ProcResourceIdx == Policy.ReduceResIdx)
2404 : ResDelta.CritResources += PI->Cycles;
2405 15479857 : if (PI->ProcResourceIdx == Policy.DemandResIdx)
2406 : ResDelta.DemandedResources += PI->Cycles;
2407 : }
2408 15479857 : }
2409 :
2410 : /// Compute remaining latency. We need this both to determine whether the
2411 16993 : /// overall schedule has become latency-limited and whether the instructions
2412 41698 : /// outside this zone are resource or latency limited.
2413 16993 : ///
2414 58691 : /// The "dependent" latency is updated incrementally during scheduling as the
2415 41698 : /// max height/depth of scheduled nodes minus the cycles since it was
2416 4093 : /// scheduled:
2417 41698 : /// DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
2418 6376 : ///
2419 : /// The "independent" latency is the max ready queue depth:
2420 : /// ILat = max N.depth for N in Available|Pending
2421 : ///
2422 : /// RemainingLatency is the greater of independent and dependent latency.
2423 : ///
2424 : /// These computations are expensive, especially in DAGs with many edges, so
2425 : /// only do them if necessary.
2426 : static unsigned computeRemLatency(SchedBoundary &CurrZone) {
2427 : unsigned RemLatency = CurrZone.getDependentLatency();
2428 : RemLatency = std::max(RemLatency,
2429 : CurrZone.findMaxLatency(CurrZone.Available.elements()));
2430 : RemLatency = std::max(RemLatency,
2431 : CurrZone.findMaxLatency(CurrZone.Pending.elements()));
2432 : return RemLatency;
2433 : }
2434 :
2435 : /// Returns true if the current cycle plus remaining latency is greater than
2436 : /// the critical path in the scheduling region.
2437 : bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy,
2438 506858 : SchedBoundary &CurrZone,
2439 506858 : bool ComputeRemLatency,
2440 506858 : unsigned &RemLatency) const {
2441 695732 : // The current cycle is already greater than the critical path, so we are
2442 506858 : // already latency limited and don't need to compute the remaining latency.
2443 506858 : if (CurrZone.getCurrCycle() > Rem.CriticalPath)
2444 506858 : return true;
2445 :
2446 : // If we haven't scheduled anything yet, then we aren't latency limited.
2447 : if (CurrZone.getCurrCycle() == 0)
2448 : return false;
2449 424003 :
2450 : if (ComputeRemLatency)
2451 : RemLatency = computeRemLatency(CurrZone);
2452 :
2453 : return RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath;
2454 : }
2455 424003 :
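// Stand-alone sketch of the two steps above: take the maximum unscheduled
// latency over everything still queued, then compare against the critical
// path. A plain vector of node depths stands in for the ready queues.
#include <algorithm>
#include <vector>

static bool latencyLimitedSketch(unsigned DependentLatency,
                                 const std::vector<unsigned> &QueuedDepths,
                                 unsigned CurrCycle, unsigned CriticalPath) {
  unsigned RemLatency = DependentLatency;
  for (unsigned Depth : QueuedDepths)
    RemLatency = std::max(RemLatency, Depth);
  return CurrCycle + RemLatency > CriticalPath;
}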
2456 : /// Set the CandPolicy given a scheduling zone given the current resources and
2457 : /// latencies inside and outside the zone.
2458 : void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
2459 378034 : SchedBoundary &CurrZone,
2460 : SchedBoundary *OtherZone) {
2461 : // Apply preemptive heuristics based on the total latency and resources
2462 250106 : // inside and outside this zone. Potential stalls should be considered before
2463 63394 : // following this policy.
2464 :
2465 250106 : // Compute the critical resource outside the zone.
2466 : unsigned OtherCritIdx = 0;
2467 : unsigned OtherCount =
2468 : OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
2469 :
2470 665875 : bool OtherResLimited = false;
2471 : unsigned RemLatency = 0;
2472 : bool RemLatencyComputed = false;
2473 : if (SchedModel->hasInstrSchedModel() && OtherCount != 0) {
2474 : RemLatency = computeRemLatency(CurrZone);
2475 : RemLatencyComputed = true;
2476 : OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
2477 : OtherCount, RemLatency);
2478 665875 : }
2479 :
2480 665875 : // Schedule aggressively for latency in PostRA mode. We don't check for
2481 : // acyclic latency during PostRA, and highly out-of-order processors will
2482 : // skip PostRA scheduling.
2483 665875 : if (!OtherResLimited &&
2484 : (IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed,
2485 665875 : RemLatency))) {
2486 443464 : Policy.ReduceLatency |= true;
2487 : LLVM_DEBUG(dbgs() << " " << CurrZone.Available.getName()
2488 443464 : << " RemainingLatency " << RemLatency << " + "
2489 : << CurrZone.getCurrCycle() << "c > CritPath "
2490 : << Rem.CriticalPath << "\n");
2491 : }
2492 : // If the same resource is limiting inside and outside the zone, do nothing.
2493 : if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
2494 : return;
2495 665875 :
2496 424003 : LLVM_DEBUG(if (CurrZone.isResourceLimited()) {
2497 : dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: "
2498 191550 : << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n";
2499 : } if (OtherResLimited) dbgs()
2500 : << " RemainingLimit: "
2501 : << SchedModel->getResourceName(OtherCritIdx) << "\n";
2502 : if (!CurrZone.isResourceLimited() && !OtherResLimited) dbgs()
2503 : << " Latency limited both directions.\n");
2504 :
2505 665875 : if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
2506 653310 : Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
2507 :
2508 : if (OtherResLimited)
2509 : Policy.DemandResIdx = OtherCritIdx;
2510 : }
2511 :
2512 : #ifndef NDEBUG
2513 : const char *GenericSchedulerBase::getReasonStr(
2514 : GenericSchedulerBase::CandReason Reason) {
2515 : switch (Reason) {
2516 : case NoCand: return "NOCAND ";
2517 12565 : case Only1: return "ONLY1 ";
2518 1525 : case PhysRegCopy: return "PREG-COPY ";
2519 : case RegExcess: return "REG-EXCESS";
2520 12565 : case RegCritical: return "REG-CRIT ";
2521 1444 : case Stall: return "STALL ";
2522 : case Cluster: return "CLUSTER ";
2523 : case Weak: return "WEAK ";
2524 : case RegMax: return "REG-MAX ";
2525 : case ResourceReduce: return "RES-REDUCE";
2526 : case ResourceDemand: return "RES-DEMAND";
2527 : case TopDepthReduce: return "TOP-DEPTH ";
2528 : case TopPathReduce: return "TOP-PATH ";
2529 : case BotHeightReduce:return "BOT-HEIGHT";
2530 : case BotPathReduce: return "BOT-PATH ";
2531 : case NextDefUse: return "DEF-USE ";
2532 : case NodeOrder: return "ORDER ";
2533 : };
2534 : llvm_unreachable("Unknown reason!");
2535 : }
2536 :
2537 : void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
2538 : PressureChange P;
2539 : unsigned ResIdx = 0;
2540 : unsigned Latency = 0;
2541 : switch (Cand.Reason) {
2542 : default:
2543 : break;
2544 : case RegExcess:
2545 : P = Cand.RPDelta.Excess;
2546 : break;
2547 : case RegCritical:
2548 : P = Cand.RPDelta.CriticalMax;
2549 : break;
2550 : case RegMax:
2551 : P = Cand.RPDelta.CurrentMax;
2552 : break;
2553 : case ResourceReduce:
2554 : ResIdx = Cand.Policy.ReduceResIdx;
2555 : break;
2556 : case ResourceDemand:
2557 : ResIdx = Cand.Policy.DemandResIdx;
2558 : break;
2559 : case TopDepthReduce:
2560 : Latency = Cand.SU->getDepth();
2561 : break;
2562 : case TopPathReduce:
2563 : Latency = Cand.SU->getHeight();
2564 : break;
2565 : case BotHeightReduce:
2566 : Latency = Cand.SU->getHeight();
2567 : break;
2568 : case BotPathReduce:
2569 : Latency = Cand.SU->getDepth();
2570 : break;
2571 : }
2572 : dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
2573 : if (P.isValid())
2574 : dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
2575 : << ":" << P.getUnitInc() << " ";
2576 : else
2577 : dbgs() << " ";
2578 : if (ResIdx)
2579 : dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
2580 : else
2581 : dbgs() << " ";
2582 : if (Latency)
2583 : dbgs() << " " << Latency << " cycles ";
2584 : else
2585 : dbgs() << " ";
2586 : dbgs() << '\n';
2587 : }
2588 : #endif
2589 :
2590 : namespace llvm {
2591 : /// Return true if this heuristic determines order.
2592 : bool tryLess(int TryVal, int CandVal,
2593 : GenericSchedulerBase::SchedCandidate &TryCand,
2594 : GenericSchedulerBase::SchedCandidate &Cand,
2595 : GenericSchedulerBase::CandReason Reason) {
2596 : if (TryVal < CandVal) {
2597 : TryCand.Reason = Reason;
2598 : return true;
2599 : }
2600 : if (TryVal > CandVal) {
2601 : if (Cand.Reason > Reason)
2602 : Cand.Reason = Reason;
2603 : return true;
2604 66527702 : }
2605 : return false;
2606 : }
2607 :
2608 66527702 : bool tryGreater(int TryVal, int CandVal,
2609 51343 : GenericSchedulerBase::SchedCandidate &TryCand,
2610 51343 : GenericSchedulerBase::SchedCandidate &Cand,
2611 : GenericSchedulerBase::CandReason Reason) {
2612 66476359 : if (TryVal > CandVal) {
2613 255865 : TryCand.Reason = Reason;
2614 58814 : return true;
2615 255865 : }
2616 : if (TryVal < CandVal) {
2617 : if (Cand.Reason > Reason)
2618 : Cand.Reason = Reason;
2619 : return true;
2620 69823791 : }
2621 : return false;
2622 : }
2623 :
2624 69823791 : bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
2625 406228 : GenericSchedulerBase::SchedCandidate &Cand,
2626 406228 : SchedBoundary &Zone) {
2627 : if (Zone.isTop()) {
2628 69417563 : if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
2629 963365 : if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2630 315029 : TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
2631 963365 : return true;
2632 : }
2633 : if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2634 : TryCand, Cand, GenericSchedulerBase::TopPathReduce))
2635 : return true;
2636 989361 : } else {
2637 : if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
2638 : if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2639 989361 : TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
2640 548746 : return true;
2641 6883 : }
2642 : if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2643 : TryCand, Cand, GenericSchedulerBase::BotPathReduce))
2644 : return true;
2645 293671 : }
2646 : return false;
2647 88276 : }
2648 : } // end namespace llvm
2649 1429976 :
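// A reduced model of the tie-breaking protocol above: each comparison either
// picks the trial candidate, keeps the incumbent, or stays undecided so that
// the next heuristic gets a chance. The enum and helper are illustrative only.
enum class SketchPick { TakeTryCand, KeepCand, Undecided };

constexpr SketchPick pickSmaller(int TryVal, int CandVal) {
  return TryVal < CandVal   ? SketchPick::TakeTryCand
         : TryVal > CandVal ? SketchPick::KeepCand
                            : SketchPick::Undecided;
}
static_assert(pickSmaller(1, 2) == SketchPick::TakeTryCand, "lower value wins");
static_assert(pickSmaller(3, 3) == SketchPick::Undecided, "ties fall through");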
2650 8014 : static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
2651 : LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
2652 : << GenericSchedulerBase::getReasonStr(Reason) << '\n');
2653 : }
2654 711950 :
2655 : static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) {
2656 283496 : tracePick(Cand.Reason, Cand.AtTop);
2657 : }
2658 :
2659 : void GenericScheduler::initialize(ScheduleDAGMI *dag) {
2660 : assert(dag->hasVRegLiveness() &&
2661 : "(PreRA)GenericScheduler needs vreg liveness");
2662 : DAG = static_cast<ScheduleDAGMILive*>(dag);
2663 : SchedModel = DAG->getSchedModel();
2664 : TRI = DAG->TRI;
2665 :
2666 : Rem.init(DAG, SchedModel);
2667 0 : Top.init(DAG, SchedModel, &Rem);
2668 : Bot.init(DAG, SchedModel, &Rem);
2669 0 :
2670 : // Initialize resource counts.
2671 425361 :
2672 : // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
2673 : // are disabled, then these HazardRecs will be disabled.
2674 425361 : const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
2675 425361 : if (!Top.HazardRec) {
2676 425361 : Top.HazardRec =
2677 : DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
2678 425361 : Itin, DAG);
2679 425361 : }
2680 425361 : if (!Bot.HazardRec) {
2681 : Bot.HazardRec =
2682 : DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
2683 : Itin, DAG);
2684 : }
2685 : TopCand.SU = nullptr;
2686 425361 : BotCand.SU = nullptr;
2687 425361 : }
2688 157109 :
2689 157109 : /// Initialize the per-region scheduling policy.
2690 157109 : void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
2691 : MachineBasicBlock::iterator End,
2692 425361 : unsigned NumRegionInstrs) {
2693 157109 : const MachineFunction &MF = *Begin->getMF();
2694 157109 : const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
2695 157109 :
2696 : // Avoid setting up the register pressure tracker for small regions to save
2697 425361 : // compile time. As a rough heuristic, only track pressure when the number of
2698 425361 : // schedulable instructions exceeds half the integer register file.
2699 425361 : RegionPolicy.ShouldTrackPressure = true;
2700 : for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) {
2701 : MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
2702 970270 : if (TLI->isTypeLegal(LegalIntVT)) {
2703 : unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
2704 : TLI->getRegClassFor(LegalIntVT));
2705 : RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
2706 970270 : }
2707 : }
2708 :
2709 : // For generic targets, we default to bottom-up, because it's simpler and more
2710 : // compile-time optimizations have been implemented in that direction.
2711 970270 : RegionPolicy.OnlyBottomUp = true;
2712 3881080 :
2713 2910810 : // Allow the subtarget to override default policy.
2714 2910810 : MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);
2715 2730022 :
2716 2730022 : // After subtarget overrides, apply command line options.
2717 2730022 : if (!EnableRegPressure)
2718 : RegionPolicy.ShouldTrackPressure = false;
2719 :
2720 : // Check -misched-topdown/bottomup can force or unforce scheduling direction.
2721 : // e.g. -misched-bottomup=false allows scheduling in both directions.
2722 : assert((!ForceTopDown || !ForceBottomUp) &&
2723 970270 : "-misched-topdown incompatible with -misched-bottomup");
2724 : if (ForceBottomUp.getNumOccurrences() > 0) {
2725 : RegionPolicy.OnlyBottomUp = ForceBottomUp;
2726 970270 : if (RegionPolicy.OnlyBottomUp)
2727 : RegionPolicy.OnlyTopDown = false;
2728 : }
2729 970270 : if (ForceTopDown.getNumOccurrences() > 0) {
2730 0 : RegionPolicy.OnlyTopDown = ForceTopDown;
2731 : if (RegionPolicy.OnlyTopDown)
2732 : RegionPolicy.OnlyBottomUp = false;
2733 : }
2734 : }
2735 :
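// Editor's note: a worked instance of the region-size heuristic above, using
// hypothetical numbers (no particular target is implied). If the widest legal
// integer type is i32 and its register class has 16 allocatable registers,
// then NIntRegs / 2 == 8, so pressure tracking is enabled only for regions of
// more than 8 schedulable instructions; smaller regions skip the tracker to
// save compile time.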
2736 970270 : void GenericScheduler::dumpPolicy() const {
2737 0 : // Cannot completely remove virtual function even in release mode.
2738 0 : #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2739 0 : dbgs() << "GenericScheduler RegionPolicy: "
2740 : << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
2741 970270 : << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
2742 4 : << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
2743 4 : << "\n";
2744 4 : #endif
2745 : }
2746 970270 :
2747 : /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
2748 0 : /// critical path by more cycles than it takes to drain the instruction buffer.
2749 : /// We estimate an upper bound on in-flight instructions as:
2750 : ///
2751 : /// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
2752 : /// InFlightIterations = AcyclicPath / CyclesPerIteration
2753 : /// InFlightResources = InFlightIterations * LoopResources
2754 : ///
2755 : /// TODO: Check execution resources in addition to IssueCount.
2756 : void GenericScheduler::checkAcyclicLatency() {
2757 0 : if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
2758 : return;
2759 :
2760 : // Scaled number of cycles per loop iteration.
2761 : unsigned IterCount =
2762 : std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
2763 : Rem.RemIssueCount);
2764 : // Scaled acyclic critical path.
2765 : unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
2766 : // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
2767 : unsigned InFlightCount =
2768 389637 : (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
2769 389637 : unsigned BufferLimit =
2770 : SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
2771 :
2772 : Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
2773 :
2774 748 : LLVM_DEBUG(
2775 374 : dbgs() << "IssueCycles="
2776 : << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
2777 374 : << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
2778 : << "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount
2779 374 : << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
2780 374 : << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
2781 : if (Rem.IsAcyclicLatencyLimited) dbgs() << " ACYCLIC LATENCY LIMIT\n");
2782 374 : }
2783 :
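// Editor's note: a small numeric illustration of the formula above, with
// made-up values (not taken from any real scheduling model). Suppose
// CyclicCritPath = 4, CriticalPath = 20, RemIssueCount = 8, MicroOpBufferSize = 2,
// and LatencyFactor = MicroOpFactor = 1. Then
//   IterCount     = max(4 * 1, 8)        = 8
//   AcyclicCount  = 20 * 1               = 20
//   InFlightCount = (20 * 8 + 8 - 1) / 8 = 20   (integer division)
//   BufferLimit   = 2 * 1                = 2
// so InFlightCount > BufferLimit and the region is flagged as acyclic-latency
// limited, which biases the later heuristics toward latency.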
2784 374 : void GenericScheduler::registerRoots() {
2785 : Rem.CriticalPath = DAG->ExitSU.getDepth();
2786 :
2787 : // Some roots may not feed into ExitSU. Check all of them in case.
2788 : for (const SUnit *SU : Bot.Available) {
2789 : if (SU->getDepth() > Rem.CriticalPath)
2790 : Rem.CriticalPath = SU->getDepth();
2791 : }
2792 : LLVM_DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
2793 : if (DumpCriticalPathLength) {
2794 : errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
2795 : }
2796 425361 :
2797 850722 : if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) {
2798 : Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
2799 : checkAcyclicLatency();
2800 1269998 : }
2801 844637 : }
2802 65030 :
2803 : namespace llvm {
2804 : bool tryPressure(const PressureChange &TryP,
2805 425361 : const PressureChange &CandP,
2806 0 : GenericSchedulerBase::SchedCandidate &TryCand,
2807 : GenericSchedulerBase::SchedCandidate &Cand,
2808 : GenericSchedulerBase::CandReason Reason,
2809 425361 : const TargetRegisterInfo *TRI,
2810 389637 : const MachineFunction &MF) {
2811 389637 : // If one candidate decreases and the other increases, go with it.
2812 : // Invalid candidates have UnitInc==0.
2813 425361 : if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
2814 : Reason)) {
2815 : return true;
2816 33601048 : }
2817 : // Do not compare the magnitude of pressure changes between top and bottom
2818 : // boundary.
2819 : if (Cand.AtTop != TryCand.AtTop)
2820 : return false;
2821 :
2822 : // If both candidates affect the same set in the same boundary, go with the
2823 : // smallest increase.
2824 : unsigned TryPSet = TryP.getPSetOrMax();
2825 33601048 : unsigned CandPSet = CandP.getPSetOrMax();
2826 : if (TryPSet == CandPSet) {
2827 : return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
2828 : Reason);
2829 : }
2830 :
2831 33584844 : int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
2832 : std::numeric_limits<int>::max();
2833 :
2834 : int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
2835 : std::numeric_limits<int>::max();
2836 33226222 :
2837 33226222 : // If the candidates are decreasing pressure, reverse priority.
2838 33226222 : if (TryP.getUnitInc() < 0)
2839 98680212 : std::swap(TryRank, CandRank);
2840 32893404 : return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
2841 : }
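// Editor's note: a hypothetical reading of the rules above (values invented).
// If TryP raises some pressure set by +2 units while CandP lowers its set by
// -1, the first tryGreater() call already settles the choice in Cand's favour.
// If both raise the same set at the same boundary, say +1 versus +3, the
// smaller increase wins via tryLess(). Only when different sets are affected
// does the target-specific getRegPressureSetScore() decide, with the sense of
// the comparison flipped when TryP is a pressure decrease.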
2842 :
2843 332818 : unsigned getWeakLeft(const SUnit *SU, bool isTop) {
2844 : return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
2845 : }
2846 332818 :
2847 : /// Minimize physical register live ranges. Regalloc wants them adjacent to
2848 : /// their physreg def/use.
2849 : ///
2850 332818 : /// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
2851 : /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
2852 332818 : /// with the operation that produces or consumes the physreg. We'll do this when
2853 : /// regalloc has support for parallel copies.
2854 : int biasPhysRegCopy(const SUnit *SU, bool isTop) {
2855 22624375 : const MachineInstr *MI = SU->getInstr();
2856 22624375 : if (!MI->isCopy())
2857 : return 0;
2858 :
2859 : unsigned ScheduledOper = isTop ? 1 : 0;
2860 : unsigned UnscheduledOper = isTop ? 0 : 1;
2861 : // If we have already scheduled the physreg produce/consumer, immediately
2862 : // schedule the copy.
2863 : if (TargetRegisterInfo::isPhysicalRegister(
2864 : MI->getOperand(ScheduledOper).getReg()))
2865 : return 1;
2866 24783226 : // If the physreg is at the boundary, defer it. Otherwise schedule it
2867 24783226 : // immediately to free the dependent. We can hoist the copy later.
2868 24783226 : bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
2869 : if (TargetRegisterInfo::isPhysicalRegister(
2870 : MI->getOperand(UnscheduledOper).getReg()))
2871 3092773 : return AtBoundary ? -1 : 1;
2872 3092773 : return 0;
2873 : }
2874 : } // end namespace llvm
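// Editor's note (illustration, not part of the original source): for a COPY
// machine instruction, operand 0 is the definition and operand 1 is the
// source. When scheduling top-down (isTop), the already-placed neighbour is
// the producer of the copy's source, so ScheduledOper selects operand 1;
// bottom-up it is the consumer of the definition, operand 0. A physreg on the
// scheduled side yields +1 (schedule the copy next to its already-placed
// def/use), while a physreg on the unscheduled side yields -1 only at the
// region boundary, deferring the copy.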
2875 6185546 :
2876 3092773 : void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
2877 : bool AtTop,
2878 : const RegPressureTracker &RPTracker,
2879 : RegPressureTracker &TempTracker) {
2880 2067869 : Cand.SU = SU;
2881 4135738 : Cand.AtTop = AtTop;
2882 : if (DAG->isTrackingPressure()) {
2883 939715 : if (AtTop) {
2884 : TempTracker.getMaxDownwardPressureDelta(
2885 : Cand.SU->getInstr(),
2886 : Cand.RPDelta,
2887 : DAG->getRegionCriticalPSets(),
2888 10379769 : DAG->getRegPressure().MaxSetPressure);
2889 : } else {
2890 : if (VerifyScheduling) {
2891 : TempTracker.getMaxUpwardPressureDelta(
2892 10379769 : Cand.SU->getInstr(),
2893 10379769 : &DAG->getPressureDiff(Cand.SU),
2894 10379769 : Cand.RPDelta,
2895 9355488 : DAG->getRegionCriticalPSets(),
2896 72571 : DAG->getRegPressure().MaxSetPressure);
2897 72571 : } else {
2898 72571 : RPTracker.getUpwardPressureDelta(
2899 : Cand.SU->getInstr(),
2900 : DAG->getPressureDiff(Cand.SU),
2901 : Cand.RPDelta,
2902 9282917 : DAG->getRegionCriticalPSets(),
2903 19 : DAG->getRegPressure().MaxSetPressure);
2904 19 : }
2905 19 : }
2906 19 : }
2907 : LLVM_DEBUG(if (Cand.RPDelta.Excess.isValid()) dbgs()
2908 : << " Try SU(" << Cand.SU->NodeNum << ") "
2909 : << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet()) << ":"
2910 9282898 : << Cand.RPDelta.Excess.getUnitInc() << "\n");
2911 9282898 : }
2912 :
2913 9282898 : /// Apply a set of heuristics to a new candidate. Heuristics are currently
2914 : /// hierarchical. This may be more efficient than a graduated cost model because
2915 : /// we don't need to evaluate all aspects of the model for each node in the
2916 : /// queue. But it's really done to make the heuristics easier to debug and
2917 : /// statistically analyze.
2918 : ///
2919 : /// \param Cand provides the policy and current best candidate.
2920 : /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
2921 : /// \param Zone describes the scheduled zone that we are extending, or nullptr
2922 : /// if Cand is from a different zone than TryCand.
2923 10379769 : void GenericScheduler::tryCandidate(SchedCandidate &Cand,
2924 : SchedCandidate &TryCand,
2925 : SchedBoundary *Zone) const {
2926 : // Initialize the candidate if needed.
2927 : if (!Cand.isValid()) {
2928 : TryCand.Reason = NodeOrder;
2929 : return;
2930 : }
2931 :
2932 : if (tryGreater(biasPhysRegCopy(TryCand.SU, TryCand.AtTop),
2933 : biasPhysRegCopy(Cand.SU, Cand.AtTop),
2934 : TryCand, Cand, PhysRegCopy))
2935 13874679 : return;
2936 :
2937 : // Avoid exceeding the target's limit.
2938 : if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
2939 13874679 : Cand.RPDelta.Excess,
2940 1483066 : TryCand, Cand, RegExcess, TRI,
2941 1483066 : DAG->MF))
2942 : return;
2943 :
2944 12391613 : // Avoid increasing the max critical pressure in the scheduled region.
2945 12391613 : if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
2946 : Cand.RPDelta.CriticalMax,
2947 : TryCand, Cand, RegCritical, TRI,
2948 : DAG->MF))
2949 : return;
2950 23177916 :
2951 11394939 : // We only compare a subset of features when comparing nodes between
2952 11394939 : // Top and Bottom boundary. Some properties are simply incomparable, in many
2953 11394939 : // other instances we should only override the other boundary if something
2954 : // is a clear good pick on one boundary. Skip heuristics that are more
2955 : // "tie-breaking" in nature.
2956 : bool SameBoundary = Zone != nullptr;
2957 23086334 : if (SameBoundary) {
2958 11349148 : // For loops that are acyclic path limited, aggressively schedule for
2959 11349148 : // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
2960 11349148 : // heuristics to take precedence.
2961 : if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
2962 : tryLatency(TryCand, Cand, *Zone))
2963 : return;
2964 :
2965 : // Prioritize instructions that read unbuffered resources by stall cycles.
2966 : if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
2967 : Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
2968 : return;
2969 11400077 : }
2970 :
2971 : // Keep clustered nodes together to encourage downstream peephole
2972 : // optimizations which may reduce resource requirements.
2973 11274438 : //
2974 893 : // This is a best effort to set things up for a post-RA pass. Optimizations
2975 : // like generating loads of multiple registers should ideally be done within
2976 : // the scheduler pass by combining the loads during DAG postprocessing.
2977 : const SUnit *CandNextClusterSU =
2978 11272922 : Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
2979 11272922 : const SUnit *TryCandNextClusterSU =
2980 : TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
2981 : if (tryGreater(TryCand.SU == TryCandNextClusterSU,
2982 : Cand.SU == CandNextClusterSU,
2983 : TryCand, Cand, Cluster))
2984 : return;
2985 :
2986 : if (SameBoundary) {
2987 : // Weak edges are for clustering and other constraints.
2988 : if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
2989 : getWeakLeft(Cand.SU, Cand.AtTop),
2990 11362948 : TryCand, Cand, Weak))
2991 : return;
2992 11362948 : }
2993 11362948 :
2994 11362948 : // Avoid increasing the max pressure of the entire region.
2995 : if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
2996 : Cand.RPDelta.CurrentMax,
2997 : TryCand, Cand, RegMax, TRI,
2998 11323005 : DAG->MF))
2999 : return;
3000 11196825 :
3001 11196825 : if (SameBoundary) {
3002 : // Avoid critical resource consumption and balance the schedule.
3003 : TryCand.initResourceDelta(DAG, SchedModel);
3004 : if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
3005 : TryCand, Cand, ResourceReduce))
3006 : return;
3007 22100863 : if (tryGreater(TryCand.ResDelta.DemandedResources,
3008 10856961 : Cand.ResDelta.DemandedResources,
3009 10856961 : TryCand, Cand, ResourceDemand))
3010 10856961 : return;
3011 :
3012 : // Avoid serializing long latency dependence chains.
3013 11089832 : // For acyclic path limited loops, latency was already checked above.
3014 : if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
3015 10963652 : !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
3016 10963652 : return;
3017 :
3018 : // Fall through to original instruction order.
3019 10963576 : if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
3020 10963576 : || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
3021 : TryCand.Reason = NodeOrder;
3022 : }
3023 : }
3024 : }
3025 :
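// Editor's sketch (plain C++, not LLVM API): a minimal standalone illustration
// of the hierarchical comparison pattern used by tryCandidate above. Each
// heuristic either decides the winner and stops, or falls through to a weaker
// tie-breaker; the names and criteria below are invented for illustration.
namespace misched_sketch {
struct ToyCand {
  int NodeNum = 0;     // original instruction order
  int PressureInc = 0; // stands in for RPDelta.Excess
  int StallCycles = 0; // stands in for getLatencyStallCycles()
  bool Valid = false;
};

// Returns true if Try should replace Cand, mirroring the early-return style.
inline bool toyTryCandidate(const ToyCand &Cand, const ToyCand &Try) {
  if (!Cand.Valid)
    return true;                               // first candidate wins by default
  if (Try.PressureInc != Cand.PressureInc)
    return Try.PressureInc < Cand.PressureInc; // strongest criterion first
  if (Try.StallCycles != Cand.StallCycles)
    return Try.StallCycles < Cand.StallCycles; // then fewer stall cycles
  return Try.NodeNum < Cand.NodeNum;           // finally, original order
}
} // end namespace misched_sketch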
3026 10961289 : /// Pick the best candidate from the queue.
3027 11859611 : ///
3028 : /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
3029 : /// DAG building. To adjust for the current scheduling location we need to
3030 : /// maintain the number of vreg uses remaining to be top-scheduled.
3031 329875 : void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
3032 10815186 : const CandPolicy &ZonePolicy,
3033 2505609 : const RegPressureTracker &RPTracker,
3034 : SchedCandidate &Cand) {
3035 : // getMaxPressureDelta temporarily modifies the tracker.
3036 : RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
3037 :
3038 : ReadyQueue &Q = Zone.Available;
3039 : for (SUnit *SU : Q) {
3040 :
3041 : SchedCandidate TryCand(ZonePolicy);
3042 : initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
3043 1168450 : // Pass SchedBoundary only when comparing nodes from the same boundary.
3044 : SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
3045 : tryCandidate(Cand, TryCand, ZoneArg);
3046 : if (TryCand.Reason != NoCand) {
3047 : // Initialize resource delta if needed in case future heuristics query it.
3048 : if (TryCand.ResDelta == SchedResourceDelta())
3049 : TryCand.initResourceDelta(DAG, SchedModel);
3050 : Cand.setBest(TryCand);
3051 11548219 : LLVM_DEBUG(traceCandidate(Cand));
3052 : }
3053 : }
3054 10379769 : }
3055 :
3056 10379769 : /// Pick the best candidate node from either the top or bottom queue.
3057 10379769 : SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
3058 10379769 : // Schedule as far as possible in the direction of no choice. This is most
3059 : // efficient, but also provides the best heuristics for CriticalPSets.
3060 3231962 : if (SUnit *SU = Bot.pickOnlyChoice()) {
3061 3231166 : IsTopNode = false;
3062 : tracePick(Only1, false);
3063 : return SU;
3064 : }
3065 : if (SUnit *SU = Top.pickOnlyChoice()) {
3066 1168450 : IsTopNode = true;
3067 : tracePick(Only1, true);
3068 : return SU;
3069 143498 : }
3070 : // Set the bottom-up policy based on the state of the current bottom zone and
3071 : // the instructions outside the zone, including the top zone.
3072 143498 : CandPolicy BotPolicy;
3073 90790 : setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
3074 : // Set the top-down policy based on the state of the current top zone and
3075 90790 : // the instructions outside the zone, including the bottom zone.
3076 : CandPolicy TopPolicy;
3077 52708 : setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
3078 3618 :
3079 : // See if BotCand is still valid (because we previously scheduled from Top).
3080 3618 : LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
3081 : if (!BotCand.isValid() || BotCand.SU->isScheduled ||
3082 : BotCand.Policy != BotPolicy) {
3083 : BotCand.reset(CandPolicy());
3084 49090 : pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
3085 49090 : assert(BotCand.Reason != NoCand && "failed to find the first candidate");
3086 : } else {
3087 : LLVM_DEBUG(traceCandidate(BotCand));
3088 49090 : #ifndef NDEBUG
3089 49090 : if (VerifyScheduling) {
3090 : SchedCandidate TCand;
3091 : TCand.reset(CandPolicy());
3092 : pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
3093 49090 : assert(TCand.SU == BotCand.SU &&
3094 : "Last pick result should correspond to re-picking right now");
3095 37342 : }
3096 74684 : #endif
3097 : }
3098 :
3099 : // Check if the top Q has a better candidate.
3100 : LLVM_DEBUG(dbgs() << "Picking from Top:\n");
3101 : if (!TopCand.isValid() || TopCand.SU->isScheduled ||
3102 : TopCand.Policy != TopPolicy) {
3103 : TopCand.reset(CandPolicy());
3104 : pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
3105 : assert(TopCand.Reason != NoCand && "failed to find the first candidate");
3106 : } else {
3107 : LLVM_DEBUG(traceCandidate(TopCand));
3108 : #ifndef NDEBUG
3109 : if (VerifyScheduling) {
3110 : SchedCandidate TCand;
3111 : TCand.reset(CandPolicy());
3112 : pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
3113 49090 : assert(TCand.SU == TopCand.SU &&
3114 : "Last pick result should correspond to re-picking right now");
3115 31336 : }
3116 62672 : #endif
3117 : }
3118 :
3119 : // Pick best from BotCand and TopCand.
3120 : assert(BotCand.isValid());
3121 : assert(TopCand.isValid());
3122 : SchedCandidate Cand = BotCand;
3123 : TopCand.Reason = NoCand;
3124 : tryCandidate(Cand, TopCand, nullptr);
3125 : if (TopCand.Reason != NoCand) {
3126 : Cand.setBest(TopCand);
3127 : LLVM_DEBUG(traceCandidate(Cand));
3128 : }
3129 :
3130 : IsTopNode = Cand.AtTop;
3131 : tracePick(Cand);
3132 : return Cand.SU;
3133 : }
3134 49090 :
3135 49090 : /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
3136 49090 : SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
3137 49090 : if (DAG->top() == DAG->bottom()) {
3138 : assert(Top.Available.empty() && Top.Pending.empty() &&
3139 : Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
3140 : return nullptr;
3141 : }
3142 49090 : SUnit *SU;
3143 : do {
3144 49090 : if (RegionPolicy.OnlyTopDown) {
3145 : SU = Top.pickOnlyChoice();
3146 : if (!SU) {
3147 : CandPolicy NoPolicy;
3148 2568977 : TopCand.reset(NoPolicy);
3149 2568977 : pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
3150 : assert(TopCand.Reason != NoCand && "failed to find a candidate");
3151 : tracePick(TopCand);
3152 : SU = TopCand.SU;
3153 : }
3154 : IsTopNode = true;
3155 : } else if (RegionPolicy.OnlyBottomUp) {
3156 2165070 : SU = Bot.pickOnlyChoice();
3157 54 : if (!SU) {
3158 54 : CandPolicy NoPolicy;
3159 45 : BotCand.reset(NoPolicy);
3160 45 : pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
3161 90 : assert(BotCand.Reason != NoCand && "failed to find a candidate");
3162 : tracePick(BotCand);
3163 : SU = BotCand.SU;
3164 45 : }
3165 : IsTopNode = false;
3166 54 : } else {
3167 2165016 : SU = pickNodeBidirectional(IsTopNode);
3168 2021518 : }
3169 2021518 : } while (SU->isScheduled);
3170 1099727 :
3171 1099727 : if (SU->isTopReady())
3172 2199454 : Top.removeReady(SU);
3173 : if (SU->isBottomReady())
3174 : Bot.removeReady(SU);
3175 1099727 :
3176 : LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
3177 2021518 : << *SU->getInstr());
3178 : return SU;
3179 143498 : }
3180 :
3181 2165070 : void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
3182 : MachineBasicBlock::iterator InsertPos = SU->getInstr();
3183 2165070 : if (!isTop)
3184 1053517 : ++InsertPos;
3185 2165070 : SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
3186 2152743 :
3187 : // Find already scheduled copies with a single physreg dependence and move
3188 : // them just above the scheduled instruction.
3189 : for (SDep &Dep : Deps) {
3190 : if (Dep.getKind() != SDep::Data || !TRI->isPhysicalRegister(Dep.getReg()))
3191 : continue;
3192 : SUnit *DepSU = Dep.getSUnit();
3193 200566 : if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
3194 200566 : continue;
3195 200566 : MachineInstr *Copy = DepSU->getInstr();
3196 : if (!Copy->isCopy())
3197 200566 : continue;
3198 : LLVM_DEBUG(dbgs() << " Rescheduling physreg copy ";
3199 : DAG->dumpNode(*Dep.getSUnit()));
3200 : DAG->moveInstruction(Copy, InsertPos);
3201 711328 : }
3202 510762 : }
3203 :
3204 : /// Update the scheduler's state after scheduling a node. This is the same node
3205 144882 : /// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
3206 : /// update its state based on the current cycle before MachineSchedStrategy
3207 10936 : /// does.
3208 10936 : ///
3209 : /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
3210 : /// them here. See comments in biasPhysRegCopy.
3211 : void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3212 5046 : if (IsTopNode) {
3213 : SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3214 200566 : Top.bumpNode(SU);
3215 : if (SU->hasPhysRegUses)
3216 : reschedulePhysRegCopies(SU, true);
3217 : } else {
3218 : SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
3219 : Bot.bumpNode(SU);
3220 : if (SU->hasPhysRegDefs)
3221 : reschedulePhysRegCopies(SU, false);
3222 : }
3223 2506506 : }
3224 2506506 :
3225 95939 : /// Create the standard converging machine scheduler. This will be used as the
3226 95939 : /// default scheduler if the target does not set a default.
3227 95939 : ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
3228 63162 : ScheduleDAGMILive *DAG =
3229 : new ScheduleDAGMILive(C, llvm::make_unique<GenericScheduler>(C));
3230 2410567 : // Register DAG post-processors.
3231 2410567 : //
3232 2410567 : // FIXME: extend the mutation API to allow earlier mutations to instantiate
3233 137404 : // data and pass it to later mutations. Have a single mutation that gathers
3234 : // the interesting nodes in one pass.
3235 2506506 : DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
3236 : return DAG;
3237 : }
3238 :
3239 142003 : static ScheduleDAGInstrs *createConveringSched(MachineSchedContext *C) {
3240 : return createGenericSchedLive(C);
3241 142003 : }
3242 :
3243 : static MachineSchedRegistry
3244 : GenericSchedRegistry("converge", "Standard converging scheduler.",
3245 : createConveringSched);
3246 :
3247 142003 : //===----------------------------------------------------------------------===//
3248 142003 : // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
3249 : //===----------------------------------------------------------------------===//
3250 :
3251 0 : void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
3252 0 : DAG = Dag;
3253 : SchedModel = DAG->getSchedModel();
3254 : TRI = DAG->TRI;
3255 :
3256 : Rem.init(DAG, SchedModel);
3257 : Top.init(DAG, SchedModel, &Rem);
3258 : BotRoots.clear();
3259 :
3260 : // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
3261 : // or are disabled, then these HazardRecs will be disabled.
3262 : const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
3263 17716 : if (!Top.HazardRec) {
3264 17716 : Top.HazardRec =
3265 17716 : DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
3266 17716 : Itin, DAG);
3267 : }
3268 17716 : }
3269 17716 :
3270 : void PostGenericScheduler::registerRoots() {
3271 : Rem.CriticalPath = DAG->ExitSU.getDepth();
3272 :
3273 : // Some roots may not feed into ExitSU. Check all of them in case.
3274 17716 : for (const SUnit *SU : BotRoots) {
3275 17716 : if (SU->getDepth() > Rem.CriticalPath)
3276 16195 : Rem.CriticalPath = SU->getDepth();
3277 16195 : }
3278 16195 : LLVM_DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
3279 : if (DumpCriticalPathLength) {
3280 17716 : errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
3281 : }
3282 17716 : }
3283 35432 :
3284 : /// Apply a set of heuristics to a new candidate for PostRA scheduling.
3285 : ///
3286 44090 : /// \param Cand provides the policy and current best candidate.
3287 26374 : /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3288 4120 : void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
3289 : SchedCandidate &TryCand) {
3290 : // Initialize the candidate if needed.
3291 17716 : if (!Cand.isValid()) {
3292 0 : TryCand.Reason = NodeOrder;
3293 : return;
3294 17716 : }
3295 :
3296 : // Prioritize instructions that read unbuffered resources by stall cycles.
3297 : if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
3298 : Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3299 : return;
3300 120056 :
3301 : // Keep clustered nodes together.
3302 : if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
3303 120056 : Cand.SU == DAG->getNextClusterSucc(),
3304 26961 : TryCand, Cand, Cluster))
3305 26961 : return;
3306 :
3307 : // Avoid critical resource consumption and balance the schedule.
3308 : if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
3309 93095 : TryCand, Cand, ResourceReduce))
3310 93095 : return;
3311 : if (tryGreater(TryCand.ResDelta.DemandedResources,
3312 : Cand.ResDelta.DemandedResources,
3313 : TryCand, Cand, ResourceDemand))
3314 93070 : return;
3315 93070 :
3316 : // Avoid serializing long latency dependence chains.
3317 : if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
3318 : return;
3319 : }
3320 92907 :
3321 : // Fall through to original instruction order.
3322 : if (TryCand.SU->NodeNum < Cand.SU->NodeNum)
3323 92492 : TryCand.Reason = NodeOrder;
3324 92492 : }
3325 :
3326 : void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
3327 : ReadyQueue &Q = Top.Available;
3328 : for (SUnit *SU : Q) {
3329 92492 : SchedCandidate TryCand(Cand.Policy);
3330 : TryCand.SU = SU;
3331 : TryCand.AtTop = true;
3332 : TryCand.initResourceDelta(DAG, SchedModel);
3333 : tryCandidate(Cand, TryCand);
3334 60747 : if (TryCand.Reason != NoCand) {
3335 24251 : Cand.setBest(TryCand);
3336 : LLVM_DEBUG(traceCandidate(Cand));
3337 : }
3338 26961 : }
3339 : }
3340 147017 :
3341 : /// Pick the next node to schedule.
3342 120056 : SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
3343 120056 : if (DAG->top() == DAG->bottom()) {
3344 120056 : assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");
3345 120056 : return nullptr;
3346 120056 : }
3347 : SUnit *SU;
3348 : do {
3349 : SU = Top.pickOnlyChoice();
3350 : if (SU) {
3351 26961 : tracePick(Only1, true);
3352 : } else {
3353 : CandPolicy NoPolicy;
3354 103062 : SchedCandidate TopCand(NoPolicy);
3355 103062 : // Set the top-down policy based on the state of the current top zone and
3356 : // the instructions outside the zone, including the bottom zone.
3357 : setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
3358 : pickNodeFromQueue(TopCand);
3359 : assert(TopCand.Reason != NoCand && "failed to find a candidate");
3360 : tracePick(TopCand);
3361 85346 : SU = TopCand.SU;
3362 85346 : }
3363 : } while (SU->isScheduled);
3364 :
3365 : IsTopNode = true;
3366 : Top.removeReady(SU);
3367 :
3368 : LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
3369 26961 : << *SU->getInstr());
3370 26961 : return SU;
3371 : }
3372 :
3373 26961 : /// Called after ScheduleDAGMI has scheduled an instruction and updated
3374 : /// scheduled/remaining flags in the DAG nodes.
3375 85346 : void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3376 : SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3377 85346 : Top.bumpNode(SU);
3378 85346 : }
3379 :
3380 : ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
3381 : return new ScheduleDAGMI(C, llvm::make_unique<PostGenericScheduler>(C),
3382 85346 : /*RemoveKillFlags=*/true);
3383 : }
3384 :
3385 : //===----------------------------------------------------------------------===//
3386 : // ILP Scheduler. Currently for experimental analysis of heuristics.
3387 85346 : //===----------------------------------------------------------------------===//
3388 85346 :
3389 85346 : namespace {
3390 85346 :
3391 : /// Order nodes by the ILP metric.
3392 19456 : struct ILPOrder {
3393 : const SchedDFSResult *DFSResult = nullptr;
3394 19456 : const BitVector *ScheduledTrees = nullptr;
3395 : bool MaximizeILP;
3396 :
3397 : ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
3398 :
3399 : /// Apply a less-than relation on node priority.
3400 : ///
3401 : /// (Return true if A comes after B in the Q.)
3402 : bool operator()(const SUnit *A, const SUnit *B) const {
3403 : unsigned SchedTreeA = DFSResult->getSubtreeID(A);
3404 : unsigned SchedTreeB = DFSResult->getSubtreeID(B);
3405 : if (SchedTreeA != SchedTreeB) {
3406 : // Unscheduled trees have lower priority.
3407 : if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
3408 : return ScheduledTrees->test(SchedTreeB);
3409 0 :
3410 : // Trees with shallower connections have have lower priority.
3411 : if (DFSResult->getSubtreeLevel(SchedTreeA)
3412 : != DFSResult->getSubtreeLevel(SchedTreeB)) {
3413 : return DFSResult->getSubtreeLevel(SchedTreeA)
3414 300 : < DFSResult->getSubtreeLevel(SchedTreeB);
3415 300 : }
3416 : }
3417 300 : if (MaximizeILP)
3418 : return DFSResult->getILP(A) < DFSResult->getILP(B);
3419 318 : else
3420 : return DFSResult->getILP(A) > DFSResult->getILP(B);
3421 : }
3422 : };
3423 58 :
3424 : /// Schedule based on the ILP metric.
3425 : class ILPScheduler : public MachineSchedStrategy {
3426 0 : ScheduleDAGMILive *DAG = nullptr;
3427 : ILPOrder Cmp;
3428 :
3429 199 : std::vector<SUnit*> ReadyQ;
3430 116 :
3431 : public:
3432 83 : ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}
3433 :
3434 : void initialize(ScheduleDAGMI *dag) override {
3435 : assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
3436 : DAG = static_cast<ScheduleDAGMILive*>(dag);
3437 : DAG->computeDFSResult();
3438 : Cmp.DFSResult = DAG->getDFSResult();
3439 : Cmp.ScheduledTrees = &DAG->getScheduledTrees();
3440 : ReadyQ.clear();
3441 : }
3442 :
3443 : void registerRoots() override {
3444 0 : // Restore the heap in ReadyQ with the updated DFS results.
3445 : std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3446 10 : }
3447 :
3448 10 : /// Implement MachineSchedStrategy interface.
3449 10 : /// -----------------------------------------
3450 10 :
3451 10 : /// Callback to select the highest priority node from the ready Q.
3452 : SUnit *pickNode(bool &IsTopNode) override {
3453 10 : if (ReadyQ.empty()) return nullptr;
3454 : std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3455 10 : SUnit *SU = ReadyQ.back();
3456 : ReadyQ.pop_back();
3457 10 : IsTopNode = false;
3458 10 : LLVM_DEBUG(dbgs() << "Pick node "
3459 : << "SU(" << SU->NodeNum << ") "
3460 : << " ILP: " << DAG->getDFSResult()->getILP(SU)
3461 : << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU)
3462 : << " @"
3463 : << DAG->getDFSResult()->getSubtreeLevel(
3464 186 : DAG->getDFSResult()->getSubtreeID(SU))
3465 186 : << '\n'
3466 176 : << "Scheduling " << *SU->getInstr());
3467 176 : return SU;
3468 : }
3469 176 :
3470 : /// Scheduler callback to notify that a new subtree is scheduled.
3471 : void scheduleTree(unsigned SubtreeID) override {
3472 : std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3473 : }
3474 :
3475 : /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
3476 : /// DFSResults, and resort the priority Q.
3477 : void schedNode(SUnit *SU, bool IsTopNode) override {
3478 : assert(!IsTopNode && "SchedDFSResult needs bottom-up");
3479 176 : }
3480 :
3481 : void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
3482 :
3483 38 : void releaseBottomNode(SUnit *SU) override {
3484 38 : ReadyQ.push_back(SU);
3485 38 : std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3486 : }
3487 : };
3488 :
3489 176 : } // end anonymous namespace
3490 :
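// Editor's sketch (standard C++ only, not part of the original file): the
// heap-as-priority-queue idiom that ILPScheduler applies to ReadyQ. A plain
// lambda stands in for the ILPOrder comparator.
#include <algorithm>
#include <vector>

inline int ilpSketchPickBest(std::vector<int> &ReadyQ) {
  auto Cmp = [](int A, int B) { return A < B; };    // "less" keeps the max on top
  std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
  std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); // best element moves to the back,
  int Best = ReadyQ.back();                         // just as pickNode() reads it
  ReadyQ.pop_back();
  return Best;
}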
3491 176 : static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
3492 : return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(true));
3493 64 : }
3494 : static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
3495 176 : return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(false));
3496 176 : }
3497 176 :
3498 176 : static MachineSchedRegistry ILPMaxRegistry(
3499 : "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
3500 : static MachineSchedRegistry ILPMinRegistry(
3501 : "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
3502 :
3503 6 : //===----------------------------------------------------------------------===//
3504 12 : // Machine Instruction Shuffler for Correctness Testing
3505 : //===----------------------------------------------------------------------===//
3506 2 :
3507 4 : #ifndef NDEBUG
3508 : namespace {
3509 :
3510 : /// Apply a less-than relation on the node order, which corresponds to the
3511 : /// instruction order prior to scheduling. IsReverse implements greater-than.
3512 : template<bool IsReverse>
3513 : struct SUnitOrder {
3514 : bool operator()(SUnit *A, SUnit *B) const {
3515 : if (IsReverse)
3516 : return A->NodeNum > B->NodeNum;
3517 : else
3518 : return A->NodeNum < B->NodeNum;
3519 : }
3520 : };
3521 :
3522 : /// Reorder instructions as much as possible.
3523 : class InstructionShuffler : public MachineSchedStrategy {
3524 : bool IsAlternating;
3525 : bool IsTopDown;
3526 :
3527 : // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
3528 : // gives nodes with a higher number higher priority causing the latest
3529 : // instructions to be scheduled first.
3530 : PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>
3531 : TopQ;
3532 :
3533 : // When scheduling bottom-up, use greater-than as the queue priority.
3534 : PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>>
3535 : BottomQ;
3536 :
3537 : public:
3538 : InstructionShuffler(bool alternate, bool topdown)
3539 : : IsAlternating(alternate), IsTopDown(topdown) {}
3540 :
3541 : void initialize(ScheduleDAGMI*) override {
3542 : TopQ.clear();
3543 : BottomQ.clear();
3544 : }
3545 :
3546 : /// Implement MachineSchedStrategy interface.
3547 : /// -----------------------------------------
3548 :
3549 : SUnit *pickNode(bool &IsTopNode) override {
3550 : SUnit *SU;
3551 : if (IsTopDown) {
3552 : do {
3553 : if (TopQ.empty()) return nullptr;
3554 : SU = TopQ.top();
3555 : TopQ.pop();
3556 : } while (SU->isScheduled);
3557 : IsTopNode = true;
3558 : } else {
3559 : do {
3560 : if (BottomQ.empty()) return nullptr;
3561 : SU = BottomQ.top();
3562 : BottomQ.pop();
3563 : } while (SU->isScheduled);
3564 : IsTopNode = false;
3565 : }
3566 : if (IsAlternating)
3567 : IsTopDown = !IsTopDown;
3568 : return SU;
3569 : }
3570 :
3571 : void schedNode(SUnit *SU, bool IsTopNode) override {}
3572 :
3573 : void releaseTopNode(SUnit *SU) override {
3574 : TopQ.push(SU);
3575 : }
3576 : void releaseBottomNode(SUnit *SU) override {
3577 : BottomQ.push(SU);
3578 : }
3579 : };
3580 :
3581 : } // end anonymous namespace
3582 :
3583 : static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
3584 : bool Alternate = !ForceTopDown && !ForceBottomUp;
3585 : bool TopDown = !ForceBottomUp;
3586 : assert((TopDown || !ForceTopDown) &&
3587 : "-misched-topdown incompatible with -misched-bottomup");
3588 : return new ScheduleDAGMILive(
3589 : C, llvm::make_unique<InstructionShuffler>(Alternate, TopDown));
3590 : }
3591 :
3592 : static MachineSchedRegistry ShufflerRegistry(
3593 : "shuffle", "Shuffle machine instructions alternating directions",
3594 : createInstructionShuffler);
3595 : #endif // !NDEBUG
3596 :
3597 : //===----------------------------------------------------------------------===//
3598 : // GraphWriter support for ScheduleDAGMILive.
3599 : //===----------------------------------------------------------------------===//
3600 :
3601 : #ifndef NDEBUG
3602 : namespace llvm {
3603 :
3604 : template<> struct GraphTraits<
3605 : ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
3606 :
3607 : template<>
3608 : struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
3609 : DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
3610 :
3611 : static std::string getGraphName(const ScheduleDAG *G) {
3612 : return G->MF.getName();
3613 : }
3614 :
3615 : static bool renderGraphFromBottomUp() {
3616 : return true;
3617 : }
3618 :
3619 : static bool isNodeHidden(const SUnit *Node) {
3620 : if (ViewMISchedCutoff == 0)
3621 : return false;
3622 : return (Node->Preds.size() > ViewMISchedCutoff
3623 : || Node->Succs.size() > ViewMISchedCutoff);
3624 : }
3625 :
3626 : /// If you want to override the dot attributes printed for a particular
3627 : /// edge, override this method.
3628 : static std::string getEdgeAttributes(const SUnit *Node,
3629 : SUnitIterator EI,
3630 : const ScheduleDAG *Graph) {
3631 : if (EI.isArtificialDep())
3632 : return "color=cyan,style=dashed";
3633 : if (EI.isCtrlDep())
3634 : return "color=blue,style=dashed";
3635 : return "";
3636 : }
3637 :
3638 : static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
3639 : std::string Str;
3640 : raw_string_ostream SS(Str);
3641 : const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3642 : const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3643 : static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
3644 : SS << "SU:" << SU->NodeNum;
3645 : if (DFS)
3646 : SS << " I:" << DFS->getNumInstrs(SU);
3647 : return SS.str();
3648 : }
3649 :
3650 : static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
3651 : return G->getGraphNodeLabel(SU);
3652 : }
3653 :
3654 : static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
3655 : std::string Str("shape=Mrecord");
3656 : const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3657 : const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3658 : static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
3659 : if (DFS) {
3660 : Str += ",style=filled,fillcolor=\"#";
3661 : Str += DOT::getColorString(DFS->getSubtreeID(N));
3662 : Str += '"';
3663 : }
3664 : return Str;
3665 : }
3666 : };
3667 :
3668 : } // end namespace llvm
3669 : #endif // NDEBUG
3670 :
3671 : /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
3672 : /// rendered using 'dot'.
3673 : void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
3674 : #ifndef NDEBUG
3675 : ViewGraph(this, Name, false, Title);
3676 : #else
3677 : errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
3678 : << "systems with Graphviz or gv!\n";
3679 : #endif // NDEBUG
3680 : }
3681 :
3682 : /// Out-of-line implementation with no arguments is handy for gdb.
3683 : void ScheduleDAGMI::viewGraph() {
3684 : viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
3685 0 : }