LLVM 19.0.0git
WindowScheduler.cpp
Go to the documentation of this file.
1//======----------- WindowScheduler.cpp - window scheduler -------------======//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// An implementation of the Window Scheduling software pipelining algorithm.
10//
11// The fundamental concept of the window scheduling algorithm involves folding
12// the original MBB at a specific position, followed by list scheduling on the
13// folded MIs. The optimal scheduling result is then chosen from various folding
14// positions as the final scheduling outcome.
15//
16// The primary challenge in this algorithm lies in generating the folded MIs and
17// establishing their dependencies. We have innovatively employed a new MBB,
18// created by copying the original MBB three times, known as TripleMBB. This
19// TripleMBB enables the convenient implementation of MI folding and dependency
20// establishment. To facilitate the algorithm's implementation, we have also
21// devised data structures such as OriMIs, TriMIs, TriToOri, and OriToCycle.
22//
23// Another challenge in the algorithm is the scheduling of phis. Semantically,
24// it is difficult to place the phis in the window and perform list scheduling.
25// Therefore, we schedule these phis separately after each list scheduling.
26//
27// The provided implementation is designed for use before the Register Allocator
28// (RA). If the target requires implementation after RA, it is recommended to
29// reimplement analyseII(), schedulePhi(), and expand(). Additionally,
30// target-specific logic can be added in initialize(), preProcess(), and
31// postProcess().
32//
33// Lastly, it is worth mentioning that getSearchIndexes() is an important
34// function. We have experimented with more complex heuristics on downstream
35// target and achieved favorable results.
36//
37//===----------------------------------------------------------------------===//
39#include "llvm/ADT/Statistic.h"
46#include "llvm/Support/Debug.h"
48
49using namespace llvm;
50
51#define DEBUG_TYPE "pipeliner"
52
53namespace {
54STATISTIC(NumTryWindowSchedule,
55 "Number of loops that we attempt to use window scheduling");
56STATISTIC(NumTryWindowSearch,
57 "Number of times that we run list schedule in the window scheduling");
58STATISTIC(NumWindowSchedule,
59 "Number of loops that we successfully use window scheduling");
60STATISTIC(NumFailAnalyseII,
61 "Window scheduling abort due to the failure of the II analysis");
62
64 WindowSearchNum("window-search-num",
65 cl::desc("The number of searches per loop in the window "
66 "algorithm. 0 means no search number limit."),
68
69cl::opt<unsigned> WindowSearchRatio(
70 "window-search-ratio",
71 cl::desc("The ratio of searches per loop in the window algorithm. 100 "
72 "means search all positions in the loop, while 0 means not "
73 "performing any search."),
74 cl::Hidden, cl::init(40));
75
76cl::opt<unsigned> WindowIICoeff(
77 "window-ii-coeff",
79 "The coefficient used when initializing II in the window algorithm."),
81
82cl::opt<unsigned> WindowRegionLimit(
83 "window-region-limit",
85 "The lower limit of the scheduling region in the window algorithm."),
87
88cl::opt<unsigned> WindowDiffLimit(
89 "window-diff-limit",
90 cl::desc("The lower limit of the difference between best II and base II in "
91 "the window algorithm. If the difference is smaller than "
92 "this lower limit, window scheduling will not be performed."),
94} // namespace
95
96// WindowIILimit serves as an indicator of abnormal scheduling results and could
97// potentially be referenced by the derived target window scheduler.
99 WindowIILimit("window-ii-limit",
100 cl::desc("The upper limit of II in the window algorithm."),
101 cl::Hidden, cl::init(1000));
102
104 : Context(C), MF(C->MF), MBB(ML.getHeader()), Loop(ML),
105 Subtarget(&MF->getSubtarget()), TII(Subtarget->getInstrInfo()),
106 TRI(Subtarget->getRegisterInfo()), MRI(&MF->getRegInfo()) {
107 TripleDAG = std::unique_ptr<ScheduleDAGInstrs>(
108 createMachineScheduler(/*OnlyBuildGraph=*/true));
109}
110
112 if (!initialize()) {
113 LLVM_DEBUG(dbgs() << "The WindowScheduler failed to initialize!\n");
114 return false;
115 }
116 // The window algorithm is time-consuming, and its compilation time should be
117 // taken into consideration.
118 TimeTraceScope Scope("WindowSearch");
119 ++NumTryWindowSchedule;
120 // Performing the relevant processing before window scheduling.
121 preProcess();
122 // The main window scheduling begins.
123 std::unique_ptr<ScheduleDAGInstrs> SchedDAG(createMachineScheduler());
124 auto SearchIndexes = getSearchIndexes(WindowSearchNum, WindowSearchRatio);
125 for (unsigned Idx : SearchIndexes) {
126 OriToCycle.clear();
127 ++NumTryWindowSearch;
128 // The scheduling starts with non-phi instruction, so SchedPhiNum needs to
129 // be added to Idx.
130 unsigned Offset = Idx + SchedPhiNum;
132 SchedDAG->startBlock(MBB);
133 SchedDAG->enterRegion(MBB, Range.begin(), Range.end(), SchedInstrNum);
134 SchedDAG->schedule();
135 LLVM_DEBUG(SchedDAG->dump());
136 unsigned II = analyseII(*SchedDAG, Offset);
137 if (II == WindowIILimit) {
139 LLVM_DEBUG(dbgs() << "Can't find a valid II. Keep searching...\n");
140 ++NumFailAnalyseII;
141 continue;
142 }
146 LLVM_DEBUG(dbgs() << "Current window Offset is " << Offset << " and II is "
147 << II << ".\n");
148 }
149 // Performing the relevant processing after window scheduling.
150 postProcess();
151 // Check whether the scheduling result is valid.
152 if (!isScheduleValid()) {
153 LLVM_DEBUG(dbgs() << "Window scheduling is not needed!\n");
154 return false;
155 }
156 LLVM_DEBUG(dbgs() << "\nBest window offset is " << BestOffset
157 << " and Best II is " << BestII << ".\n");
158 // Expand the scheduling result to prologue, kernel, and epilogue.
159 expand();
160 ++NumWindowSchedule;
161 return true;
162}
163
166 return OnlyBuildGraph
167 ? new ScheduleDAGMI(
168 Context, std::make_unique<PostGenericScheduler>(Context),
169 true)
171}
172
175 LLVM_DEBUG(dbgs() << "Target disables the window scheduling!\n");
176 return false;
177 }
178 // Initialized the member variables used by window algorithm.
179 OriMIs.clear();
180 TriMIs.clear();
181 TriToOri.clear();
182 OriToCycle.clear();
183 SchedResult.clear();
184 SchedPhiNum = 0;
185 SchedInstrNum = 0;
186 BestII = UINT_MAX;
187 BestOffset = 0;
188 BaseII = 0;
189 // List scheduling used in the window algorithm depends on LiveIntervals.
190 if (!Context->LIS) {
191 LLVM_DEBUG(dbgs() << "There is no LiveIntervals information!\n");
192 return false;
193 }
194 // Check each MI in MBB.
195 SmallSet<Register, 8> PrevDefs;
196 SmallSet<Register, 8> PrevUses;
197 auto IsLoopCarried = [&](MachineInstr &Phi) {
198 // Two cases are checked here: (1)The virtual register defined by the
199 // preceding phi is used by the succeeding phi;(2)The preceding phi uses the
200 // virtual register defined by the succeeding phi.
201 if (PrevUses.count(Phi.getOperand(0).getReg()))
202 return true;
203 PrevDefs.insert(Phi.getOperand(0).getReg());
204 for (unsigned I = 1, E = Phi.getNumOperands(); I != E; I += 2) {
205 if (PrevDefs.count(Phi.getOperand(I).getReg()))
206 return true;
207 PrevUses.insert(Phi.getOperand(I).getReg());
208 }
209 return false;
210 };
211 auto PLI = TII->analyzeLoopForPipelining(MBB);
212 for (auto &MI : *MBB) {
213 if (MI.isMetaInstruction() || MI.isTerminator())
214 continue;
215 if (MI.isPHI()) {
216 if (IsLoopCarried(MI)) {
217 LLVM_DEBUG(dbgs() << "Loop carried phis are not supported yet!\n");
218 return false;
219 }
220 ++SchedPhiNum;
221 ++BestOffset;
222 } else
224 if (TII->isSchedulingBoundary(MI, MBB, *MF)) {
226 dbgs() << "Boundary MI is not allowed in window scheduling!\n");
227 return false;
228 }
229 if (PLI->shouldIgnoreForPipelining(&MI)) {
230 LLVM_DEBUG(dbgs() << "Special MI defined by target is not allowed in "
231 "window scheduling!\n");
232 return false;
233 }
234 for (auto &Def : MI.all_defs())
235 if (Def.isReg() && Def.getReg().isPhysical())
236 return false;
237 }
238 if (SchedInstrNum <= WindowRegionLimit) {
239 LLVM_DEBUG(dbgs() << "There are too few MIs in the window region!\n");
240 return false;
241 }
242 return true;
243}
244
246 // Prior to window scheduling, it's necessary to backup the original MBB,
247 // generate a new TripleMBB, and build a TripleDAG based on the TripleMBB.
248 backupMBB();
250 TripleDAG->startBlock(MBB);
251 TripleDAG->enterRegion(
253 std::distance(MBB->begin(), MBB->getFirstTerminator()));
254 TripleDAG->buildSchedGraph(Context->AA);
255}
256
258 // After window scheduling, it's necessary to clear the TripleDAG and restore
259 // to the original MBB.
260 TripleDAG->exitRegion();
261 TripleDAG->finishBlock();
262 restoreMBB();
263}
264
266 for (auto &MI : MBB->instrs())
268 // Remove MIs and the corresponding live intervals.
269 for (auto &MI : make_early_inc_range(*MBB)) {
271 MBB->remove(&MI);
272 }
273}
274
276 // Erase MIs and the corresponding live intervals.
277 for (auto &MI : make_early_inc_range(*MBB)) {
279 MI.eraseFromParent();
280 }
281 // Restore MBB to the state before window scheduling.
282 for (auto *MI : OriMIs)
283 MBB->push_back(MI);
285}
286
288 const unsigned DuplicateNum = 3;
289 TriMIs.clear();
290 TriToOri.clear();
291 assert(OriMIs.size() > 0 && "The Original MIs were not backed up!");
292 // Step 1: Performing the first copy of MBB instructions, excluding
293 // terminators. At the same time, we back up the anti-register of phis.
294 // DefPairs hold the old and new define register pairs.
296 for (auto *MI : OriMIs) {
297 if (MI->isMetaInstruction() || MI->isTerminator())
298 continue;
299 if (MI->isPHI())
300 if (Register AntiReg = getAntiRegister(MI))
301 DefPairs[MI->getOperand(0).getReg()] = AntiReg;
302 auto *NewMI = MF->CloneMachineInstr(MI);
303 MBB->push_back(NewMI);
304 TriMIs.push_back(NewMI);
305 TriToOri[NewMI] = MI;
306 }
307 // Step 2: Performing the remaining two copies of MBB instructions excluding
308 // phis, and the last one contains terminators. At the same time, registers
309 // are updated accordingly.
310 for (size_t Cnt = 1; Cnt < DuplicateNum; ++Cnt) {
311 for (auto *MI : OriMIs) {
312 if (MI->isPHI() || MI->isMetaInstruction() ||
313 (MI->isTerminator() && Cnt < DuplicateNum - 1))
314 continue;
315 auto *NewMI = MF->CloneMachineInstr(MI);
317 // New defines are updated.
318 for (auto MO : NewMI->all_defs())
319 if (MO.isReg() && MO.getReg().isVirtual()) {
320 Register NewDef =
322 NewMI->substituteRegister(MO.getReg(), NewDef, 0, *TRI);
323 NewDefs[MO.getReg()] = NewDef;
324 }
325 // New uses are updated.
326 for (auto DefRegPair : DefPairs)
327 if (NewMI->readsRegister(DefRegPair.first, TRI)) {
328 Register NewUse = DefRegPair.second;
329 // Note the update process for '%1 -> %9' in '%10 = sub i32 %9, %3':
330 //
331 // BB.3: DefPairs
332 // ==================================
333 // %1 = phi i32 [%2, %BB.1], [%7, %BB.3] (%1,%7)
334 // ...
335 // ==================================
336 // ...
337 // %4 = sub i32 %1, %3
338 // ...
339 // %7 = add i32 %5, %6
340 // ...
341 // ----------------------------------
342 // ...
343 // %8 = sub i32 %7, %3 (%1,%7),(%4,%8)
344 // ...
345 // %9 = add i32 %5, %6 (%1,%7),(%4,%8),(%7,%9)
346 // ...
347 // ----------------------------------
348 // ...
349 // %10 = sub i32 %9, %3 (%1,%7),(%4,%10),(%7,%9)
350 // ... ^
351 // %11 = add i32 %5, %6 (%1,%7),(%4,%10),(%7,%11)
352 // ...
353 // ==================================
354 // < Terminators >
355 // ==================================
356 if (DefPairs.count(NewUse))
357 NewUse = DefPairs[NewUse];
358 NewMI->substituteRegister(DefRegPair.first, NewUse, 0, *TRI);
359 }
360 // DefPairs is updated at last.
361 for (auto &NewDef : NewDefs)
362 DefPairs[NewDef.first] = NewDef.second;
363 MBB->push_back(NewMI);
364 TriMIs.push_back(NewMI);
365 TriToOri[NewMI] = MI;
366 }
367 }
368 // Step 3: The registers used by phis are updated, and they are generated in
369 // the third copy of MBB.
370 // In the privious example, the old phi is:
371 // %1 = phi i32 [%2, %BB.1], [%7, %BB.3]
372 // The new phi is:
373 // %1 = phi i32 [%2, %BB.1], [%11, %BB.3]
374 for (auto &Phi : MBB->phis()) {
375 for (auto DefRegPair : DefPairs)
376 if (Phi.readsRegister(DefRegPair.first, TRI))
377 Phi.substituteRegister(DefRegPair.first, DefRegPair.second, 0, *TRI);
378 }
380}
381
383 // After list scheduling, the MBB is restored in one traversal.
384 for (size_t I = 0; I < TriMIs.size(); ++I) {
385 auto *MI = TriMIs[I];
386 auto OldPos = MBB->begin();
387 std::advance(OldPos, I);
388 auto CurPos = MI->getIterator();
389 if (CurPos != OldPos) {
390 MBB->splice(OldPos, MBB, CurPos);
391 Context->LIS->handleMove(*MI, /*UpdateFlags=*/false);
392 }
393 }
394}
395
397 unsigned SearchRatio) {
398 // We use SearchRatio to get the index range, and then evenly get the indexes
399 // according to the SearchNum. This is a simple huristic. Depending on the
400 // characteristics of the target, more complex algorithms can be used for both
401 // performance and compilation time.
402 assert(SearchRatio <= 100 && "SearchRatio should be equal or less than 100!");
403 unsigned MaxIdx = SchedInstrNum * SearchRatio / 100;
404 unsigned Step = SearchNum > 0 && SearchNum <= MaxIdx ? MaxIdx / SearchNum : 1;
405 SmallVector<unsigned> SearchIndexes;
406 for (unsigned Idx = 0; Idx < MaxIdx; Idx += Step)
407 SearchIndexes.push_back(Idx);
408 return SearchIndexes;
409}
410
412 // Sometimes MaxDepth is 0, so it should be limited to the minimum of 1.
413 unsigned MaxDepth = 1;
414 for (auto &SU : DAG.SUnits)
415 MaxDepth = std::max(SU.getDepth() + SU.Latency, MaxDepth);
416 return MaxDepth * WindowIICoeff;
417}
418
420 unsigned Offset) {
421 int InitII = getEstimatedII(DAG);
422 ResourceManager RM(Subtarget, &DAG);
423 RM.init(InitII);
424 // ResourceManager and DAG are used to calculate the maximum cycle for the
425 // scheduled MIs. Since MIs in the Region have already been scheduled, the
426 // emit cycles can be estimated in order here.
427 int CurCycle = 0;
429 for (auto &MI : Range) {
430 auto *SU = DAG.getSUnit(&MI);
431 int ExpectCycle = CurCycle;
432 // The predecessors of current MI determine its earliest issue cycle.
433 for (auto &Pred : SU->Preds) {
434 if (Pred.isWeak())
435 continue;
436 auto *PredMI = Pred.getSUnit()->getInstr();
437 int PredCycle = getOriCycle(PredMI);
438 ExpectCycle = std::max(ExpectCycle, PredCycle + (int)Pred.getLatency());
439 }
440 // ResourceManager can be used to detect resource conflicts between the
441 // current MI and the previously inserted MIs.
442 while (!RM.canReserveResources(*SU, CurCycle) || CurCycle < ExpectCycle) {
443 ++CurCycle;
444 if (CurCycle == (int)WindowIILimit)
445 return CurCycle;
446 }
447 RM.reserveResources(*SU, CurCycle);
448 OriToCycle[getOriMI(&MI)] = CurCycle;
449 LLVM_DEBUG(dbgs() << "\tCycle " << CurCycle << " [S."
450 << getOriStage(getOriMI(&MI), Offset) << "]: " << MI);
451 }
452 LLVM_DEBUG(dbgs() << "MaxCycle is " << CurCycle << ".\n");
453 return CurCycle;
454}
455
456// By utilizing TripleDAG, we can easily establish dependencies between A and B.
457// Based on the MaxCycle and the issue cycle of A and B, we can determine
458// whether it is necessary to add a stall cycle. This is because, without
459// inserting the stall cycle, the latency constraint between A and B cannot be
460// satisfied. The details are as follows:
461//
462// New MBB:
463// ========================================
464// < Phis >
465// ======================================== (sliding direction)
466// MBB copy 1 |
467// V
468//
469// ~~~~~~~~~~~~~~~~~~~|~~~~~~~~~~~~~~~~~~~~ ----schedule window-----
470// | |
471// ===================V==================== |
472// MBB copy 2 < MI B > |
473// |
474// < MI A > V
475// ~~~~~~~~~~~~~~~~~~~:~~~~~~~~~~~~~~~~~~~~ ------------------------
476// :
477// ===================V====================
478// MBB copy 3 < MI B'>
479//
480//
481//
482//
483// ========================================
484// < Terminators >
485// ========================================
486int WindowScheduler::calculateStallCycle(unsigned Offset, int MaxCycle) {
487 int MaxStallCycle = 0;
489 for (auto &MI : Range) {
490 auto *SU = TripleDAG->getSUnit(&MI);
491 int DefCycle = getOriCycle(&MI);
492 for (auto &Succ : SU->Succs) {
493 if (Succ.isWeak() || Succ.getSUnit() == &TripleDAG->ExitSU)
494 continue;
495 // If the expected cycle does not exceed MaxCycle, no check is needed.
496 if (DefCycle + (int)Succ.getLatency() <= MaxCycle)
497 continue;
498 // If the cycle of the scheduled MI A is less than that of the scheduled
499 // MI B, the scheduling will fail because the lifetime of the
500 // corresponding register exceeds II.
501 auto *SuccMI = Succ.getSUnit()->getInstr();
502 int UseCycle = getOriCycle(SuccMI);
503 if (DefCycle < UseCycle)
504 return WindowIILimit;
505 // Get the stall cycle introduced by the register between two trips.
506 int StallCycle = DefCycle + (int)Succ.getLatency() - MaxCycle - UseCycle;
507 MaxStallCycle = std::max(MaxStallCycle, StallCycle);
508 }
509 }
510 LLVM_DEBUG(dbgs() << "MaxStallCycle is " << MaxStallCycle << ".\n");
511 return MaxStallCycle;
512}
513
515 LLVM_DEBUG(dbgs() << "Start analyzing II:\n");
516 int MaxCycle = calculateMaxCycle(DAG, Offset);
517 if (MaxCycle == (int)WindowIILimit)
518 return MaxCycle;
519 int StallCycle = calculateStallCycle(Offset, MaxCycle);
520 if (StallCycle == (int)WindowIILimit)
521 return StallCycle;
522 // The value of II is equal to the maximum execution cycle plus 1.
523 return MaxCycle + StallCycle + 1;
524}
525
527 LLVM_DEBUG(dbgs() << "Start scheduling Phis:\n");
528 for (auto &Phi : MBB->phis()) {
529 int LateCycle = INT_MAX;
530 auto *SU = TripleDAG->getSUnit(&Phi);
531 for (auto &Succ : SU->Succs) {
532 // Phi doesn't have any Anti successors.
533 if (Succ.getKind() != SDep::Data)
534 continue;
535 // Phi is scheduled before the successor of stage 0. The issue cycle of
536 // phi is the latest cycle in this interval.
537 auto *SuccMI = Succ.getSUnit()->getInstr();
538 int Cycle = getOriCycle(SuccMI);
539 if (getOriStage(getOriMI(SuccMI), Offset) == 0)
540 LateCycle = std::min(LateCycle, Cycle);
541 }
542 // The anti-dependency of phi need to be handled separately in the same way.
543 if (Register AntiReg = getAntiRegister(&Phi)) {
544 auto *AntiMI = MRI->getVRegDef(AntiReg);
545 // AntiReg may be defined outside the kernel MBB.
546 if (AntiMI->getParent() == MBB) {
547 auto AntiCycle = getOriCycle(AntiMI);
548 if (getOriStage(getOriMI(AntiMI), Offset) == 0)
549 LateCycle = std::min(LateCycle, AntiCycle);
550 }
551 }
552 // If there is no limit to the late cycle, a default value is given.
553 if (LateCycle == INT_MAX)
554 LateCycle = (int)(II - 1);
555 LLVM_DEBUG(dbgs() << "\tCycle range [0, " << LateCycle << "] " << Phi);
556 // The issue cycle of phi is set to the latest cycle in the interval.
557 auto *OriPhi = getOriMI(&Phi);
558 OriToCycle[OriPhi] = LateCycle;
559 }
560}
561
563 unsigned II) {
564 // At each issue cycle, phi is placed before MIs in stage 0. So the simplest
565 // way is to put phi at the beginning of the current cycle.
568 for (auto &Phi : MBB->phis())
569 CycleToMIs[getOriCycle(&Phi)].push_back(getOriMI(&Phi));
570 for (auto &MI : Range)
571 CycleToMIs[getOriCycle(&MI)].push_back(getOriMI(&MI));
572 // Each MI is assigned a separate ordered Id, which is used as a sort marker
573 // in the following expand process.
575 int Id = 0;
576 for (int Cycle = 0; Cycle < (int)II; ++Cycle) {
577 if (!CycleToMIs.count(Cycle))
578 continue;
579 for (auto *MI : CycleToMIs[Cycle])
580 IssueOrder[MI] = Id++;
581 }
582 return IssueOrder;
583}
584
586 // At the first update, Offset is equal to SchedPhiNum. At this time, only
587 // BestII, BestOffset, and BaseII need to be updated.
588 if (Offset == SchedPhiNum) {
589 BestII = II;
591 BaseII = II;
592 return;
593 }
594 // The update will only continue if the II is smaller than BestII and the II
595 // is sufficiently small.
596 if ((II >= BestII) || (II + WindowDiffLimit > BaseII))
597 return;
598 BestII = II;
600 // Record the result of the current list scheduling, noting that each MI is
601 // stored unordered in SchedResult.
602 SchedResult.clear();
603 auto IssueOrder = getIssueOrder(Offset, II);
604 for (auto &Pair : OriToCycle) {
605 assert(IssueOrder.count(Pair.first) && "Cannot find original MI!");
606 SchedResult.push_back(std::make_tuple(Pair.first, Pair.second,
607 getOriStage(Pair.first, Offset),
608 IssueOrder[Pair.first]));
609 }
610}
611
613 // The MIs in the SchedResult are sorted by the issue order ID.
615 [](const std::tuple<MachineInstr *, int, int, int> &A,
616 const std::tuple<MachineInstr *, int, int, int> &B) {
617 return std::get<3>(A) < std::get<3>(B);
618 });
619 // Use the scheduling infrastructure for expansion, noting that InstrChanges
620 // is not supported here.
621 DenseMap<MachineInstr *, int> Cycles, Stages;
622 std::vector<MachineInstr *> OrderedInsts;
623 for (auto &Info : SchedResult) {
624 auto *MI = std::get<0>(Info);
625 OrderedInsts.push_back(MI);
626 Cycles[MI] = std::get<1>(Info);
627 Stages[MI] = std::get<2>(Info);
628 LLVM_DEBUG(dbgs() << "\tCycle " << Cycles[MI] << " [S." << Stages[MI]
629 << "]: " << *MI);
630 }
631 ModuloSchedule MS(*MF, &Loop, std::move(OrderedInsts), std::move(Cycles),
632 std::move(Stages));
635 MSE.expand();
636 MSE.cleanup();
637}
638
641 for (MachineInstr &MI : *MBB)
642 for (const MachineOperand &MO : MI.operands()) {
643 if (!MO.isReg() || MO.getReg() == 0)
644 continue;
645 Register Reg = MO.getReg();
646 if (!is_contained(UsedRegs, Reg))
647 UsedRegs.push_back(Reg);
648 }
649 Context->LIS->repairIntervalsInRange(MBB, MBB->begin(), MBB->end(), UsedRegs);
650}
651
654 auto RegionBegin = MBB->begin();
655 std::advance(RegionBegin, Offset);
656 auto RegionEnd = RegionBegin;
657 std::advance(RegionEnd, Num);
658 return make_range(RegionBegin, RegionEnd);
659}
660
662 assert(TriToOri.count(NewMI) && "Cannot find original MI!");
663 auto *OriMI = TriToOri[NewMI];
664 assert(OriToCycle.count(OriMI) && "Cannot find schedule cycle!");
665 return OriToCycle[OriMI];
666}
667
669 assert(TriToOri.count(NewMI) && "Cannot find original MI!");
670 return TriToOri[NewMI];
671}
672
674 assert(llvm::find(OriMIs, OriMI) != OriMIs.end() &&
675 "Cannot find OriMI in OriMIs!");
676 // If there is no instruction fold, all MI stages are 0.
677 if (Offset == SchedPhiNum)
678 return 0;
679 // For those MIs with an ID less than the Offset, their stages are set to 0,
680 // while the rest are set to 1.
681 unsigned Id = 0;
682 for (auto *MI : OriMIs) {
683 if (MI->isMetaInstruction())
684 continue;
685 if (MI == OriMI)
686 break;
687 ++Id;
688 }
689 return Id >= (size_t)Offset ? 1 : 0;
690}
691
693 assert(Phi->isPHI() && "Expecting PHI!");
694 Register AntiReg;
695 for (auto MO : Phi->uses()) {
696 if (MO.isReg())
697 AntiReg = MO.getReg();
698 else if (MO.isMBB() && MO.getMBB() == MBB)
699 return AntiReg;
700 }
701 return 0;
702}
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock & MBB
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
static const unsigned MaxDepth
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
Target-Independent Code Generator Pass Configuration Options pass.
cl::opt< unsigned > WindowIILimit("window-ii-limit", cl::desc("The upper limit of II in the window algorithm."), cl::Hidden, cl::init(1000))
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
A possibly irreducible generalization of a Loop.
void repairIntervalsInRange(MachineBasicBlock *MBB, MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, ArrayRef< Register > OrigRegs)
Update live intervals for instructions in a range of iterators.
void handleMove(MachineInstr &MI, bool UpdateFlags=false)
Call this method to notify LiveIntervals that instruction MI has been moved within a basic block.
SlotIndexes * getSlotIndexes() const
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44
iterator_range< iterator > phis()
Returns a range that iterates over the phis in the basic block.
void push_back(MachineInstr *MI)
MachineInstr * remove(MachineInstr *I)
Remove the unbundled instruction from the instruction list without deleting it.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
Representation of each machine instruction.
Definition: MachineInstr.h:69
MachineOperand class - Representation of each machine instruction operand.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...
void cleanup()
Performs final cleanup after expansion.
void expand()
Performs the actual expansion.
Represents a schedule for a single-block loop.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
@ Data
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:53
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:390
A ScheduleDAG for scheduling lists of MachineInstr.
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:579
void removeMachineInstrFromMaps(MachineInstr &MI, bool AllowBundled=false)
Removes machine instruction (bundle) MI from the mapping.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
size_t size() const
Definition: SmallVector.h:91
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
virtual std::unique_ptr< PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
virtual bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const
Test if the given instruction should be considered a scheduling boundary.
virtual ScheduleDAGInstrs * createMachineScheduler(MachineSchedContext *C) const
Create an instance of ScheduleDAGInstrs to be run within the standard MachineScheduler pass for this ...
virtual bool enableWindowScheduler() const
True if the subtarget should run WindowScheduler.
The TimeTraceScope is a helper class to call the begin and end functions of the time trace profiler.
Definition: TimeProfiler.h:147
MachineInstr * getOriMI(MachineInstr *NewMI)
Get the original MI from which the new MI is cloned.
virtual ScheduleDAGInstrs * createMachineScheduler(bool OnlyBuildGraph=false)
Two types of ScheduleDAGs are needed, one for creating dependency graphs only, and the other for list...
unsigned SchedPhiNum
SchedPhiNum records the number of phi in the original MBB, and the scheduling starts with MI after ph...
virtual void preProcess()
Add some related processing before running window scheduling.
MachineSchedContext * Context
virtual void restoreTripleMBB()
Restore the order of MIs in TripleMBB after each list scheduling.
virtual void postProcess()
Add some related processing after running window scheduling.
DenseMap< MachineInstr *, int > OriToCycle
OriToCycle keeps the mappings between the original MI and its issue cycle.
virtual int calculateMaxCycle(ScheduleDAGInstrs &DAG, unsigned Offset)
Calculate MIs execution cycle after list scheduling.
MachineRegisterInfo * MRI
MachineBasicBlock * MBB
unsigned BestII
BestII and BestOffset record the characteristics of the best scheduling result and are used together ...
void backupMBB()
Back up the MIs in the original MBB and remove them from MBB.
DenseMap< MachineInstr *, MachineInstr * > TriToOri
TriToOri keeps the mappings between the MI clones in TripleMBB and their original MI.
int getEstimatedII(ScheduleDAGInstrs &DAG)
Estimate a II value at which all MIs will be scheduled successfully.
virtual void expand()
Using the scheduling infrastructure to expand the results of window scheduling.
DenseMap< MachineInstr *, int > getIssueOrder(unsigned Offset, unsigned II)
Get the final issue order of all scheduled MIs including phis.
Register getAntiRegister(MachineInstr *Phi)
Gets the register in phi which is generated from the current MBB.
unsigned getOriStage(MachineInstr *OriMI, unsigned Offset)
Get the scheduling stage, where the stage of the new MI is identical to the original MI.
const TargetRegisterInfo * TRI
virtual void updateLiveIntervals()
Update the live intervals for all registers used within MBB.
const TargetSubtargetInfo * Subtarget
std::unique_ptr< ScheduleDAGInstrs > TripleDAG
To innovatively identify the dependencies between MIs across two trips, we construct a DAG for a new ...
unsigned BaseII
BaseII is the II obtained when the window offset is SchedPhiNum.
virtual void schedulePhi(int Offset, unsigned &II)
Phis are scheduled separately after each list scheduling.
MachineFunction * MF
virtual unsigned analyseII(ScheduleDAGInstrs &DAG, unsigned Offset)
Analyzes the II value after each list scheduling.
int getOriCycle(MachineInstr *NewMI)
Get the issue cycle of the new MI based on the cycle of the original MI.
unsigned SchedInstrNum
SchedInstrNum records the MIs involved in scheduling in the original MBB, excluding debug instruction...
const TargetInstrInfo * TII
virtual void generateTripleMBB()
Make three copies of the original MBB to generate a new TripleMBB.
iterator_range< MachineBasicBlock::iterator > getScheduleRange(unsigned Offset, unsigned Num)
Gets the iterator range of MIs in the scheduling window.
virtual bool isScheduleValid()
Check whether the final result of window scheduling is valid.
virtual int calculateStallCycle(unsigned Offset, int MaxCycle)
Calculate the stall cycle between two trips after list scheduling.
virtual void updateScheduleResult(unsigned Offset, unsigned II)
Update the scheduling result after each list scheduling.
SmallVector< MachineInstr * > OriMIs
OriMIs keeps the MIs removed from the original MBB.
virtual bool initialize()
Initializes the algorithm and determines if it can be executed.
SmallVector< std::tuple< MachineInstr *, int, int, int >, 256 > SchedResult
SchedResult keeps the result of each list scheduling, and the format of the tuple is <MI pointer,...
virtual SmallVector< unsigned > getSearchIndexes(unsigned SearchNum, unsigned SearchRatio)
Give the folding position in the window algorithm, where different heuristics can be used.
void restoreMBB()
Erase the MIs in current MBB and restore the original MIs.
WindowScheduler(MachineSchedContext *C, MachineLoop &ML)
SmallVector< MachineInstr * > TriMIs
TriMIs keeps the MIs of TripleMBB, which is used to restore TripleMBB.
A range adaptor for a pair of iterators.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
void stable_sort(R &&Range)
Definition: STLExtras.h:1995
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1742
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:656
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
CycleInfo::CycleT Cycle
Definition: CycleInfo.h:24
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
const TargetPassConfig * PassConfig