LLVM 23.0.0git
GCNSchedStrategy.h
Go to the documentation of this file.
1//===-- GCNSchedStrategy.h - GCN Scheduler Strategy -*- C++ -*-------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
14#define LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
15
16#include "GCNRegPressure.h"
17#include "llvm/ADT/DenseMap.h"
23
24namespace llvm {
25
27class SIRegisterInfo;
28class GCNSubtarget;
29class GCNSchedStage;
30
40
41#ifndef NDEBUG
42raw_ostream &operator<<(raw_ostream &OS, const GCNSchedStageID &StageID);
43#endif
44
45/// This is a minimal scheduler strategy. The main difference between this
46/// and the GenericScheduler is that GCNSchedStrategy uses different
47/// heuristics to determine excess/critical pressure sets.
49protected:
50 SUnit *pickNodeBidirectional(bool &IsTopNode, bool &PickedPending);
51
52 void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy,
53 const RegPressureTracker &RPTracker,
54 SchedCandidate &Cand, bool &IsPending,
55 bool IsBottomUp);
56
57 void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop,
58 const RegPressureTracker &RPTracker,
59 const SIRegisterInfo *SRI, unsigned SGPRPressure,
60 unsigned VGPRPressure, bool IsBottomUp);
61
62 /// Estimate how many cycles \p SU must wait due to structural hazards at the
63 /// current boundary cycle. Returns zero when no stall is required.
64 unsigned getStructuralStallCycles(SchedBoundary &Zone, SUnit *SU) const;
65
66 /// Evaluates instructions in the pending queue using a subset of scheduling
67 /// heuristics.
68 ///
69 /// Instructions that cannot be issued due to hardware constraints are placed
70 /// in the pending queue rather than the available queue, making them normally
71 /// invisible to scheduling heuristics. However, in certain scenarios (such as
72 /// avoiding register spilling), it may be beneficial to consider scheduling
73 /// these not-yet-ready instructions.
75 SchedBoundary *Zone) const;
76
77 void printCandidateDecision(const SchedCandidate &Current,
78 const SchedCandidate &Preferred);
79
80 void getRegisterPressures(bool AtTop, const RegPressureTracker &RPTracker,
81 SUnit *SU, std::vector<unsigned> &Pressure,
82 std::vector<unsigned> &MaxPressure,
85 ScheduleDAGMI *DAG, const SIRegisterInfo *SRI);
86
87 std::vector<unsigned> Pressure;
88
89 std::vector<unsigned> MaxPressure;
90
92
94
96
98
99 // Scheduling stages for this strategy.
101
102 // Pointer to the current SchedStageID.
104
105 // GCN RP Tracker for top-down scheduling
107
108 // GCN RP Tracker for bottom-up scheduling
110
111 bool UseGCNTrackers = false;
112
113 std::optional<bool> GCNTrackersOverride;
114
115public:
116 // schedule() has seen register pressure over the critical limits and had to
117 // track register pressure for actual scheduling heuristics.
119
120 // Schedule known to have excess register pressure. Be more conservative in
121 // increasing ILP and preserving VGPRs.
122 bool KnownExcessRP = false;
123
124 // An error margin is necessary because of poor performance of the generic RP
125 // tracker and can be adjusted up for tuning heuristics to try and more
126 // aggressively reduce register pressure.
127 unsigned ErrorMargin = 3;
128
129 // Bias for SGPR limits under a high register pressure.
130 const unsigned HighRPSGPRBias = 7;
131
132 // Bias for VGPR limits under a high register pressure.
133 const unsigned HighRPVGPRBias = 7;
134
136
138
139 unsigned SGPRLimitBias = 0;
140
141 unsigned VGPRLimitBias = 0;
142
144
145 SUnit *pickNode(bool &IsTopNode) override;
146
147 void schedNode(SUnit *SU, bool IsTopNode) override;
148
149 void initialize(ScheduleDAGMI *DAG) override;
150
 // Returns the occupancy the strategy is currently targeting.
151 unsigned getTargetOccupancy() { return TargetOccupancy; }
152
 // Sets the occupancy the strategy should aim for in subsequent scheduling.
153 void setTargetOccupancy(unsigned Occ) { TargetOccupancy = Occ; }
154
156
157 // Advances stage. Returns true if there are remaining stages.
158 bool advanceStage();
159
160 bool hasNextStage() const;
161
 // Whether GCN RP trackers should be used. The explicit override
 // (GCNTrackersOverride), when set, takes precedence over the strategy's
 // default flag (UseGCNTrackers).
162 bool useGCNTrackers() const {
163 return GCNTrackersOverride.value_or(UseGCNTrackers);
164 }
165
167
169
171};
172
173/// The goal of this scheduling strategy is to maximize kernel occupancy (i.e.
174/// maximum number of waves per simd).
176public:
178 bool IsLegacyScheduler = false);
179};
180
181/// The goal of this scheduling strategy is to maximize ILP for a single wave
182/// (i.e. latency hiding).
184protected:
185 bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
186 SchedBoundary *Zone) const override;
187
188public:
190};
191
192/// The goal of this scheduling strategy is to maximize memory clause for a
193/// single wave.
195protected:
196 bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
197 SchedBoundary *Zone) const override;
198
199public:
201};
202
204 unsigned ScheduleLength;
205 unsigned BubbleCycles;
206
207public:
208 ScheduleMetrics() = default;
209 ScheduleMetrics(unsigned L, unsigned BC)
210 : ScheduleLength(L), BubbleCycles(BC) {}
211 unsigned getLength() const { return ScheduleLength; }
212 unsigned getBubbles() const { return BubbleCycles; }
 // Returns the bubble ratio scaled by ScaleFactor, clamped to at least 1.
 // NOTE(review): divides by ScheduleLength; callers must guarantee a non-zero
 // length (the default constructor leaves the members uninitialized) — confirm.
213 unsigned getMetric() const {
214 unsigned Metric = (BubbleCycles * ScaleFactor) / ScheduleLength;
215 // Metric is zero if the amount of bubbles is less than 1% which is too
216 // small. So, return 1.
217 return Metric ? Metric : 1;
218 }
219 static const unsigned ScaleFactor;
220};
221
223 dbgs() << "\n Schedule Metric (scaled by " << ScheduleMetrics::ScaleFactor
224 << " ) is: " << Sm.getMetric() << " [ " << Sm.getBubbles() << "/"
225 << Sm.getLength() << " ]\n";
226 return OS;
227}
228
229class GCNScheduleDAGMILive;
232 // The live in/out pressure as indexed by the first or last MI in the region
233 // before scheduling.
235 // The mapping of RegionIDx to key instruction
236 DenseMap<unsigned, MachineInstr *> IdxToInstruction;
237 // Whether we are calculating LiveOuts or LiveIns
238 bool IsLiveOut;
239
240public:
241 RegionPressureMap() = default;
243 : DAG(GCNDAG), IsLiveOut(LiveOut) {}
244 // Build the Instr->LiveReg and RegionIdx->Instr maps
245 void buildLiveRegMap();
246
247 // Retrieve the LiveReg for a given RegionIdx
249 assert(IdxToInstruction.contains(RegionIdx));
250 MachineInstr *Key = IdxToInstruction[RegionIdx];
251 return RegionLiveRegMap[Key];
252 }
253};
254
255/// A region's boundaries i.e. a pair of instruction bundle iterators. The lower
256/// boundary is inclusive, the upper boundary is exclusive.
258 std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>;
259
261 friend class GCNSchedStage;
266 friend class PreRARematStage;
268 friend class RegionPressureMap;
269
270 const GCNSubtarget &ST;
271
273
274 // Occupancy target at the beginning of function scheduling cycle.
275 unsigned StartingOccupancy;
276
277 // Minimal real occupancy recorder for the function.
278 unsigned MinOccupancy;
279
280 // Vector of regions recorder for later rescheduling
282
283 // Record regions with high register pressure.
284 BitVector RegionsWithHighRP;
285
286 // Record regions with excess register pressure over the physical register
287 // limit. Register pressure in these regions usually will result in spilling.
288 BitVector RegionsWithExcessRP;
289
290 // Regions that have IGLP instructions (SCHED_GROUP_BARRIER or IGLP_OPT).
291 BitVector RegionsWithIGLPInstrs;
292
293 // Region live-in cache.
295
296 // Region pressure cache.
298
299 // Temporary basic block live-in cache.
301
302 // The map of the initial first region instruction to region live in registers
304
305 // Calculate the map of the initial first region instruction to region live in
306 // registers
308
309 // Calculate the map of the initial last region instruction to region live out
310 // registers
312 getRegionLiveOutMap() const;
313
314 // The live out registers per region. These are internally stored as a map of
315 // the initial last region instruction to region live out registers, but can
316 // be retrieved with the regionIdx by calls to getLiveRegsForRegionIdx.
317 RegionPressureMap RegionLiveOuts;
318
319 // Return current region pressure.
320 GCNRegPressure getRealRegPressure(unsigned RegionIdx) const;
321
322 // Compute and cache live-ins and pressure for all regions in block.
323 void computeBlockPressure(unsigned RegionIdx, const MachineBasicBlock *MBB);
324
325 /// Makes the scheduler try to achieve an occupancy of \p TargetOccupancy.
326 void setTargetOccupancy(unsigned TargetOccupancy);
327
328 void runSchedStages();
329
330 std::unique_ptr<GCNSchedStage> createSchedStage(GCNSchedStageID SchedStageID);
331
332public:
334 std::unique_ptr<MachineSchedStrategy> S);
335
336 void schedule() override;
337
338 void finalizeSchedule() override;
339};
340
341// GCNSchedStrategy applies multiple scheduling stages to a function.
343protected:
345
347
349
351
353
355
356 // The current block being scheduled.
358
359 // Current region index.
360 unsigned RegionIdx = 0;
361
362 // Record the original order of instructions before scheduling.
363 std::vector<MachineInstr *> Unsched;
364
365 // RP before scheduling the current region.
367
368 // RP after scheduling the current region.
370
371 std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;
372
374
375public:
376 // Initialize state for a scheduling stage. Returns false if the current stage
377 // should be skipped.
378 virtual bool initGCNSchedStage();
379
380 // Finalize state after finishing a scheduling pass on the function.
381 virtual void finalizeGCNSchedStage();
382
383 // Setup for scheduling a region. Returns false if the current region should
384 // be skipped.
385 virtual bool initGCNRegion();
386
387 // Finalize state after scheduling a region.
388 virtual void finalizeGCNRegion();
389
390 // Track whether a new region is also a new MBB.
391 void setupNewBlock();
392
393 // Check result of scheduling.
394 void checkScheduling();
395
396 // computes the given schedule virtual execution time in clocks
397 ScheduleMetrics getScheduleMetrics(const std::vector<SUnit> &InputSchedule);
399 unsigned computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle,
400 DenseMap<unsigned, unsigned> &ReadyCycles,
401 const TargetSchedModel &SM);
402
403 // Returns true if scheduling should be reverted.
404 virtual bool shouldRevertScheduling(unsigned WavesAfter);
405
406 // Returns true if current region has known excess pressure.
 // Reads the DAG-wide RegionsWithExcessRP bitvector at this stage's current
 // region index; excess RP in a region usually results in spilling.
407 bool isRegionWithExcessRP() const {
408 return DAG.RegionsWithExcessRP[RegionIdx];
409 }
410
411 // The region number this stage is currently working on
412 unsigned getRegionIdx() { return RegionIdx; }
413
414 // Returns true if the new schedule may result in more spilling.
415 bool mayCauseSpilling(unsigned WavesAfter);
416
417 /// Sets the schedule of region \p RegionIdx to \p MIOrder. The MIs in \p
418 /// MIOrder must be exactly the same as the ones currently existing inside the
419 /// region, only in a different order that honors def-use chains.
420 void modifyRegionSchedule(unsigned RegionIdx,
422
424
425 virtual ~GCNSchedStage() = default;
426};
427
435
437private:
438 // Record regions with excess archvgpr register pressure over the physical
439 // register limit. Register pressure in these regions usually will result in
440 // spilling.
441 BitVector RegionsWithExcessArchVGPR;
442
443 const SIInstrInfo *TII;
444 const SIRegisterInfo *SRI;
445
446 /// Do a speculative rewrite and collect copy locations. The speculative
447 /// rewrite allows us to calculate the RP of the code after the rewrite, and
448 /// the copy locations allow us to calculate the total cost of copies required
449 /// for the rewrite. Stores the rewritten instructions in \p RewriteCands ,
450 /// the copy locations for uses (of the MFMA result) in \p CopyForUse and the
451 /// copy locations for defs (of the MFMA operands) in \p CopyForDef
452 bool
453 initHeuristics(std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands,
454 DenseMap<MachineBasicBlock *, std::set<Register>> &CopyForUse,
456
457 /// Calculate the rewrite cost and undo the state change (e.g. rewriting) done
458 /// in initHeuristics. Uses \p CopyForUse and \p CopyForDef to calculate copy
459 /// costs, and \p RewriteCands to undo rewriting.
460 int64_t getRewriteCost(
461 const std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands,
462 const DenseMap<MachineBasicBlock *, std::set<Register>> &CopyForUse,
463 const SmallPtrSetImpl<MachineInstr *> &CopyForDef);
464
465 /// Do the final rewrite on \p RewriteCands and insert any needed copies.
466 bool
467 rewrite(const std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands);
468
469 /// \returns true if this MI is a rewrite candidate.
470 bool isRewriteCandidate(MachineInstr *MI) const;
471
472 /// Finds all the reaching defs of \p UseMO and stores the SlotIndexes into \p
473 /// DefIdxs
474 void findReachingDefs(MachineOperand &UseMO, LiveIntervals *LIS,
476
477 /// Finds all the reaching uses of \p DefMI and stores the use operands in \p
478 /// ReachingUses
479 void findReachingUses(MachineInstr *DefMI, LiveIntervals *LIS,
481
482public:
483 bool initGCNSchedStage() override;
484
487};
488
490private:
491 // Save the initial occupancy before starting this stage.
492 unsigned InitialOccupancy;
493 // Save the temporary target occupancy before starting this stage.
494 unsigned TempTargetOccupancy;
495 // Track whether any region was scheduled by this stage.
496 bool IsAnyRegionScheduled;
497
498public:
499 bool initGCNSchedStage() override;
500
501 void finalizeGCNSchedStage() override;
502
503 bool initGCNRegion() override;
504
505 bool shouldRevertScheduling(unsigned WavesAfter) override;
506
509};
510
511// Retry function scheduling if we found resulting occupancy and it is
512// lower than used for other scheduling passes. This will give more freedom
513// to schedule low register pressure blocks.
515public:
516 bool initGCNSchedStage() override;
517
518 bool initGCNRegion() override;
519
520 bool shouldRevertScheduling(unsigned WavesAfter) override;
521
524};
525
526/// Attempts to reduce function spilling or, if there is no spilling, to
527/// increase function occupancy by one with respect to register usage by sinking
528/// rematerializable instructions to their use. When the stage estimates that
529/// reducing spilling or increasing occupancy is possible, it tries to
530/// rematerialize as few registers as possible to reduce potential negative
531/// effects on function latency.
532///
533/// The stage only supports rematerializing registers that meet all of the
534/// following constraints.
535/// 1. The register is virtual and has a single defining instruction.
536/// 2. The single defining instruction is either deemed rematerializable by the
537/// target-independent logic, or if not, has no non-constant and
538/// non-ignorable physical register use.
539/// 3. The register has no virtual register use whose live range would be
540/// extended by the rematerialization.
541/// 4. The register has a single non-debug user in a different region from its
542/// defining region.
543/// 5. The register is not used by or using another register that is going to be
544/// rematerialized.
546private:
547 using RegisterIdx = Rematerializer::RegisterIdx;
548
549 /// A scored rematerialization candidate. Higher scores indicate more
550 /// beneficial rematerializations. A null score indicates the rematerialization
551 /// is not helpful to reduce RP in target regions.
552 struct ScoredRemat {
553 /// The register index handle in the rematerializer.
554 RegisterIdx RegIdx;
555 /// Regions in which the register is live-in/live-out/live anywhere.
556 BitVector LiveIn, LiveOut, Live;
557 /// Subset of \ref Live regions in which the rematerialization is not
558 /// guaranteed to reduce RP (i.e., regions in which the register is not
559 /// live-through and unused).
560 BitVector UnpredictableRPSave;
561 /// Expected register pressure decrease induced by rematerializing this
562 /// candidate.
563 GCNRegPressure RPSave;
564
565 /// Execution frequency information required by scoring heuristics.
566 /// Frequencies are scaled down if they are high to avoid overflow/underflow
567 /// when combining them.
568 struct FreqInfo {
569 /// Per-region execution frequencies. 0 when unknown.
571 /// Minimum and maximum observed frequencies.
573
575
576 private:
577 static const uint64_t ScaleFactor = 1024;
578 };
579
580 /// Initializes the candidate with state-independent characteristics for
581 /// rematerializable register with index handle \p RegIdx. This doesn't
582 /// update the actual score (call \ref update for this).
583 void init(RegisterIdx RegIdx, const FreqInfo &Freq,
584 const Rematerializer &Remater, GCNScheduleDAGMILive &DAG);
585
586 /// Rematerializes the candidate using the \p Remater.
587 void rematerialize(Rematerializer &Remater) const;
588
589 /// Determines whether this rematerialization may be beneficial in at least
590 /// one target region.
591 bool maybeBeneficial(const BitVector &TargetRegions,
592 ArrayRef<GCNRPTarget> RPTargets) const;
593
594 /// Rematerializes the candidate and returns the new MI. This removes the
595 /// rematerialized register from live-in/out lists in the \p DAG and updates
596 /// \p RPTargets in all affected regions. Regions in which RP savings are
597 /// not guaranteed are set in \p RecomputeRP.
598 MachineInstr *rematerialize(BitVector &RecomputeRP,
601
602 /// Updates the rematerialization's score w.r.t. the current \p RPTargets.
603 /// \p RegionFreq indicates the frequency of each region.
604 void update(const BitVector &TargetRegions, ArrayRef<GCNRPTarget> RPTargets,
605 const FreqInfo &Freq, bool ReduceSpill);
606
607 /// Returns whether the current score is null, indicating the
608 /// rematerialization is useless.
609 bool hasNullScore() const { return !RegionImpact; }
610
611 /// Compare score components of non-null scores pair-wise. Scores shouldn't
612 /// be null (as defined by \ref hasNullScore).
 // Lexicographic "less beneficial than" comparison over the three scoring
 // components, most important first: MaxFreq, then FreqDiff, then
 // RegionImpact. Full ties fall through to the RegIdx rule below. Both
 // scores must be non-null (asserted).
613 bool operator<(const ScoredRemat &O) const {
614 assert(!hasNullScore() && "this has null score");
615 assert(!O.hasNullScore() && "other has null score");
616 if (MaxFreq != O.MaxFreq)
617 return MaxFreq < O.MaxFreq;
618 if (FreqDiff != O.FreqDiff)
619 return FreqDiff < O.FreqDiff;
620 if (RegionImpact != O.RegionImpact)
621 return RegionImpact < O.RegionImpact;
622 // Break ties using register index handles. If the two registers are
623 // connected in some dependency DAG of rematerializable registers, this
624 // will tend to give a higher score to the register further from the
625 // dependency DAG's root. If the two registers are disconnected, this will
626 // give a higher score to the register with lower virtual register index.
627 // In general, within a region, this should prefer registers defined
628 // earlier that have longer live ranges in their defining region (since
629 // the registers we consider are always live-out in their defining
630 // region).
631 return RegIdx > O.RegIdx;
632 }
633
634#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
635 Printable print() const;
636#endif
637
638 private:
639 // The three members below are the scoring components, top to bottom from
640 // most important to least important when comparing candidates.
641
642 /// Frequency of impacted target region with highest known frequency. This
643 /// only matters when the stage is trying to reduce spilling, so it is
644 /// always 0 when it is not.
645 uint64_t MaxFreq;
646 /// Frequency difference between defining and using regions. Negative values
647 /// indicate we are rematerializing to higher frequency regions; positive
648 /// values indicate the contrary.
649 int64_t FreqDiff;
650 /// Expected number of target regions impacted by the rematerialization,
651 /// scaled by the size of the register being rematerialized.
652 unsigned RegionImpact;
653 };
654
655 /// Register pressure targets for all regions.
656 SmallVector<GCNRPTarget> RPTargets;
657 /// Regions which are above the stage's RP target.
658 BitVector TargetRegions;
659 /// The target occupancy the set is trying to achieve. Empty when the
660 /// objective is spilling reduction.
661 std::optional<unsigned> TargetOcc;
662 /// Achieved occupancy *only* through rematerializations (pre-rescheduling).
663 unsigned AchievedOcc;
664 /// After successful stage initialization, indicates which regions should be
665 /// rescheduled.
666 BitVector RescheduleRegions;
667
668 /// Underlying utilities to identify and perform rematerializations.
669 Rematerializer Remater;
670
671 struct RollbackSupport {
673 /// The register index handle in the rematerializer.
674 RegisterIdx RegIdx;
675 /// Regions in which the original register was live-in or live-out.
677
681 };
682
683 /// Rollback listener.
684 Rollbacker Listener;
685 /// Registers removed from live-maps along with bitvectors indicating the
686 /// regions in which they were live-ins and live-outs.
687 SmallVector<LiveMapUpdate> LiveMapUpdates;
688
689 /// Attaches the rollback listener to the rematerializer.
690 RollbackSupport(Rematerializer &Remater) { Remater.addListener(&Listener); }
691 };
692
693 /// Rollback support. Maintained through a unique pointer because it is
694 /// optional and needs to persist between stage initialization and
695 /// finalization.
696 std::unique_ptr<RollbackSupport> Rollback;
697
698 /// State of a region pre-re-scheduling but post-rematerializations that we
699 /// must keep to be able to revert re-scheduling effects.
700 struct RegionSchedRevert {
701 /// Region number.
702 unsigned RegionIdx;
703 /// Original instruction order (both debug and non-debug MIs).
704 std::vector<MachineInstr *> OrigMIOrder;
705 /// Maximum pressure recorded in the region.
706 GCNRegPressure MaxPressure;
707
708 RegionSchedRevert(unsigned RegionIdx, ArrayRef<MachineInstr *> OrigMIOrder,
709 const GCNRegPressure &MaxPressure)
710 : RegionIdx(RegionIdx), OrigMIOrder(OrigMIOrder),
711 MaxPressure(MaxPressure) {}
712 };
713 /// After re-scheduling, contains pre-re-scheduling data for all re-scheduled
714 /// regions.
715 SmallVector<RegionSchedRevert> RegionReverts;
716 /// Whether we should revert all re-scheduled regions.
717 bool RevertAllRegions = false;
718
719 /// Returns the occupancy the stage is trying to achieve.
720 unsigned getStageTargetOccupancy() const;
721
722 /// Determines the stage's objective (increasing occupancy or reducing
723 /// spilling, set in \ref TargetOcc). Defines \ref RPTargets in all regions to
724 /// achieve that objective and mark those that don't achieve it in \ref
725 /// TargetRegions. Returns whether there is any target region.
726 bool setObjective();
727
728 /// In all regions set in \p Regions, saves pressure \p RPSave and clear it as
729 /// a target if its RP target has been reached.
730 void updateRPTargets(const BitVector &Regions, const GCNRegPressure &RPSave);
731
732 /// Fully recomputes RP from the DAG in \p Regions. Among those regions, sets
733 /// again all \ref TargetRegions that were optimistically marked as satisfied
734 /// but are actually not, and returns whether there were any such regions.
735 bool updateAndVerifyRPTargets(const BitVector &Regions);
736
737 /// Removes register \p Reg from the live-ins of regions set in \p LiveIn and
738 /// the live-outs of regions set in \p LiveOut.
739 void removeFromLiveMaps(Register Reg, const BitVector &LiveIn,
740 const BitVector &LiveOut);
741
742 /// Adds register \p Reg with mask \p Mask to the live-ins of regions set in
743 /// \p LiveIn and the live-outs of regions set in \p LiveOut.
744 void addToLiveMaps(Register Reg, LaneBitmask Mask, const BitVector &LiveIn,
745 const BitVector &LiveOut);
746
747 /// If remat alone did not increase occupancy to the target one, rollbacks all
748 /// rematerializations and resets live-ins/RP in all regions impacted by the
749 /// stage to their pre-stage values.
750 void finalizeGCNSchedStage() override;
751
752public:
753 bool initGCNSchedStage() override;
754
755 bool initGCNRegion() override;
756
757 void finalizeGCNRegion() override;
758
759 bool shouldRevertScheduling(unsigned WavesAfter) override;
760
762 : GCNSchedStage(StageID, DAG), TargetRegions(DAG.Regions.size()),
763 RescheduleRegions(DAG.Regions.size()),
764 Remater(MF, DAG.Regions, *DAG.LIS) {
765 const unsigned NumRegions = DAG.Regions.size();
766 RPTargets.reserve(NumRegions);
767 }
768};
769
777
786
788private:
789 std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;
790
791 bool HasIGLPInstrs = false;
792
793public:
794 void schedule() override;
795
796 void finalizeSchedule() override;
797
799 std::unique_ptr<MachineSchedStrategy> S,
800 bool RemoveKillFlags);
801};
802
803} // End namespace llvm
804
805#endif // LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
This file defines the DenseMap class.
This file defines the GCNRegPressure class, which tracks registry pressure by bookkeeping number of S...
IRTranslator LLVM IR MI
Register Reg
Promote Memory to Register
Definition Mem2Reg.cpp:110
static constexpr unsigned SM(unsigned Version)
MIR-level target-independent rematerialization helpers.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
bool shouldRevertScheduling(unsigned WavesAfter) override
ClusteredLowOccStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
GCNMaxILPSchedStrategy(const MachineSchedContext *C)
bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const override
Apply a set of heuristics to a new candidate.
bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const override
GCNMaxMemoryClauseSchedStrategy tries best to clause memory instructions as much as possible.
GCNMaxMemoryClauseSchedStrategy(const MachineSchedContext *C)
GCNMaxOccupancySchedStrategy(const MachineSchedContext *C, bool IsLegacyScheduler=false)
void finalizeSchedule() override
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
void schedule() override
Orders nodes according to selected style.
GCNPostScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S, bool RemoveKillFlags)
DenseMap< unsigned, LaneBitmask > LiveRegSet
GCNSchedStrategy & S
GCNRegPressure PressureBefore
bool isRegionWithExcessRP() const
void modifyRegionSchedule(unsigned RegionIdx, ArrayRef< MachineInstr * > MIOrder)
Sets the schedule of region RegionIdx to MIOrder.
bool mayCauseSpilling(unsigned WavesAfter)
ScheduleMetrics getScheduleMetrics(const std::vector< SUnit > &InputSchedule)
GCNScheduleDAGMILive & DAG
const GCNSchedStageID StageID
std::vector< MachineInstr * > Unsched
GCNRegPressure PressureAfter
MachineFunction & MF
virtual void finalizeGCNRegion()
SIMachineFunctionInfo & MFI
unsigned computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle, DenseMap< unsigned, unsigned > &ReadyCycles, const TargetSchedModel &SM)
virtual ~GCNSchedStage()=default
virtual void finalizeGCNSchedStage()
virtual bool initGCNSchedStage()
virtual bool shouldRevertScheduling(unsigned WavesAfter)
std::vector< std::unique_ptr< ScheduleDAGMutation > > SavedMutations
GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
MachineBasicBlock * CurrentMBB
const GCNSubtarget & ST
This is a minimal scheduler strategy.
const unsigned HighRPSGPRBias
GCNDownwardRPTracker DownwardTracker
void getRegisterPressures(bool AtTop, const RegPressureTracker &RPTracker, SUnit *SU, std::vector< unsigned > &Pressure, std::vector< unsigned > &MaxPressure, GCNDownwardRPTracker &DownwardTracker, GCNUpwardRPTracker &UpwardTracker, ScheduleDAGMI *DAG, const SIRegisterInfo *SRI)
GCNSchedStrategy(const MachineSchedContext *C)
SmallVector< GCNSchedStageID, 4 > SchedStages
std::vector< unsigned > MaxPressure
SUnit * pickNodeBidirectional(bool &IsTopNode, bool &PickedPending)
GCNSchedStageID getCurrentStage()
bool tryPendingCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const
Evaluates instructions in the pending queue using a subset of scheduling heuristics.
SmallVectorImpl< GCNSchedStageID >::iterator CurrentStage
void schedNode(SUnit *SU, bool IsTopNode) override
Notify MachineSchedStrategy that ScheduleDAGMI has scheduled an instruction and updated scheduled/rem...
std::optional< bool > GCNTrackersOverride
GCNDownwardRPTracker * getDownwardTracker()
std::vector< unsigned > Pressure
void initialize(ScheduleDAGMI *DAG) override
Initialize the strategy after building the DAG for a new region.
GCNUpwardRPTracker UpwardTracker
void printCandidateDecision(const SchedCandidate &Current, const SchedCandidate &Preferred)
const unsigned HighRPVGPRBias
void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, SchedCandidate &Cand, bool &IsPending, bool IsBottomUp)
unsigned getStructuralStallCycles(SchedBoundary &Zone, SUnit *SU) const
Estimate how many cycles SU must wait due to structural hazards at the current boundary cycle.
void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, const SIRegisterInfo *SRI, unsigned SGPRPressure, unsigned VGPRPressure, bool IsBottomUp)
void setTargetOccupancy(unsigned Occ)
SUnit * pickNode(bool &IsTopNode) override
Pick the next node to schedule, or return NULL.
GCNUpwardRPTracker * getUpwardTracker()
GCNSchedStageID getNextStage() const
void finalizeSchedule() override
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
void schedule() override
Orders nodes according to selected style.
GCNScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S)
ScheduleDAGMILive * DAG
GenericScheduler(const MachineSchedContext *C)
bool shouldRevertScheduling(unsigned WavesAfter) override
ILPInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
bool shouldRevertScheduling(unsigned WavesAfter) override
MemoryClauseInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
bool shouldRevertScheduling(unsigned WavesAfter) override
OccInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
PreRARematStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
bool shouldRevertScheduling(unsigned WavesAfter) override
void finalizeGCNRegion() override
bool initGCNSchedStage() override
Simple wrapper around std::function<void(raw_ostream&)>.
Definition Printable.h:38
Track the current register pressure at some position in the instruction stream, and remember the high...
GCNRPTracker::LiveRegSet & getLiveRegsForRegionIdx(unsigned RegionIdx)
RegionPressureMap(GCNScheduleDAGMILive *GCNDAG, bool LiveOut)
MIR-level target-independent rematerializer.
unsigned RegisterIdx
Index type for rematerializable registers.
RewriteMFMAFormStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
Rematerializer listener with the ability to re-create deleted registers and rollback rematerializatio...
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Scheduling unit. This is a node in the scheduling DAG.
Each Scheduling boundary is associated with ready queues.
bool RemoveKillFlags
True if the DAG builder should remove kill flags (in preparation for rescheduling).
ScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S)
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
ScheduleDAGMI(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S, bool RemoveKillFlags)
unsigned getBubbles() const
ScheduleMetrics(unsigned L, unsigned BC)
unsigned getLength() const
static const unsigned ScaleFactor
unsigned getMetric() const
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
typename SuperClass::iterator iterator
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Provide an instruction scheduling machine model to CodeGen passes.
UnclusteredHighRPStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
bool shouldRevertScheduling(unsigned WavesAfter) override
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
This is an optimization pass for GlobalISel generic memory operations.
bool operator<(int64_t V1, const APSInt &V2)
Definition APSInt.h:360
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1669
std::pair< MachineBasicBlock::iterator, MachineBasicBlock::iterator > RegionBoundaries
A region's boundaries i.e.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
ArrayRef(const T &OneElt) -> ArrayRef< T >
Policy for scheduling the next instruction in the candidate's zone.
Store the state used by GenericScheduler heuristics, required for the lifetime of one invocation of p...
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
BitVector LiveIn
Regions in which the original register was live-in or live-out.
LiveMapUpdate(RegisterIdx RegIdx, const BitVector &LiveIn, const BitVector &LiveOut)
RegisterIdx RegIdx
The register index handle in the rematerializer.
Execution frequency information required by scoring heuristics.
SmallVector< uint64_t > Regions
Per-region execution frequencies. 0 when unknown.
uint64_t MinFreq
Minimum and maximum observed frequencies.
FreqInfo(MachineFunction &MF, const GCNScheduleDAGMILive &DAG)