//===---------------------------- GCNILPSched.cpp - -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/ScheduleDAG.h"

using namespace llvm;

#define DEBUG_TYPE "machine-scheduler"

namespace {

class GCNILPScheduler {
  struct Candidate : ilist_node<Candidate> {
    SUnit *SU;

    Candidate(SUnit *SU_)
      : SU(SU_) {}
  };

  SpecificBumpPtrAllocator<Candidate> Alloc;
  typedef simple_ilist<Candidate> Queue;
  Queue PendingQueue;
  Queue AvailQueue;
  unsigned CurQueueId = 0;

  std::vector<unsigned> SUNumbers;

  /// CurCycle - The current scheduler state corresponds to this cycle.
  unsigned CurCycle = 0;

  unsigned getNodePriority(const SUnit *SU) const;

  const SUnit *pickBest(const SUnit *left, const SUnit *right);
  Candidate* pickCandidate();

  void releasePending();
  void advanceToCycle(unsigned NextCycle);
  void releasePredecessors(const SUnit* SU);

public:
  std::vector<const SUnit*> schedule(ArrayRef<const SUnit*> TopRoots,
                                     const ScheduleDAG &DAG);
};
} // namespace

/// CalcNodeSethiUllmanNumber - Compute the Sethi-Ullman number.
/// A smaller number means higher priority.
static unsigned
CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
  if (SethiUllmanNumber != 0)
    return SethiUllmanNumber;

  unsigned Extra = 0;
  for (const SDep &Pred : SU->Preds) {
    if (Pred.isCtrl()) continue; // ignore chain preds
    SUnit *PredSU = Pred.getSUnit();
    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
    if (PredSethiUllman > SethiUllmanNumber) {
      SethiUllmanNumber = PredSethiUllman;
      Extra = 0;
    }
    else if (PredSethiUllman == SethiUllmanNumber)
      ++Extra;
  }

  SethiUllmanNumber += Extra;

  if (SethiUllmanNumber == 0)
    SethiUllmanNumber = 1;

  return SethiUllmanNumber;
}

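// Worked example (added for illustration; not in the original source): a leaf
// SUnit with no data predecessors falls through the loop with 0 and is bumped
// to 1.  A node whose two data predecessors both evaluate to 1 keeps the
// maximum (1) and counts one Extra for the tie, giving 1 + 1 = 2, so nodes
// that need several equally "deep" operands alive at once get larger numbers.
// pickBest() below prefers the candidate with the smaller number.
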
// Lower priority means schedule further down. For bottom-up scheduling, lower
// priority SUs are scheduled before higher priority SUs.
unsigned GCNILPScheduler::getNodePriority(const SUnit *SU) const {
  assert(SU->NodeNum < SUNumbers.size());
  if (SU->NumSuccs == 0 && SU->NumPreds != 0)
    // If SU does not have a register use, i.e. it doesn't produce a value
    // that would be consumed (e.g. store), then it terminates a chain of
    // computation. Give it a large SethiUllman number so it will be
    // scheduled right before its predecessors, so that it doesn't lengthen
    // their live ranges.
    return 0xffff;

  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
    // If SU does not have a register def, schedule it close to its uses
    // because it does not lengthen any live ranges.
    return 0;

  return SUNumbers[SU->NodeNum];
}

/// closestSucc - Returns the scheduled cycle of the successor which is
/// closest to the current cycle.
static unsigned closestSucc(const SUnit *SU) {
  unsigned MaxHeight = 0;
  for (const SDep &Succ : SU->Succs) {
    if (Succ.isCtrl()) continue; // ignore chain succs
    unsigned Height = Succ.getSUnit()->getHeight();
    // If there are a bunch of CopyToRegs stacked up, they should be considered
    // to be at the same position.
    if (Height > MaxHeight)
      MaxHeight = Height;
  }
  return MaxHeight;
}

/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
/// for scratch registers, i.e. the number of data dependencies.
static unsigned calcMaxScratches(const SUnit *SU) {
  unsigned Scratches = 0;
  for (const SDep &Pred : SU->Preds) {
    if (Pred.isCtrl()) continue; // ignore chain preds
    Scratches++;
  }
  return Scratches;
}

// Return -1 if left has higher priority, 1 if right has higher priority.
// Return 0 if latency-based priority is equivalent.
static int BUCompareLatency(const SUnit *left, const SUnit *right) {
  // Scheduling an instruction that uses a VReg whose postincrement has not yet
  // been scheduled will induce a copy. Model this as an extra cycle of latency.
  int LHeight = (int)left->getHeight();
  int RHeight = (int)right->getHeight();

  // If either node is scheduling for latency, sort them by height/depth
  // and latency.

  // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer
  // is enabled, grouping instructions by cycle, then its height is already
  // covered so only its depth matters. We also reach this point if both stall
  // but have the same height.
  if (LHeight != RHeight)
    return LHeight > RHeight ? 1 : -1;

  int LDepth = left->getDepth();
  int RDepth = right->getDepth();
  if (LDepth != RDepth) {
    LLVM_DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum
                      << ") depth " << LDepth << " vs SU (" << right->NodeNum
                      << ") depth " << RDepth << "\n");
    return LDepth < RDepth ? 1 : -1;
  }
  if (left->Latency != right->Latency)
    return left->Latency > right->Latency ? 1 : -1;

  return 0;
}

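// Note added for clarity (not in the original source): with a left height of 5
// and a right height of 3, BUCompareLatency returns 1, and pickBest() below
// treats a positive result as "pick right"; equal heights fall through to
// depth and then to the Latency field as successive tie-breakers.
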
const SUnit *GCNILPScheduler::pickBest(const SUnit *left, const SUnit *right)
{
  // TODO: add register pressure lowering checks

  bool const DisableSchedCriticalPath = false;
  int MaxReorderWindow = 6;
  if (!DisableSchedCriticalPath) {
    int spread = (int)left->getDepth() - (int)right->getDepth();
    if (std::abs(spread) > MaxReorderWindow) {
      LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
                        << left->getDepth() << " != SU(" << right->NodeNum
                        << "): " << right->getDepth() << "\n");
      return left->getDepth() < right->getDepth() ? right : left;
    }
  }

  bool const DisableSchedHeight = false;
  if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
    int spread = (int)left->getHeight() - (int)right->getHeight();
    if (std::abs(spread) > MaxReorderWindow)
      return left->getHeight() > right->getHeight() ? right : left;
  }

  // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
  unsigned LPriority = getNodePriority(left);
  unsigned RPriority = getNodePriority(right);

  if (LPriority != RPriority)
    return LPriority > RPriority ? right : left;

  // Try to schedule def + use closer when Sethi-Ullman numbers are the same.
  // e.g.
  // t1 = op t2, c1
  // t3 = op t4, c2
  //
  // and the following instructions are both ready.
  // t2 = op c3
  // t4 = op c4
  //
  // Then schedule t2 = op first.
  // i.e.
  // t4 = op c4
  // t2 = op c3
  // t1 = op t2, c1
  // t3 = op t4, c2
  //
  // This creates more short live intervals.
  unsigned LDist = closestSucc(left);
  unsigned RDist = closestSucc(right);
  if (LDist != RDist)
    return LDist < RDist ? right : left;

  // How many registers become live when the node is scheduled.
  unsigned LScratch = calcMaxScratches(left);
  unsigned RScratch = calcMaxScratches(right);
  if (LScratch != RScratch)
    return LScratch > RScratch ? right : left;

  bool const DisableSchedCycles = false;
  if (!DisableSchedCycles) {
    int result = BUCompareLatency(left, right);
    if (result != 0)
      return result > 0 ? right : left;
    return left;
  }
  else {
    if (left->getHeight() != right->getHeight())
      return (left->getHeight() > right->getHeight()) ? right : left;

    if (left->getDepth() != right->getDepth())
      return (left->getDepth() < right->getDepth()) ? right : left;
  }

  assert(left->NodeQueueId && right->NodeQueueId &&
         "NodeQueueId cannot be zero");
  return (left->NodeQueueId > right->NodeQueueId) ? right : left;
}

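// Summary added for clarity (not in the original source): pickBest applies its
// heuristics in order and stops at the first one that separates the two
// candidates: depth (only when the gap exceeds MaxReorderWindow), height (same
// window), Sethi-Ullman priority, distance to the closest successor, number of
// data predecessors (scratches), then latency via BUCompareLatency.  With the
// hard-coded DisableSchedCycles = false, a full latency tie returns left, so
// the trailing NodeQueueId comparison is only reachable if that flag is
// flipped; NodeQueueId would then make the choice a deterministic FIFO.
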
GCNILPScheduler::Candidate* GCNILPScheduler::pickCandidate() {
  if (AvailQueue.empty())
    return nullptr;
  auto Best = AvailQueue.begin();
  for (auto I = std::next(AvailQueue.begin()), E = AvailQueue.end(); I != E; ++I) {
    auto NewBestSU = pickBest(Best->SU, I->SU);
    if (NewBestSU != Best->SU) {
      assert(NewBestSU == I->SU);
      Best = I;
    }
  }
  return &*Best;
}

void GCNILPScheduler::releasePending() {
  // Check to see if any of the pending instructions are ready to issue. If
  // so, add them to the available queue.
  for (auto I = PendingQueue.begin(), E = PendingQueue.end(); I != E;) {
    auto &C = *I++;
    if (C.SU->getHeight() <= CurCycle) {
      PendingQueue.remove(C);
      AvailQueue.push_back(C);
      C.SU->NodeQueueId = CurQueueId++;
    }
  }
}

/// Move the scheduler state forward to the specified cycle.
void GCNILPScheduler::advanceToCycle(unsigned NextCycle) {
  if (NextCycle <= CurCycle)
    return;
  CurCycle = NextCycle;
  releasePending();
}

void GCNILPScheduler::releasePredecessors(const SUnit* SU) {
  for (const auto &PredEdge : SU->Preds) {
    auto PredSU = PredEdge.getSUnit();
    if (PredEdge.isWeak())
      continue;
    assert(PredSU->isBoundaryNode() || PredSU->NumSuccsLeft > 0);

    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge.getLatency());

    if (!PredSU->isBoundaryNode() && --PredSU->NumSuccsLeft == 0)
      PendingQueue.push_front(*new (Alloc.Allocate()) Candidate(PredSU));
  }
}

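// Clarifying note (added, not in the original source): this is the bottom-up
// counterpart of releasing successors.  Raising the predecessor's height to
// SU->getHeight() + edge latency keeps its Candidate in PendingQueue until
// advanceToCycle() reaches that height and releasePending() moves it to
// AvailQueue; a predecessor only becomes a pending candidate once its
// NumSuccsLeft count drops to zero.
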
std::vector<const SUnit*>
GCNILPScheduler::schedule(ArrayRef<const SUnit*> BotRoots,
                          const ScheduleDAG &DAG) {
  auto &SUnits = const_cast<ScheduleDAG&>(DAG).SUnits;

  std::vector<SUnit> SUSavedCopy;
  SUSavedCopy.resize(SUnits.size());

  // We cannot save only the fields we touch: some of them are private,
  // so save the units verbatim. This assumes SUnit has value semantics.
  for (const SUnit &SU : SUnits)
    SUSavedCopy[SU.NodeNum] = SU;

  SUNumbers.assign(SUnits.size(), 0);
  for (const SUnit &SU : SUnits)
    CalcNodeSethiUllmanNumber(&SU, SUNumbers);

  for (const auto *SU : BotRoots) {
    AvailQueue.push_back(
      *new (Alloc.Allocate()) Candidate(const_cast<SUnit*>(SU)));
  }
  releasePredecessors(&DAG.ExitSU);

  std::vector<const SUnit*> Schedule;
  Schedule.reserve(SUnits.size());
  while (true) {
    if (AvailQueue.empty() && !PendingQueue.empty()) {
      auto EarliestSU =
          llvm::min_element(PendingQueue, [=](const Candidate &C1,
                                              const Candidate &C2) {
            return C1.SU->getHeight() < C2.SU->getHeight();
          })->SU;
      advanceToCycle(std::max(CurCycle + 1, EarliestSU->getHeight()));
    }
    if (AvailQueue.empty())
      break;

    LLVM_DEBUG(dbgs() << "\n=== Picking candidate\n"
                         "Ready queue:";
               for (auto &C : AvailQueue)
                 dbgs() << ' ' << C.SU->NodeNum;
               dbgs() << '\n';);

    auto C = pickCandidate();
    assert(C);
    AvailQueue.remove(*C);
    auto SU = C->SU;
    LLVM_DEBUG(dbgs() << "Selected "; DAG.dumpNode(*SU));

    advanceToCycle(SU->getHeight());

    releasePredecessors(SU);
    Schedule.push_back(SU);
    SU->isScheduled = true;
  }
  assert(SUnits.size() == Schedule.size());

  std::reverse(Schedule.begin(), Schedule.end());

  // restore units
  for (auto &SU : SUnits)
    SU = SUSavedCopy[SU.NodeNum];

  return Schedule;
}

namespace llvm {
std::vector<const SUnit*> makeGCNILPScheduler(ArrayRef<const SUnit*> BotRoots,
                                              const ScheduleDAG &DAG) {
  GCNILPScheduler S;
  return S.schedule(BotRoots, DAG);
}
}
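
// Usage sketch (hypothetical, added for illustration): given a fully built
// ScheduleDAG, a caller can collect the bottom roots (SUnits with no
// successors) and ask for the ILP order.  `DAG' stands for any concrete
// ScheduleDAG instance and is an assumption of this sketch, not a reference
// to a specific caller in the backend.
//
//   SmallVector<const SUnit *, 8> BotRoots;
//   for (const SUnit &SU : DAG.SUnits)
//     if (SU.Succs.empty())
//       BotRoots.push_back(&SU);
//   std::vector<const SUnit *> Order = makeGCNILPScheduler(BotRoots, DAG);
//   // Order is in top-down program order; schedule() reverses its bottom-up
//   // picks before returning.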