Line data Source code
1 : //===---------------------------- GCNILPSched.cpp - -----------------------===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : //
10 : /// \file
11 : //
12 : //===----------------------------------------------------------------------===//
13 :
14 : #include "llvm/CodeGen/ScheduleDAG.h"
15 :
16 : using namespace llvm;
17 :
18 : #define DEBUG_TYPE "machine-scheduler"
19 :
20 : namespace {
21 :
22 2 : class GCNILPScheduler {
23 : struct Candidate : ilist_node<Candidate> {
24 : SUnit *SU;
25 :
26 : Candidate(SUnit *SU_)
27 906 : : SU(SU_) {}
28 : };
29 :
30 : SpecificBumpPtrAllocator<Candidate> Alloc;
31 : typedef simple_ilist<Candidate> Queue;
32 : Queue PendingQueue;
33 : Queue AvailQueue;
34 : unsigned CurQueueId = 0;
35 :
36 : std::vector<unsigned> SUNumbers;
37 :
38 : /// CurCycle - The current scheduler state corresponds to this cycle.
39 : unsigned CurCycle = 0;
40 :
41 : unsigned getNodePriority(const SUnit *SU) const;
42 :
43 : const SUnit *pickBest(const SUnit *left, const SUnit *right);
44 : Candidate* pickCandidate();
45 :
46 : void releasePending();
47 : void advanceToCycle(unsigned NextCycle);
48 : void releasePredecessors(const SUnit* SU);
49 :
50 : public:
51 : std::vector<const SUnit*> schedule(ArrayRef<const SUnit*> TopRoots,
52 : const ScheduleDAG &DAG);
53 : };
54 : } // namespace
55 :
56 : /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
57 : /// Smaller number is the higher priority.
58 : static unsigned
59 1412 : CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
60 1412 : unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
61 1412 : if (SethiUllmanNumber != 0)
62 : return SethiUllmanNumber;
63 :
64 : unsigned Extra = 0;
65 4962 : for (const SDep &Pred : SU->Preds) {
66 4508 : if (Pred.isCtrl()) continue; // ignore chain preds
67 : SUnit *PredSU = Pred.getSUnit();
68 958 : unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
69 958 : if (PredSethiUllman > SethiUllmanNumber) {
70 476 : SethiUllmanNumber = PredSethiUllman;
71 : Extra = 0;
72 : }
73 482 : else if (PredSethiUllman == SethiUllmanNumber)
74 446 : ++Extra;
75 : }
76 :
77 454 : SethiUllmanNumber += Extra;
78 :
79 454 : if (SethiUllmanNumber == 0)
80 4 : SethiUllmanNumber = 1;
81 :
82 454 : return SethiUllmanNumber;
83 : }
84 :
85 : // Lower priority means schedule further down. For bottom-up scheduling, lower
86 : // priority SUs are scheduled before higher priority SUs.
87 : unsigned GCNILPScheduler::getNodePriority(const SUnit *SU) const {
88 : assert(SU->NodeNum < SUNumbers.size());
89 19386 : if (SU->NumSuccs == 0 && SU->NumPreds != 0)
90 : // If SU does not have a register use, i.e. it doesn't produce a value
91 : // that would be consumed (e.g. store), then it terminates a chain of
92 : // computation. Give it a large SethiUllman number so it will be
93 : // scheduled right before its predecessors that it doesn't lengthen
94 : // their live ranges.
95 : return 0xffff;
96 :
97 19318 : if (SU->NumPreds == 0 && SU->NumSuccs != 0)
98 : // If SU does not have a register def, schedule it close to its uses
99 : // because it does not lengthen any live ranges.
100 : return 0;
101 :
102 38628 : return SUNumbers[SU->NodeNum];
103 : }
104 :
105 : /// closestSucc - Returns the scheduled cycle of the successor which is
106 : /// closest to the current cycle.
107 17624 : static unsigned closestSucc(const SUnit *SU) {
108 : unsigned MaxHeight = 0;
109 308592 : for (const SDep &Succ : SU->Succs) {
110 290968 : if (Succ.isCtrl()) continue; // ignore chain succs
111 : unsigned Height = Succ.getSUnit()->getHeight();
112 : // If there are bunch of CopyToRegs stacked up, they should be considered
113 : // to be at the same position.
114 22988 : if (Height > MaxHeight)
115 : MaxHeight = Height;
116 : }
117 17624 : return MaxHeight;
118 : }
119 :
120 : /// calcMaxScratches - Returns an cost estimate of the worse case requirement
121 : /// for scratch registers, i.e. number of data dependencies.
122 : static unsigned calcMaxScratches(const SUnit *SU) {
123 : unsigned Scratches = 0;
124 54320 : for (const SDep &Pred : SU->Preds) {
125 36708 : if (Pred.isCtrl()) continue; // ignore chain preds
126 35224 : Scratches++;
127 : }
128 : return Scratches;
129 : }
130 :
131 : // Return -1 if left has higher priority, 1 if right has higher priority.
132 : // Return 0 if latency-based priority is equivalent.
133 8806 : static int BUCompareLatency(const SUnit *left, const SUnit *right) {
134 : // Scheduling an instruction that uses a VReg whose postincrement has not yet
135 : // been scheduled will induce a copy. Model this as an extra cycle of latency.
136 8806 : int LHeight = (int)left->getHeight();
137 8806 : int RHeight = (int)right->getHeight();
138 :
139 : // If either node is scheduling for latency, sort them by height/depth
140 : // and latency.
141 :
142 : // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer
143 : // is enabled, grouping instructions by cycle, then its height is already
144 : // covered so only its depth matters. We also reach this point if both stall
145 : // but have the same height.
146 8806 : if (LHeight != RHeight)
147 0 : return LHeight > RHeight ? 1 : -1;
148 :
149 8806 : int LDepth = left->getDepth();
150 8806 : int RDepth = right->getDepth();
151 8806 : if (LDepth != RDepth) {
152 : LLVM_DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
153 : << ") depth " << LDepth << " vs SU (" << right->NodeNum
154 : << ") depth " << RDepth << "\n");
155 0 : return LDepth < RDepth ? 1 : -1;
156 : }
157 8806 : if (left->Latency != right->Latency)
158 0 : return left->Latency > right->Latency ? 1 : -1;
159 :
160 : return 0;
161 : }
162 :
163 9788 : const SUnit *GCNILPScheduler::pickBest(const SUnit *left, const SUnit *right)
164 : {
165 : // TODO: add register pressure lowering checks
166 :
167 : bool const DisableSchedCriticalPath = false;
168 : int MaxReorderWindow = 6;
169 : if (!DisableSchedCriticalPath) {
170 9788 : int spread = (int)left->getDepth() - (int)right->getDepth();
171 9788 : if (std::abs(spread) > MaxReorderWindow) {
172 : LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
173 : << left->getDepth() << " != SU(" << right->NodeNum
174 : << "): " << right->getDepth() << "\n");
175 76 : return left->getDepth() < right->getDepth() ? right : left;
176 : }
177 : }
178 :
179 : bool const DisableSchedHeight = false;
180 9712 : if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
181 838 : int spread = (int)left->getHeight() - (int)right->getHeight();
182 838 : if (std::abs(spread) > MaxReorderWindow)
183 0 : return left->getHeight() > right->getHeight() ? right : left;
184 : }
185 :
186 : // Prioritize by Sethi-Ulmann number and push CopyToReg nodes down.
187 : unsigned LPriority = getNodePriority(left);
188 : unsigned RPriority = getNodePriority(right);
189 :
190 9712 : if (LPriority != RPriority)
191 1560 : return LPriority > RPriority ? right : left;
192 :
193 : // Try schedule def + use closer when Sethi-Ullman numbers are the same.
194 : // e.g.
195 : // t1 = op t2, c1
196 : // t3 = op t4, c2
197 : //
198 : // and the following instructions are both ready.
199 : // t2 = op c3
200 : // t4 = op c4
201 : //
202 : // Then schedule t2 = op first.
203 : // i.e.
204 : // t4 = op c4
205 : // t2 = op c3
206 : // t1 = op t2, c1
207 : // t3 = op t4, c2
208 : //
209 : // This creates more short live intervals.
210 8812 : unsigned LDist = closestSucc(left);
211 8812 : unsigned RDist = closestSucc(right);
212 8812 : if (LDist != RDist)
213 6 : return LDist < RDist ? right : left;
214 :
215 : // How many registers becomes live when the node is scheduled.
216 : unsigned LScratch = calcMaxScratches(left);
217 : unsigned RScratch = calcMaxScratches(right);
218 8806 : if (LScratch != RScratch)
219 0 : return LScratch > RScratch ? right : left;
220 :
221 : bool const DisableSchedCycles = false;
222 : if (!DisableSchedCycles) {
223 8806 : int result = BUCompareLatency(left, right);
224 8806 : if (result != 0)
225 0 : return result > 0 ? right : left;
226 : return left;
227 : }
228 : else {
229 : if (left->getHeight() != right->getHeight())
230 : return (left->getHeight() > right->getHeight()) ? right : left;
231 :
232 : if (left->getDepth() != right->getDepth())
233 : return (left->getDepth() < right->getDepth()) ? right : left;
234 : }
235 :
236 : assert(left->NodeQueueId && right->NodeQueueId &&
237 : "NodeQueueId cannot be zero");
238 : return (left->NodeQueueId > right->NodeQueueId) ? right : left;
239 : }
240 :
241 454 : GCNILPScheduler::Candidate* GCNILPScheduler::pickCandidate() {
242 454 : if (AvailQueue.empty())
243 : return nullptr;
244 : auto Best = AvailQueue.begin();
245 10242 : for (auto I = std::next(AvailQueue.begin()), E = AvailQueue.end(); I != E; ++I) {
246 9788 : auto NewBestSU = pickBest(Best->SU, I->SU);
247 9788 : if (NewBestSU != Best->SU) {
248 : assert(NewBestSU == I->SU);
249 : Best = I;
250 : }
251 : }
252 : return &*Best;
253 : }
254 :
255 66 : void GCNILPScheduler::releasePending() {
256 : // Check to see if any of the pending instructions are ready to issue. If
257 : // so, add them to the available queue.
258 518 : for(auto I = PendingQueue.begin(), E = PendingQueue.end(); I != E;) {
259 : auto &C = *I++;
260 452 : if (C.SU->getHeight() <= CurCycle) {
261 : PendingQueue.remove(C);
262 : AvailQueue.push_back(C);
263 452 : C.SU->NodeQueueId = CurQueueId++;
264 : }
265 : }
266 66 : }
267 :
268 : /// Move the scheduler state forward by the specified number of Cycles.
269 : void GCNILPScheduler::advanceToCycle(unsigned NextCycle) {
270 454 : if (NextCycle <= CurCycle)
271 : return;
272 66 : CurCycle = NextCycle;
273 66 : releasePending();
274 : }
275 :
276 456 : void GCNILPScheduler::releasePredecessors(const SUnit* SU) {
277 4964 : for (const auto &PredEdge : SU->Preds) {
278 : auto PredSU = PredEdge.getSUnit();
279 : if (PredEdge.isWeak())
280 : continue;
281 : assert(PredSU->isBoundaryNode() || PredSU->NumSuccsLeft > 0);
282 :
283 4508 : PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge.getLatency());
284 :
285 4508 : if (!PredSU->isBoundaryNode() && --PredSU->NumSuccsLeft == 0)
286 : PendingQueue.push_front(*new (Alloc.Allocate()) Candidate(PredSU));
287 : }
288 456 : }
289 :
290 : std::vector<const SUnit*>
291 2 : GCNILPScheduler::schedule(ArrayRef<const SUnit*> BotRoots,
292 : const ScheduleDAG &DAG) {
293 : auto &SUnits = const_cast<ScheduleDAG&>(DAG).SUnits;
294 :
295 2 : std::vector<SUnit> SUSavedCopy;
296 4 : SUSavedCopy.resize(SUnits.size());
297 :
298 : // we cannot save only those fields we touch: some of them are private
299 : // so save units verbatim: this assumes SUnit should have value semantics
300 456 : for (const SUnit &SU : SUnits)
301 908 : SUSavedCopy[SU.NodeNum] = SU;
302 :
303 2 : SUNumbers.assign(SUnits.size(), 0);
304 456 : for (const SUnit &SU : SUnits)
305 454 : CalcNodeSethiUllmanNumber(&SU, SUNumbers);
306 :
307 4 : for (auto SU : BotRoots) {
308 : AvailQueue.push_back(
309 : *new (Alloc.Allocate()) Candidate(const_cast<SUnit*>(SU)));
310 : }
311 2 : releasePredecessors(&DAG.ExitSU);
312 :
313 : std::vector<const SUnit*> Schedule;
314 4 : Schedule.reserve(SUnits.size());
315 : while (true) {
316 456 : if (AvailQueue.empty() && !PendingQueue.empty()) {
317 : auto EarliestSU = std::min_element(
318 : PendingQueue.begin(), PendingQueue.end(),
319 : [=](const Candidate& C1, const Candidate& C2) {
320 : return C1.SU->getHeight() < C2.SU->getHeight();
321 66 : })->SU;
322 134 : advanceToCycle(std::max(CurCycle + 1, EarliestSU->getHeight()));
323 : }
324 456 : if (AvailQueue.empty())
325 : break;
326 :
327 : LLVM_DEBUG(dbgs() << "\n=== Picking candidate\n"
328 : "Ready queue:";
329 : for (auto &C
330 : : AvailQueue) dbgs()
331 : << ' ' << C.SU->NodeNum;
332 : dbgs() << '\n';);
333 :
334 454 : auto C = pickCandidate();
335 : assert(C);
336 : AvailQueue.remove(*C);
337 454 : auto SU = C->SU;
338 : LLVM_DEBUG(dbgs() << "Selected "; DAG.dumpNode(*SU));
339 :
340 : advanceToCycle(SU->getHeight());
341 :
342 454 : releasePredecessors(SU);
343 454 : Schedule.push_back(SU);
344 454 : SU->isScheduled = true;
345 454 : }
346 : assert(SUnits.size() == Schedule.size());
347 :
348 : std::reverse(Schedule.begin(), Schedule.end());
349 :
350 : // restore units
351 456 : for (auto &SU : SUnits)
352 908 : SU = SUSavedCopy[SU.NodeNum];
353 :
354 2 : return Schedule;
355 : }
356 :
357 : namespace llvm {
358 2 : std::vector<const SUnit*> makeGCNILPScheduler(ArrayRef<const SUnit*> BotRoots,
359 : const ScheduleDAG &DAG) {
360 2 : GCNILPScheduler S;
361 2 : return S.schedule(BotRoots, DAG);
362 : }
363 : }
|