LLVM 19.0.0git
GCNMinRegStrategy.cpp
Go to the documentation of this file.
1//===- GCNMinRegStrategy.cpp ----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file defines and implements the class GCNMinRegScheduler, which
11/// implements an experimental, simple scheduler whose main goal is to learn
12/// ways about consuming less possible registers for a region.
13///
14//===----------------------------------------------------------------------===//
15
17using namespace llvm;
18
19#define DEBUG_TYPE "machine-scheduler"
20
21namespace {
22
23class GCNMinRegScheduler {
24 struct Candidate : ilist_node<Candidate> {
25 const SUnit *SU;
26 int Priority;
27
28 Candidate(const SUnit *SU_, int Priority_ = 0)
29 : SU(SU_), Priority(Priority_) {}
30 };
31
33 using Queue = simple_ilist<Candidate>;
34 Queue RQ; // Ready queue
35
36 std::vector<unsigned> NumPreds;
37
38 bool isScheduled(const SUnit *SU) const {
39 assert(!SU->isBoundaryNode());
40 return NumPreds[SU->NodeNum] == std::numeric_limits<unsigned>::max();
41 }
42
43 void setIsScheduled(const SUnit *SU) {
44 assert(!SU->isBoundaryNode());
45 NumPreds[SU->NodeNum] = std::numeric_limits<unsigned>::max();
46 }
47
48 unsigned getNumPreds(const SUnit *SU) const {
49 assert(!SU->isBoundaryNode());
50 assert(NumPreds[SU->NodeNum] != std::numeric_limits<unsigned>::max());
51 return NumPreds[SU->NodeNum];
52 }
53
54 unsigned decNumPreds(const SUnit *SU) {
55 assert(!SU->isBoundaryNode());
56 assert(NumPreds[SU->NodeNum] != std::numeric_limits<unsigned>::max());
57 return --NumPreds[SU->NodeNum];
58 }
59
60 void initNumPreds(const decltype(ScheduleDAG::SUnits) &SUnits);
61
62 int getReadySuccessors(const SUnit *SU) const;
63 int getNotReadySuccessors(const SUnit *SU) const;
64
65 template <typename Calc>
66 unsigned findMax(unsigned Num, Calc C);
67
68 Candidate* pickCandidate();
69
70 void bumpPredsPriority(const SUnit *SchedSU, int Priority);
71 void releaseSuccessors(const SUnit* SU, int Priority);
72
73public:
74 std::vector<const SUnit*> schedule(ArrayRef<const SUnit*> TopRoots,
75 const ScheduleDAG &DAG);
76};
77
78} // end anonymous namespace
79
80void GCNMinRegScheduler::initNumPreds(const decltype(ScheduleDAG::SUnits) &SUnits) {
81 NumPreds.resize(SUnits.size());
82 for (unsigned I = 0; I < SUnits.size(); ++I)
83 NumPreds[I] = SUnits[I].NumPredsLeft;
84}
85
86int GCNMinRegScheduler::getReadySuccessors(const SUnit *SU) const {
87 unsigned NumSchedSuccs = 0;
88 for (auto SDep : SU->Succs) {
89 bool wouldBeScheduled = true;
90 for (auto PDep : SDep.getSUnit()->Preds) {
91 auto PSU = PDep.getSUnit();
92 assert(!PSU->isBoundaryNode());
93 if (PSU != SU && !isScheduled(PSU)) {
94 wouldBeScheduled = false;
95 break;
96 }
97 }
98 NumSchedSuccs += wouldBeScheduled ? 1 : 0;
99 }
100 return NumSchedSuccs;
101}
102
103int GCNMinRegScheduler::getNotReadySuccessors(const SUnit *SU) const {
104 return SU->Succs.size() - getReadySuccessors(SU);
105}
106
107template <typename Calc>
108unsigned GCNMinRegScheduler::findMax(unsigned Num, Calc C) {
109 assert(!RQ.empty() && Num <= RQ.size());
110
111 using T = decltype(C(*RQ.begin())) ;
112
113 T Max = std::numeric_limits<T>::min();
114 unsigned NumMax = 0;
115 for (auto I = RQ.begin(); Num; --Num) {
116 T Cur = C(*I);
117 if (Cur >= Max) {
118 if (Cur > Max) {
119 Max = Cur;
120 NumMax = 1;
121 } else
122 ++NumMax;
123 auto &Cand = *I++;
124 RQ.remove(Cand);
125 RQ.push_front(Cand);
126 continue;
127 }
128 ++I;
129 }
130 return NumMax;
131}
132
133GCNMinRegScheduler::Candidate* GCNMinRegScheduler::pickCandidate() {
134 do {
135 unsigned Num = RQ.size();
136 if (Num == 1) break;
137
138 LLVM_DEBUG(dbgs() << "\nSelecting max priority candidates among " << Num
139 << '\n');
140 Num = findMax(Num, [=](const Candidate &C) { return C.Priority; });
141 if (Num == 1) break;
142
143 LLVM_DEBUG(dbgs() << "\nSelecting min non-ready producing candidate among "
144 << Num << '\n');
145 Num = findMax(Num, [=](const Candidate &C) {
146 auto SU = C.SU;
147 int Res = getNotReadySuccessors(SU);
148 LLVM_DEBUG(dbgs() << "SU(" << SU->NodeNum << ") would left non-ready "
149 << Res << " successors, metric = " << -Res << '\n');
150 return -Res;
151 });
152 if (Num == 1) break;
153
154 LLVM_DEBUG(dbgs() << "\nSelecting most producing candidate among " << Num
155 << '\n');
156 Num = findMax(Num, [=](const Candidate &C) {
157 auto SU = C.SU;
158 auto Res = getReadySuccessors(SU);
159 LLVM_DEBUG(dbgs() << "SU(" << SU->NodeNum << ") would make ready " << Res
160 << " successors, metric = " << Res << '\n');
161 return Res;
162 });
163 if (Num == 1) break;
164
165 Num = Num ? Num : RQ.size();
167 dbgs()
168 << "\nCan't find best candidate, selecting in program order among "
169 << Num << '\n');
170 Num = findMax(Num, [=](const Candidate &C) { return -(int64_t)C.SU->NodeNum; });
171 assert(Num == 1);
172 } while (false);
173
174 return &RQ.front();
175}
176
177void GCNMinRegScheduler::bumpPredsPriority(const SUnit *SchedSU, int Priority) {
179 for (const auto &S : SchedSU->Succs) {
180 if (S.getSUnit()->isBoundaryNode() || isScheduled(S.getSUnit()) ||
181 S.getKind() != SDep::Data)
182 continue;
183 for (const auto &P : S.getSUnit()->Preds) {
184 auto PSU = P.getSUnit();
185 assert(!PSU->isBoundaryNode());
186 if (PSU != SchedSU && !isScheduled(PSU)) {
187 Set.insert(PSU);
188 }
189 }
190 }
191 SmallVector<const SUnit*, 32> Worklist(Set.begin(), Set.end());
192 while (!Worklist.empty()) {
193 auto SU = Worklist.pop_back_val();
194 assert(!SU->isBoundaryNode());
195 for (const auto &P : SU->Preds) {
196 if (!P.getSUnit()->isBoundaryNode() && !isScheduled(P.getSUnit()) &&
197 Set.insert(P.getSUnit()).second)
198 Worklist.push_back(P.getSUnit());
199 }
200 }
201 LLVM_DEBUG(dbgs() << "Make the predecessors of SU(" << SchedSU->NodeNum
202 << ")'s non-ready successors of " << Priority
203 << " priority in ready queue: ");
204 for (auto &C : RQ) {
205 if (Set.count(C.SU)) {
206 C.Priority = Priority;
207 LLVM_DEBUG(dbgs() << " SU(" << C.SU->NodeNum << ')');
208 }
209 }
210 LLVM_DEBUG(dbgs() << '\n');
211}
212
213void GCNMinRegScheduler::releaseSuccessors(const SUnit* SU, int Priority) {
214 for (const auto &S : SU->Succs) {
215 auto SuccSU = S.getSUnit();
216 if (S.isWeak())
217 continue;
218 assert(SuccSU->isBoundaryNode() || getNumPreds(SuccSU) > 0);
219 if (!SuccSU->isBoundaryNode() && decNumPreds(SuccSU) == 0)
220 RQ.push_front(*new (Alloc.Allocate()) Candidate(SuccSU, Priority));
221 }
222}
223
224std::vector<const SUnit*>
225GCNMinRegScheduler::schedule(ArrayRef<const SUnit*> TopRoots,
226 const ScheduleDAG &DAG) {
227 const auto &SUnits = DAG.SUnits;
228 std::vector<const SUnit*> Schedule;
229 Schedule.reserve(SUnits.size());
230
231 initNumPreds(SUnits);
232
233 int StepNo = 0;
234
235 for (const auto *SU : TopRoots) {
236 RQ.push_back(*new (Alloc.Allocate()) Candidate(SU, StepNo));
237 }
238 releaseSuccessors(&DAG.EntrySU, StepNo);
239
240 while (!RQ.empty()) {
241 LLVM_DEBUG(dbgs() << "\n=== Picking candidate, Step = " << StepNo
242 << "\n"
243 "Ready queue:";
244 for (auto &C
245 : RQ) dbgs()
246 << ' ' << C.SU->NodeNum << "(P" << C.Priority << ')';
247 dbgs() << '\n';);
248
249 auto C = pickCandidate();
250 assert(C);
251 RQ.remove(*C);
252 auto SU = C->SU;
253 LLVM_DEBUG(dbgs() << "Selected "; DAG.dumpNode(*SU));
254
255 releaseSuccessors(SU, StepNo);
256 Schedule.push_back(SU);
257 setIsScheduled(SU);
258
259 if (getReadySuccessors(SU) == 0)
260 bumpPredsPriority(SU, StepNo);
261
262 ++StepNo;
263 }
264 assert(SUnits.size() == Schedule.size());
265
266 return Schedule;
267}
268
269namespace llvm {
270
271std::vector<const SUnit*> makeMinRegSchedule(ArrayRef<const SUnit*> TopRoots,
272 const ScheduleDAG &DAG) {
273 GCNMinRegScheduler S;
274 return S.schedule(TopRoots, DAG);
275}
276
277} // end namespace llvm
#define LLVM_DEBUG(X)
Definition: Debug.h:101
#define I(x, y, z)
Definition: MD5.cpp:58
#define P(N)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
LLVM_ATTRIBUTE_RETURNS_NONNULL void * Allocate(size_t Size, Align Alignment)
Allocate space at the specified alignment.
Definition: Allocator.h:148
Scheduling dependency.
Definition: ScheduleDAG.h:49
SUnit * getSUnit() const
Definition: ScheduleDAG.h:480
@ Data
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:53
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:264
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
Definition: ScheduleDAG.h:344
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:257
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:256
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:561
SUnit EntrySU
Special node for the region entry.
Definition: ScheduleDAG.h:562
virtual void dumpNode(const SUnit &SU) const =0
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:360
iterator end() const
Definition: SmallPtrSet.h:385
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
iterator begin() const
Definition: SmallPtrSet.h:380
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
A BumpPtrAllocator that allows only elements of a specific type to be allocated.
Definition: Allocator.h:382
A simple intrusive list implementation.
Definition: simple_ilist.h:81
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
std::vector< const SUnit * > makeMinRegSchedule(ArrayRef< const SUnit * > TopRoots, const ScheduleDAG &DAG)