LLVM  14.0.0git
GCNMinRegStrategy.cpp
Go to the documentation of this file.
1 //===- GCNMinRegStrategy.cpp ----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file defines and imlements the class GCNMinRegScheduler, which
11 /// implements an experimental, simple scheduler whose main goal is to learn
12 /// ways about consuming less possible registers for a region.
13 ///
14 //===----------------------------------------------------------------------===//
15 
17 using namespace llvm;
18 
19 #define DEBUG_TYPE "machine-scheduler"
20 
21 namespace {
22 
23 class GCNMinRegScheduler {
24  struct Candidate : ilist_node<Candidate> {
25  const SUnit *SU;
26  int Priority;
27 
28  Candidate(const SUnit *SU_, int Priority_ = 0)
29  : SU(SU_), Priority(Priority_) {}
30  };
31 
33  using Queue = simple_ilist<Candidate>;
34  Queue RQ; // Ready queue
35 
36  std::vector<unsigned> NumPreds;
37 
38  bool isScheduled(const SUnit *SU) const {
39  assert(!SU->isBoundaryNode());
40  return NumPreds[SU->NodeNum] == std::numeric_limits<unsigned>::max();
41  }
42 
43  void setIsScheduled(const SUnit *SU) {
44  assert(!SU->isBoundaryNode());
46  }
47 
48  unsigned getNumPreds(const SUnit *SU) const {
49  assert(!SU->isBoundaryNode());
51  return NumPreds[SU->NodeNum];
52  }
53 
54  unsigned decNumPreds(const SUnit *SU) {
55  assert(!SU->isBoundaryNode());
57  return --NumPreds[SU->NodeNum];
58  }
59 
60  void initNumPreds(const decltype(ScheduleDAG::SUnits) &SUnits);
61 
62  int getReadySuccessors(const SUnit *SU) const;
63  int getNotReadySuccessors(const SUnit *SU) const;
64 
65  template <typename Calc>
66  unsigned findMax(unsigned Num, Calc C);
67 
68  Candidate* pickCandidate();
69 
70  void bumpPredsPriority(const SUnit *SchedSU, int Priority);
71  void releaseSuccessors(const SUnit* SU, int Priority);
72 
73 public:
74  std::vector<const SUnit*> schedule(ArrayRef<const SUnit*> TopRoots,
75  const ScheduleDAG &DAG);
76 };
77 
78 } // end anonymous namespace
79 
80 void GCNMinRegScheduler::initNumPreds(const decltype(ScheduleDAG::SUnits) &SUnits) {
81  NumPreds.resize(SUnits.size());
82  for (unsigned I = 0; I < SUnits.size(); ++I)
83  NumPreds[I] = SUnits[I].NumPredsLeft;
84 }
85 
86 int GCNMinRegScheduler::getReadySuccessors(const SUnit *SU) const {
87  unsigned NumSchedSuccs = 0;
88  for (auto SDep : SU->Succs) {
89  bool wouldBeScheduled = true;
90  for (auto PDep : SDep.getSUnit()->Preds) {
91  auto PSU = PDep.getSUnit();
92  assert(!PSU->isBoundaryNode());
93  if (PSU != SU && !isScheduled(PSU)) {
94  wouldBeScheduled = false;
95  break;
96  }
97  }
98  NumSchedSuccs += wouldBeScheduled ? 1 : 0;
99  }
100  return NumSchedSuccs;
101 }
102 
103 int GCNMinRegScheduler::getNotReadySuccessors(const SUnit *SU) const {
104  return SU->Succs.size() - getReadySuccessors(SU);
105 }
106 
107 template <typename Calc>
108 unsigned GCNMinRegScheduler::findMax(unsigned Num, Calc C) {
109  assert(!RQ.empty() && Num <= RQ.size());
110 
111  using T = decltype(C(*RQ.begin())) ;
112 
114  unsigned NumMax = 0;
115  for (auto I = RQ.begin(); Num; --Num) {
116  T Cur = C(*I);
117  if (Cur >= Max) {
118  if (Cur > Max) {
119  Max = Cur;
120  NumMax = 1;
121  } else
122  ++NumMax;
123  auto &Cand = *I++;
124  RQ.remove(Cand);
125  RQ.push_front(Cand);
126  continue;
127  }
128  ++I;
129  }
130  return NumMax;
131 }
132 
133 GCNMinRegScheduler::Candidate* GCNMinRegScheduler::pickCandidate() {
134  do {
135  unsigned Num = RQ.size();
136  if (Num == 1) break;
137 
138  LLVM_DEBUG(dbgs() << "\nSelecting max priority candidates among " << Num
139  << '\n');
140  Num = findMax(Num, [=](const Candidate &C) { return C.Priority; });
141  if (Num == 1) break;
142 
143  LLVM_DEBUG(dbgs() << "\nSelecting min non-ready producing candidate among "
144  << Num << '\n');
145  Num = findMax(Num, [=](const Candidate &C) {
146  auto SU = C.SU;
147  int Res = getNotReadySuccessors(SU);
148  LLVM_DEBUG(dbgs() << "SU(" << SU->NodeNum << ") would left non-ready "
149  << Res << " successors, metric = " << -Res << '\n');
150  return -Res;
151  });
152  if (Num == 1) break;
153 
154  LLVM_DEBUG(dbgs() << "\nSelecting most producing candidate among " << Num
155  << '\n');
156  Num = findMax(Num, [=](const Candidate &C) {
157  auto SU = C.SU;
158  auto Res = getReadySuccessors(SU);
159  LLVM_DEBUG(dbgs() << "SU(" << SU->NodeNum << ") would make ready " << Res
160  << " successors, metric = " << Res << '\n');
161  return Res;
162  });
163  if (Num == 1) break;
164 
165  Num = Num ? Num : RQ.size();
166  LLVM_DEBUG(
167  dbgs()
168  << "\nCan't find best candidate, selecting in program order among "
169  << Num << '\n');
170  Num = findMax(Num, [=](const Candidate &C) { return -(int64_t)C.SU->NodeNum; });
171  assert(Num == 1);
172  } while (false);
173 
174  return &RQ.front();
175 }
176 
177 void GCNMinRegScheduler::bumpPredsPriority(const SUnit *SchedSU, int Priority) {
179  for (const auto &S : SchedSU->Succs) {
180  if (S.getSUnit()->isBoundaryNode() || isScheduled(S.getSUnit()) ||
181  S.getKind() != SDep::Data)
182  continue;
183  for (const auto &P : S.getSUnit()->Preds) {
184  auto PSU = P.getSUnit();
185  assert(!PSU->isBoundaryNode());
186  if (PSU != SchedSU && !isScheduled(PSU)) {
187  Set.insert(PSU);
188  }
189  }
190  }
191  SmallVector<const SUnit*, 32> Worklist(Set.begin(), Set.end());
192  while (!Worklist.empty()) {
193  auto SU = Worklist.pop_back_val();
194  assert(!SU->isBoundaryNode());
195  for (const auto &P : SU->Preds) {
196  if (!P.getSUnit()->isBoundaryNode() && !isScheduled(P.getSUnit()) &&
197  Set.insert(P.getSUnit()).second)
198  Worklist.push_back(P.getSUnit());
199  }
200  }
201  LLVM_DEBUG(dbgs() << "Make the predecessors of SU(" << SchedSU->NodeNum
202  << ")'s non-ready successors of " << Priority
203  << " priority in ready queue: ");
204  for (auto &C : RQ) {
205  if (Set.count(C.SU)) {
206  C.Priority = Priority;
207  LLVM_DEBUG(dbgs() << " SU(" << C.SU->NodeNum << ')');
208  }
209  }
210  LLVM_DEBUG(dbgs() << '\n');
211 }
212 
213 void GCNMinRegScheduler::releaseSuccessors(const SUnit* SU, int Priority) {
214  for (const auto &S : SU->Succs) {
215  auto SuccSU = S.getSUnit();
216  if (S.isWeak())
217  continue;
218  assert(SuccSU->isBoundaryNode() || getNumPreds(SuccSU) > 0);
219  if (!SuccSU->isBoundaryNode() && decNumPreds(SuccSU) == 0)
220  RQ.push_front(*new (Alloc.Allocate()) Candidate(SuccSU, Priority));
221  }
222 }
223 
224 std::vector<const SUnit*>
225 GCNMinRegScheduler::schedule(ArrayRef<const SUnit*> TopRoots,
226  const ScheduleDAG &DAG) {
227  const auto &SUnits = DAG.SUnits;
228  std::vector<const SUnit*> Schedule;
229  Schedule.reserve(SUnits.size());
230 
231  initNumPreds(SUnits);
232 
233  int StepNo = 0;
234 
235  for (auto SU : TopRoots) {
236  RQ.push_back(*new (Alloc.Allocate()) Candidate(SU, StepNo));
237  }
238  releaseSuccessors(&DAG.EntrySU, StepNo);
239 
240  while (!RQ.empty()) {
241  LLVM_DEBUG(dbgs() << "\n=== Picking candidate, Step = " << StepNo
242  << "\n"
243  "Ready queue:";
244  for (auto &C
245  : RQ) dbgs()
246  << ' ' << C.SU->NodeNum << "(P" << C.Priority << ')';
247  dbgs() << '\n';);
248 
249  auto C = pickCandidate();
250  assert(C);
251  RQ.remove(*C);
252  auto SU = C->SU;
253  LLVM_DEBUG(dbgs() << "Selected "; DAG.dumpNode(*SU));
254 
255  releaseSuccessors(SU, StepNo);
256  Schedule.push_back(SU);
257  setIsScheduled(SU);
258 
259  if (getReadySuccessors(SU) == 0)
260  bumpPredsPriority(SU, StepNo);
261 
262  ++StepNo;
263  }
264  assert(SUnits.size() == Schedule.size());
265 
266  return Schedule;
267 }
268 
269 namespace llvm {
270 
271 std::vector<const SUnit*> makeMinRegSchedule(ArrayRef<const SUnit*> TopRoots,
272  const ScheduleDAG &DAG) {
273  GCNMinRegScheduler S;
274  return S.schedule(TopRoots, DAG);
275 }
276 
277 } // end namespace llvm
ScheduleDAG.h
llvm
This file implements support for optimizing divisions by a constant.
Definition: AllocatorList.h:23
T
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::SpecificBumpPtrAllocator
A BumpPtrAllocator that allows only elements of a specific type to be allocated.
Definition: Allocator.h:376
llvm::SUnit::Succs
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:257
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::SUnit::isBoundaryNode
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
Definition: ScheduleDAG.h:344
llvm::BumpPtrAllocatorImpl::Allocate
LLVM_ATTRIBUTE_RETURNS_NONNULL LLVM_ATTRIBUTE_RETURNS_NOALIAS void * Allocate(size_t Size, Align Alignment)
Allocate space at the specified alignment.
Definition: Allocator.h:145
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::SUnit::NodeNum
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:264
llvm::makeMinRegSchedule
std::vector< const SUnit * > makeMinRegSchedule(ArrayRef< const SUnit * > TopRoots, const ScheduleDAG &DAG)
Definition: GCNMinRegStrategy.cpp:271
llvm::SDep::Data
@ Data
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:53
llvm::SmallPtrSetImpl::end
iterator end() const
Definition: SmallPtrSet.h:407
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::SmallPtrSetImpl::begin
iterator begin() const
Definition: SmallPtrSet.h:402
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
llvm::ScheduleDAG
Definition: ScheduleDAG.h:555
llvm::ScheduleDAG::dumpNode
virtual void dumpNode(const SUnit &SU) const =0
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::SDep::getSUnit
SUnit * getSUnit() const
Definition: ScheduleDAG.h:480
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::ScheduleDAG::EntrySU
SUnit EntrySU
Special node for the region entry.
Definition: ScheduleDAG.h:563
llvm::ilist_node
Definition: ilist_node.h:148
llvm::SDep
Scheduling dependency.
Definition: ScheduleDAG.h:49
llvm::ScheduleDAG::SUnits
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:562
llvm::pdb::DbgHeaderType::Max
@ Max
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
llvm::simple_ilist
A simple intrusive list implementation.
Definition: simple_ilist.h:78
llvm::SUnit::Preds
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:256
llvm::SUnit
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364