19#define DEBUG_TYPE "machine-scheduler"
23class GCNMinRegScheduler {
28 Candidate(
const SUnit *SU_,
int Priority_ = 0)
29 : SU(SU_), Priority(Priority_) {}
36 std::vector<unsigned> NumPreds;
38 bool isScheduled(
const SUnit *SU)
const {
40 return NumPreds[SU->
NodeNum] == std::numeric_limits<unsigned>::max();
43 void setIsScheduled(
const SUnit *SU) {
45 NumPreds[SU->
NodeNum] = std::numeric_limits<unsigned>::max();
48 unsigned getNumPreds(
const SUnit *SU)
const {
50 assert(NumPreds[SU->
NodeNum] != std::numeric_limits<unsigned>::max());
54 unsigned decNumPreds(
const SUnit *SU) {
56 assert(NumPreds[SU->
NodeNum] != std::numeric_limits<unsigned>::max());
62 int getReadySuccessors(
const SUnit *SU)
const;
63 int getNotReadySuccessors(
const SUnit *SU)
const;
65 template <
typename Calc>
66 unsigned findMax(
unsigned Num, Calc
C);
68 Candidate* pickCandidate();
70 void bumpPredsPriority(
const SUnit *SchedSU,
int Priority);
71 void releaseSuccessors(
const SUnit* SU,
int Priority);
81 NumPreds.resize(SUnits.size());
82 for (
unsigned I = 0;
I < SUnits.size(); ++
I)
83 NumPreds[
I] = SUnits[
I].NumPredsLeft;
86int GCNMinRegScheduler::getReadySuccessors(
const SUnit *SU)
const {
87 unsigned NumSchedSuccs = 0;
89 bool wouldBeScheduled =
true;
91 auto PSU = PDep.getSUnit();
92 assert(!PSU->isBoundaryNode());
93 if (PSU != SU && !isScheduled(PSU)) {
94 wouldBeScheduled =
false;
98 NumSchedSuccs += wouldBeScheduled ? 1 : 0;
100 return NumSchedSuccs;
103int GCNMinRegScheduler::getNotReadySuccessors(
const SUnit *SU)
const {
104 return SU->
Succs.size() - getReadySuccessors(SU);
107template <
typename Calc>
108unsigned GCNMinRegScheduler::findMax(
unsigned Num, Calc
C) {
109 assert(!RQ.empty() && Num <= RQ.size());
111 using T =
decltype(
C(*RQ.begin())) ;
113 T Max = std::numeric_limits<T>::min();
115 for (
auto I = RQ.begin(); Num; --Num) {
133GCNMinRegScheduler::Candidate* GCNMinRegScheduler::pickCandidate() {
135 unsigned Num = RQ.size();
138 LLVM_DEBUG(
dbgs() <<
"\nSelecting max priority candidates among " << Num
140 Num = findMax(Num, [=](
const Candidate &
C) {
return C.Priority; });
143 LLVM_DEBUG(
dbgs() <<
"\nSelecting min non-ready producing candidate among "
145 Num = findMax(Num, [=](
const Candidate &
C) {
147 int Res = getNotReadySuccessors(SU);
149 << Res <<
" successors, metric = " << -Res <<
'\n');
154 LLVM_DEBUG(
dbgs() <<
"\nSelecting most producing candidate among " << Num
156 Num = findMax(Num, [=](
const Candidate &
C) {
158 auto Res = getReadySuccessors(SU);
160 <<
" successors, metric = " << Res <<
'\n');
165 Num = Num ? Num : RQ.size();
168 <<
"\nCan't find best candidate, selecting in program order among "
170 Num = findMax(Num, [=](
const Candidate &
C) {
return -(int64_t)
C.SU->NodeNum; });
177void GCNMinRegScheduler::bumpPredsPriority(
const SUnit *SchedSU,
int Priority) {
179 for (
const auto &S : SchedSU->
Succs) {
180 if (S.getSUnit()->isBoundaryNode() || isScheduled(S.getSUnit()) ||
183 for (
const auto &
P : S.getSUnit()->Preds) {
184 auto PSU =
P.getSUnit();
185 assert(!PSU->isBoundaryNode());
186 if (PSU != SchedSU && !isScheduled(PSU)) {
192 while (!Worklist.empty()) {
193 auto SU = Worklist.pop_back_val();
195 for (
const auto &
P : SU->
Preds) {
196 if (!
P.getSUnit()->isBoundaryNode() && !isScheduled(
P.getSUnit()) &&
197 Set.insert(
P.getSUnit()).second)
198 Worklist.push_back(
P.getSUnit());
202 <<
")'s non-ready successors of " << Priority
203 <<
" priority in ready queue: ");
205 if (
Set.count(
C.SU)) {
206 C.Priority = Priority;
213void GCNMinRegScheduler::releaseSuccessors(
const SUnit* SU,
int Priority) {
214 for (
const auto &S : SU->
Succs) {
215 auto SuccSU = S.getSUnit();
218 assert(SuccSU->isBoundaryNode() || getNumPreds(SuccSU) > 0);
219 if (!SuccSU->isBoundaryNode() && decNumPreds(SuccSU) == 0)
220 RQ.push_front(*
new (Alloc.
Allocate()) Candidate(SuccSU, Priority));
224std::vector<const SUnit*>
227 const auto &SUnits = DAG.
SUnits;
228 std::vector<const SUnit*> Schedule;
229 Schedule.reserve(SUnits.size());
231 initNumPreds(SUnits);
235 for (
const auto *SU : TopRoots) {
236 RQ.push_back(*
new (Alloc.
Allocate()) Candidate(SU, StepNo));
238 releaseSuccessors(&DAG.
EntrySU, StepNo);
240 while (!RQ.empty()) {
246 <<
' ' <<
C.SU->NodeNum <<
"(P" <<
C.Priority <<
')';
249 auto C = pickCandidate();
255 releaseSuccessors(SU, StepNo);
256 Schedule.push_back(SU);
259 if (getReadySuccessors(SU) == 0)
260 bumpPredsPriority(SU, StepNo);
264 assert(SUnits.size() == Schedule.size());
273 GCNMinRegScheduler S;
274 return S.schedule(TopRoots, DAG);
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
LLVM_ATTRIBUTE_RETURNS_NONNULL void * Allocate(size_t Size, Align Alignment)
Allocate space at the specified alignment.
@ Data
Regular data dependence (aka true-dependence).
Scheduling unit. This is a node in the scheduling DAG.
unsigned NodeNum
Entry # of node in the node vector.
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
SmallVector< SDep, 4 > Succs
All sunit successors.
SmallVector< SDep, 4 > Preds
All sunit predecessors.
std::vector< SUnit > SUnits
The scheduling units.
SUnit EntrySU
Special node for the region entry.
virtual void dumpNode(const SUnit &SU) const =0
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A BumpPtrAllocator that allows only elements of a specific type to be allocated.
A simple intrusive list implementation.
@ C
The default llvm calling convention, compatible with C.
This is an optimization pass for GlobalISel generic memory operations.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
std::vector< const SUnit * > makeMinRegSchedule(ArrayRef< const SUnit * > TopRoots, const ScheduleDAG &DAG)