#include "llvm/Config/llvm-config.h"

using namespace llvm;

#define DEBUG_TYPE "pre-RA-sched"
STATISTIC(NumNewPredsAdded,
          "Number of times a single predecessor was added");
STATISTIC(NumTopoInits,
          "Number of times the topological order has been recomputed");

static cl::opt<bool> StressSchedOpt(
    "stress-sched", cl::Hidden, cl::init(false),
    cl::desc("Stress test instruction scheduling"));
void SchedulingPriorityQueue::anchor() {}
ScheduleDAG::ScheduleDAG(MachineFunction &mf)
    : TM(mf.getTarget()), TII(mf.getSubtarget().getInstrInfo()),
      TRI(mf.getSubtarget().getRegisterInfo()), MF(mf), MRI(mf.getRegInfo()) {
#ifndef NDEBUG
  StressSched = StressSchedOpt;
#endif
}
const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const {
  if (!Node || !Node->isMachineOpcode())
    return nullptr;
  return &TII->get(Node->getMachineOpcode());
}
  switch (Contents.OrdKind) { // In SDep::dump(): print the Order sub-kind.
  // In SUnit::addPred(): skip redundant edges; an overlapping edge only gets
  // its latency extended instead of adding a duplicate.
  for (SDep &PredDep : Preds) {
    if (!Required && PredDep.getSUnit() == D.getSUnit())
      return false;
    if (PredDep.overlaps(D)) {
      // Extend the latency if needed; equivalent to removePred + addPred.
      if (PredDep.getLatency() < D.getLatency()) {
        SUnit *PredSU = PredDep.getSUnit();
        // Find and update the corresponding successor edge in PredSU->Succs.
        SDep ForwardD = PredDep;
        ForwardD.setSUnit(this);
        for (SDep &SuccDep : PredSU->Succs) {
          if (SuccDep == ForwardD) {
            SuccDep.setLatency(D.getLatency());
            break;
          }
        }
        PredDep.setLatency(D.getLatency());
      }
      return false;
    }
  }
  // Bookkeeping: N is the predecessor D.getSUnit(); P is the mirrored edge.
  if (D.getKind() == SDep::Data) {
    assert(NumPreds < std::numeric_limits<unsigned>::max() && "NumPreds will overflow!");
    assert(N->NumSuccs < std::numeric_limits<unsigned>::max() && "NumSuccs will overflow!");
    ++NumPreds;
    ++N->NumSuccs;
  }
  if (!N->isScheduled) {
    if (D.isWeak()) {
      ++WeakPredsLeft;
    } else {
      assert(NumPredsLeft < std::numeric_limits<unsigned>::max() && "NumPredsLeft will overflow!");
      ++NumPredsLeft;
    }
  }
  if (!isScheduled) {
    if (D.isWeak()) {
      ++N->WeakSuccsLeft;
    } else {
      assert(N->NumSuccsLeft < std::numeric_limits<unsigned>::max() && "NumSuccsLeft will overflow!");
      ++N->NumSuccsLeft;
    }
  }
  Preds.push_back(D);
  N->Succs.push_back(P);
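// [Editor's sketch, not part of the original file] Typical use of
// SUnit::addPred() by a DAG builder: describe the dependence with an SDep,
// optionally set its latency, and attach it to the consuming node. DefSU,
// UseSU and Reg are hypothetical placeholders.
#if 0
  SDep Edge(DefSU, SDep::Data, Reg); // UseSU reads a value defined by DefSU
  Edge.setLatency(2);                // assumed two-cycle def-to-use latency
  UseSU->addPred(Edge);              // mirrors the edge into DefSU->Succs too
#endif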
  // In SUnit::removePred(): Succ is the mirrored edge found in N->Succs.
  assert(Succ != N->Succs.end() && "Mismatching preds / succs lists!");
  // Update the bookkeeping.
  if (P.getKind() == SDep::Data) {
    assert(NumPreds > 0 && "NumPreds will underflow!");
    assert(N->NumSuccs > 0 && "NumSuccs will underflow!");
    --NumPreds;
    --N->NumSuccs;
  }
  if (!N->isScheduled) {
    if (D.isWeak()) {
      assert(WeakPredsLeft > 0 && "WeakPredsLeft will underflow!");
      --WeakPredsLeft;
    } else {
      assert(NumPredsLeft > 0 && "NumPredsLeft will underflow!");
      --NumPredsLeft;
    }
  }
  if (!isScheduled) {
    if (D.isWeak()) {
      assert(N->WeakSuccsLeft > 0 && "WeakSuccsLeft will underflow!");
      --N->WeakSuccsLeft;
    } else {
      assert(N->NumSuccsLeft > 0 && "NumSuccsLeft will underflow!");
      --N->NumSuccsLeft;
    }
  }
  N->Succs.erase(Succ);
void SUnit::setDepthDirty() {
  if (!isDepthCurrent) return;
  SmallVector<SUnit*, 8> WorkList;
  WorkList.push_back(this);
  do {
    SUnit *SU = WorkList.pop_back_val();
    SU->isDepthCurrent = false;
    for (SDep &SuccDep : SU->Succs) {
      SUnit *SuccSU = SuccDep.getSUnit();
      if (SuccSU->isDepthCurrent)
        WorkList.push_back(SuccSU);
    }
  } while (!WorkList.empty());
}
void SUnit::setHeightDirty() {
  if (!isHeightCurrent) return;
  SmallVector<SUnit*, 8> WorkList;
  WorkList.push_back(this);
  do {
    SUnit *SU = WorkList.pop_back_val();
    SU->isHeightCurrent = false;
    for (SDep &PredDep : SU->Preds) {
      SUnit *PredSU = PredDep.getSUnit();
      if (PredSU->isHeightCurrent)
        WorkList.push_back(PredSU);
    }
  } while (!WorkList.empty());
}
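// [Editor's sketch, not part of the original file] Callers that mutate edges
// in ways that can change path lengths are expected to invalidate the cached
// values on the affected endpoints; SU and PredSU are hypothetical.
#if 0
  SU->setDepthDirty();      // SU's longest path from the entry may change
  PredSU->setHeightDirty(); // PredSU's longest path to the exit may change
#endif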
  // Tail of SUnit::setDepthToAtLeast(): after raising Depth, mark it current.
  isDepthCurrent = true;
}

  // Tail of SUnit::setHeightToAtLeast(): after raising Height, mark it current.
  isHeightCurrent = true;
}
void SUnit::ComputeDepth() {
  SmallVector<SUnit*, 8> WorkList;
  WorkList.push_back(this);
  do {
    SUnit *Cur = WorkList.back();
    bool Done = true;
    unsigned MaxPredDepth = 0;
    for (const SDep &PredDep : Cur->Preds) {
      SUnit *PredSU = PredDep.getSUnit();
      if (PredSU->isDepthCurrent)
        MaxPredDepth = std::max(MaxPredDepth,
                                PredSU->Depth + PredDep.getLatency());
      else {
        Done = false;
        WorkList.push_back(PredSU);
      }
    }
    if (Done) {
      WorkList.pop_back();
      if (MaxPredDepth != Cur->Depth) {
        Cur->setDepthDirty();
        Cur->Depth = MaxPredDepth;
      }
      Cur->isDepthCurrent = true;
    }
  } while (!WorkList.empty());
}
void SUnit::ComputeHeight() {
  SmallVector<SUnit*, 8> WorkList;
  WorkList.push_back(this);
  do {
    SUnit *Cur = WorkList.back();
    bool Done = true;
    unsigned MaxSuccHeight = 0;
    for (const SDep &SuccDep : Cur->Succs) {
      SUnit *SuccSU = SuccDep.getSUnit();
      if (SuccSU->isHeightCurrent)
        MaxSuccHeight = std::max(MaxSuccHeight,
                                 SuccSU->Height + SuccDep.getLatency());
      else {
        Done = false;
        WorkList.push_back(SuccSU);
      }
    }
    if (Done) {
      WorkList.pop_back();
      if (MaxSuccHeight != Cur->Height) {
        Cur->setHeightDirty();
        Cur->Height = MaxSuccHeight;
      }
      Cur->isHeightCurrent = true;
    }
  } while (!WorkList.empty());
}
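// [Editor's sketch, not part of the original file] Depth and Height are cached
// and recomputed lazily: getDepth()/getHeight() run ComputeDepth()/
// ComputeHeight() only when the cached value was marked dirty, so callers can
// simply query them. SU is a hypothetical node.
#if 0
  unsigned DistFromEntry = SU->getDepth(); // max-latency path from a root
  unsigned DistToExit = SU->getHeight();   // max-latency path to a leaf
#endif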
  // In SUnit::biasCriticalPath(): BestI starts at Preds.begin() and is moved
  // to the data predecessor with the greatest depth; swap it to the front.
  unsigned MaxDepth = BestI->getSUnit()->getDepth();
  if (BestI != Preds.begin())
    std::swap(*Preds.begin(), *BestI);
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  // In ScheduleDAG::dumpNodeAll(): print each edge list when non-empty.
  if (SU.Preds.size() > 0) {
    dbgs() << "  Predecessors:\n";
    for (const SDep &Dep : SU.Preds) {
      dbgs() << "    ";
      dumpNodeName(*Dep.getSUnit());
      dbgs() << ": ";
      Dep.dump(TRI);
      dbgs() << '\n';
    }
  }
  if (SU.Succs.size() > 0) {
    dbgs() << "  Successors:\n";
    // (The successor loop mirrors the predecessor loop above over SU.Succs.)
  }
unsigned ScheduleDAG::VerifyScheduledDAG(bool isBottomUp) {
  bool AnyNotSched = false;
  unsigned DeadNodes = 0;
  for (const SUnit &SU : SUnits) {
    if (!SU.isScheduled) {
      if (SU.NumPreds == 0 && SU.NumSuccs == 0) {
        ++DeadNodes;
        continue;
      }
      if (!AnyNotSched)
        dbgs() << "*** Scheduling failed! ***\n";
      dumpNode(SU);
      dbgs() << "has not been scheduled!\n";
      AnyNotSched = true;
    }
    if (SU.isScheduled &&
        (isBottomUp ? SU.getHeight() : SU.getDepth()) >
            unsigned(std::numeric_limits<int>::max())) {
      if (!AnyNotSched)
        dbgs() << "*** Scheduling failed! ***\n";
      dumpNode(SU);
      dbgs() << "has an unexpected "
             << (isBottomUp ? "Height" : "Depth") << " value!\n";
      AnyNotSched = true;
    }
    if (isBottomUp) {
      if (SU.NumSuccsLeft != 0) {
        if (!AnyNotSched)
          dbgs() << "*** Scheduling failed! ***\n";
        dumpNode(SU);
        dbgs() << "has successors left!\n";
        AnyNotSched = true;
      }
    } else {
      if (SU.NumPredsLeft != 0) {
        if (!AnyNotSched)
          dbgs() << "*** Scheduling failed! ***\n";
        dumpNode(SU);
        dbgs() << "has predecessors left!\n";
        AnyNotSched = true;
      }
    }
  }
  assert(!AnyNotSched);
  return SUnits.size() - DeadNodes;
}
void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
  unsigned DAGSize = SUnits.size();
  std::vector<SUnit*> WorkList;
  WorkList.reserve(DAGSize);
  Index2Node.resize(DAGSize);
  Node2Index.resize(DAGSize);

  // Initialize: seed the worklist with the exit node and every leaf, using
  // Node2Index as scratch space for the remaining successor counts.
  if (ExitSU)
    WorkList.push_back(ExitSU);
  for (SUnit &SU : SUnits) {
    int NodeNum = SU.NodeNum;
    unsigned Degree = SU.Succs.size();
    Node2Index[NodeNum] = Degree;
    // Is it a node without dependencies?
    if (Degree == 0) {
      assert(SU.Succs.empty() && "SUnit should have no successors");
      WorkList.push_back(&SU);
    }
  }

  // Number the nodes in reverse: a node receives the next lower index once
  // all of its successors have been numbered.
  int Id = DAGSize;
  while (!WorkList.empty()) {
    SUnit *SU = WorkList.back();
    WorkList.pop_back();
    if (SU->NodeNum < DAGSize)
      Allocate(SU->NodeNum, --Id);
    for (const SDep &PredDep : SU->Preds) {
      SUnit *PredSU = PredDep.getSUnit();
      if (PredSU->NodeNum < DAGSize && !--Node2Index[PredSU->NodeNum])
        WorkList.push_back(PredSU);
    }
  }

  Visited.resize(DAGSize);
  NumTopoInits++;

#ifndef NDEBUG
  // Check correctness of the ordering.
  for (SUnit &SU : SUnits)
    for (const SDep &PD : SU.Preds)
      assert(Node2Index[SU.NodeNum] > Node2Index[PD.getSUnit()->NodeNum] &&
             "Wrong topological sorting");
#endif
}
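// [Editor's sketch, not part of the original file] A scheduler usually keeps
// one ScheduleDAGTopologicalSort next to its SUnits vector and rebuilds the
// ordering once the DAG for a region has been constructed; SUnits and ExitSU
// stand for the scheduler's own members.
#if 0
  ScheduleDAGTopologicalSort Topo(SUnits, &ExitSU);
  Topo.InitDAGTopologicalSorting();
#endif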
void ScheduleDAGTopologicalSort::FixOrder() {
  // Recompute the ordering from scratch if it was marked Dirty; otherwise
  // replay the queued edge updates one at a time.
  if (Dirty) {
    InitDAGTopologicalSorting();
    return;
  }
  for (auto &U : Updates)
    AddPred(U.first, U.second);
  Updates.clear();
}

void ScheduleDAGTopologicalSort::AddPredQueued(SUnit *Y, SUnit *X) {
  // Recomputing the order from scratch is likely cheaper than replaying too
  // many single-edge updates; the cut-off of 10 is arbitrary.
  Dirty = Dirty || Updates.size() > 10;
  if (Dirty)
    return;
  Updates.emplace_back(Y, X);
}
void ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) {
  int UpperBound, LowerBound;
  LowerBound = Node2Index[Y->NodeNum];
  UpperBound = Node2Index[X->NodeNum];
  bool HasLoop = false;
  // Is Ord(X) < Ord(Y) ?
  if (LowerBound < UpperBound) {
    // Update the topological order.
    Visited.reset();
    DFS(Y, UpperBound, HasLoop);
    assert(!HasLoop && "Inserted edge creates a loop!");
    // Recompute topological indexes.
    Shift(Visited, LowerBound, UpperBound);
  }

  NumNewPredsAdded++;
}
void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound,
                                     bool &HasLoop) {
  std::vector<const SUnit*> WorkList;
  WorkList.reserve(SUnits.size());

  WorkList.push_back(SU);
  do {
    SU = WorkList.back();
    WorkList.pop_back();
    Visited.set(SU->NodeNum);
    for (const SDep &SuccDep : llvm::reverse(SU->Succs)) {
      unsigned s = SuccDep.getSUnit()->NodeNum;
      // Edges to non-SUnits are allowed but ignored (e.g. ExitSU).
      if (s >= Node2Index.size())
        continue;
      if (Node2Index[s] == UpperBound) {
        HasLoop = true;
        return;
      }
      // Visit successors if not already visited and in the affected region.
      if (!Visited.test(s) && Node2Index[s] < UpperBound) {
        WorkList.push_back(SuccDep.getSUnit());
      }
    }
  } while (!WorkList.empty());
}
std::vector<int> ScheduleDAGTopologicalSort::GetSubGraph(const SUnit &StartSU,
                                                         const SUnit &TargetSU,
                                                         bool &Success) {
  std::vector<const SUnit*> WorkList;
  int LowerBound = Node2Index[StartSU.NodeNum];
  int UpperBound = Node2Index[TargetSU.NodeNum];
  bool Found = false;
  BitVector VisitedBack;
  std::vector<int> Nodes;

  if (LowerBound > UpperBound) {
    Success = false;
    return Nodes;
  }

  WorkList.reserve(SUnits.size());
  Visited.reset();

  // Starting from StartSU, visit all successors up to UpperBound.
  WorkList.push_back(&StartSU);
  do {
    const SUnit *SU = WorkList.back();
    WorkList.pop_back();
    for (const SDep &SD : SU->Succs) {
      const SUnit *Succ = SD.getSUnit();
      unsigned s = Succ->NodeNum;
      // Edges to non-SUnits are allowed but ignored (e.g. ExitSU).
      if (Succ->isBoundaryNode())
        continue;
      if (Node2Index[s] == UpperBound) {
        Found = true;
        continue;
      }
      // Visit successors if not already visited and in the affected region.
      if (!Visited.test(s) && Node2Index[s] < UpperBound) {
        Visited.set(s);
        WorkList.push_back(Succ);
      }
    }
  } while (!WorkList.empty());

  if (!Found) {
    Success = false;
    return Nodes;
  }

  WorkList.clear();
  VisitedBack.resize(SUnits.size());
  Found = false;

  // Starting from TargetSU, visit all predecessors up to LowerBound. SUs
  // visited by both passes are added to Nodes.
  WorkList.push_back(&TargetSU);
  do {
    const SUnit *SU = WorkList.back();
    WorkList.pop_back();
    for (const SDep &SD : SU->Preds) {
      const SUnit *Pred = SD.getSUnit();
      unsigned s = Pred->NodeNum;
      // Edges to non-SUnits are allowed but ignored (e.g. EntrySU).
      if (Pred->isBoundaryNode())
        continue;
      if (Node2Index[s] == LowerBound) {
        Found = true;
        continue;
      }
      if (!VisitedBack.test(s) && Visited.test(s)) {
        VisitedBack.set(s);
        WorkList.push_back(Pred);
        Nodes.push_back(s);
      }
    }
  } while (!WorkList.empty());

  assert(Found && "Error in SUnit Graph!");
  Success = true;
  return Nodes;
}
void ScheduleDAGTopologicalSort::Shift(BitVector& Visited, int LowerBound,
                                       int UpperBound) {
  std::vector<int> L;
  int shift = 0;
  int i;
  for (i = LowerBound; i <= UpperBound; ++i) {
    // w is node at topological index i.
    int w = Index2Node[i];
    if (Visited.test(w)) {
      // Unmark the discovered node and remember it.
      Visited.reset(w);
      L.push_back(w);
      shift = shift + 1;
    } else {
      Allocate(w, i - shift);
    }
  }
  for (unsigned LI : L) {
    Allocate(LI, i - shift);
    i = i + 1;
  }
}
  // In WillCreateCycle(): the new edge would also create a cycle if SU is
  // reachable from any register-dependence predecessor of TargetSU.
  for (const SDep &PredDep : TargetSU->Preds)
    if (PredDep.isAssignedRegDep() && IsReachable(SU, PredDep.getSUnit()))
      return true;
void ScheduleDAGTopologicalSort::AddSUnitWithoutPredecessors(const SUnit *SU) {
  assert(SU->NodeNum == Index2Node.size() && "Node cannot be added at the end");
  assert(SU->NumPreds == 0 && "Can only add SU's with no predecessors");
  Node2Index.push_back(Index2Node.size());
  Index2Node.push_back(SU->NodeNum);
  Visited.resize(Node2Index.size());
}
bool ScheduleDAGTopologicalSort::IsReachable(const SUnit *SU,
                                             const SUnit *TargetSU) {
  assert(TargetSU != nullptr && "Invalid target SUnit");
  assert(SU != nullptr && "Invalid SUnit");
  FixOrder();
  // If insertion of the edge SU->TargetSU would create a cycle, then there is
  // a path from TargetSU to SU.
  int UpperBound, LowerBound;
  LowerBound = Node2Index[TargetSU->NodeNum];
  UpperBound = Node2Index[SU->NodeNum];
  bool HasLoop = false;
  // Is Ord(TargetSU) < Ord(SU) ?
  if (LowerBound < UpperBound) {
    Visited.reset();
    // There may be a path from TargetSU to SU. Check for it.
    DFS(TargetSU, UpperBound, HasLoop);
  }
  return HasLoop;
}
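// [Editor's sketch, not part of the original file] Cycle-safe insertion of an
// artificial edge: only add it if the would-be predecessor is not already
// reachable from the would-be successor, then keep the topological order in
// sync. Topo, PredSU and SuccSU are hypothetical.
#if 0
  if (!Topo.IsReachable(PredSU, SuccSU)) {
    Topo.AddPredQueued(SuccSU, PredSU);              // queue the order update
    SuccSU->addPred(SDep(PredSU, SDep::Artificial)); // add the DAG edge itself
  }
#endif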
void ScheduleDAGTopologicalSort::Allocate(int n, int index) {
  Node2Index[n] = index;
  Index2Node[index] = n;
}
ScheduleDAGTopologicalSort::ScheduleDAGTopologicalSort(
    std::vector<SUnit> &sunits, SUnit *exitsu)
    : SUnits(sunits), ExitSU(exitsu) {}