24#include "llvm/Config/llvm-config.h"
38#define DEBUG_TYPE "pre-RA-sched"
40STATISTIC(NumNewPredsAdded,
"Number of times a single predecessor was added");
42 "Number of times the topological order has been recomputed");
47 cl::desc(
"Stress test instruction scheduling"));
50void SchedulingPriorityQueue::anchor() {}
53 :
TM(mf.getTarget()),
TII(mf.getSubtarget().getInstrInfo()),
54 TRI(mf.getSubtarget().getRegisterInfo()), MF(mf),
55 MRI(mf.getRegInfo()) {
70 if (!Node || !Node->isMachineOpcode())
return nullptr;
71 return &
TII->
get(Node->getMachineOpcode());
94 switch(Contents.OrdKind) {
111 if (!Required && PredDep.getSUnit() ==
D.getSUnit())
113 if (PredDep.overlaps(
D)) {
116 if (PredDep.getLatency() <
D.getLatency()) {
117 SUnit *PredSU = PredDep.getSUnit();
119 SDep ForwardD = PredDep;
122 if (SuccDep == ForwardD) {
127 PredDep.setLatency(
D.getLatency());
139 "NumPreds will overflow!");
140 assert(
N->NumSuccs < std::numeric_limits<unsigned>::max() &&
141 "NumSuccs will overflow!");
145 if (!
N->isScheduled) {
151 "NumPredsLeft will overflow!");
160 assert(
N->NumSuccsLeft < std::numeric_limits<unsigned>::max() &&
161 "NumSuccsLeft will overflow!");
166 N->Succs.push_back(
P);
167 if (
P.getLatency() != 0) {
184 assert(Succ !=
N->Succs.end() &&
"Mismatching preds / succs lists!");
188 assert(
N->NumSuccs > 0 &&
"NumSuccs will underflow!");
192 if (!
N->isScheduled) {
203 assert(
N->WeakSuccsLeft > 0 &&
"WeakSuccsLeft will underflow!");
206 assert(
N->NumSuccsLeft > 0 &&
"NumSuccsLeft will underflow!");
210 N->Succs.erase(Succ);
212 if (
P.getLatency() != 0) {
219 if (!isDepthCurrent)
return;
224 SU->isDepthCurrent =
false;
227 if (SuccSU->isDepthCurrent)
230 }
while (!WorkList.
empty());
234 if (!isHeightCurrent)
return;
239 SU->isHeightCurrent =
false;
242 if (PredSU->isHeightCurrent)
245 }
while (!WorkList.
empty());
253 isDepthCurrent =
true;
261 isHeightCurrent =
true;
265void SUnit::ComputeDepth() {
272 unsigned MaxPredDepth = 0;
275 if (PredSU->isDepthCurrent)
276 MaxPredDepth = std::max(MaxPredDepth,
286 if (MaxPredDepth != Cur->Depth) {
288 Cur->Depth = MaxPredDepth;
290 Cur->isDepthCurrent =
true;
292 }
while (!WorkList.
empty());
296void SUnit::ComputeHeight() {
303 unsigned MaxSuccHeight = 0;
306 if (SuccSU->isHeightCurrent)
307 MaxSuccHeight = std::max(MaxSuccHeight,
317 if (MaxSuccHeight != Cur->Height) {
319 Cur->Height = MaxSuccHeight;
321 Cur->isHeightCurrent =
true;
323 }
while (!WorkList.
empty());
331 unsigned MaxDepth = BestI->getSUnit()->getDepth();
337 if (BestI !=
Preds.begin())
341#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
367 if (SU.
Preds.size() > 0) {
368 dbgs() <<
" Predecessors:\n";
377 if (SU.
Succs.size() > 0) {
378 dbgs() <<
" Successors:\n";
392 bool AnyNotSched =
false;
393 unsigned DeadNodes = 0;
401 dbgs() <<
"*** Scheduling failed! ***\n";
403 dbgs() <<
"has not been scheduled!\n";
408 unsigned(std::numeric_limits<int>::max())) {
410 dbgs() <<
"*** Scheduling failed! ***\n";
412 dbgs() <<
"has an unexpected "
413 << (isBottomUp ?
"Height" :
"Depth") <<
" value!\n";
419 dbgs() <<
"*** Scheduling failed! ***\n";
421 dbgs() <<
"has successors left!\n";
427 dbgs() <<
"*** Scheduling failed! ***\n";
429 dbgs() <<
"has predecessors left!\n";
435 return SUnits.size() - DeadNodes;
471 unsigned DAGSize = SUnits.size();
472 std::vector<SUnit*> WorkList;
475 Index2Node.resize(DAGSize);
476 Node2Index.resize(DAGSize);
480 WorkList.push_back(ExitSU);
481 for (
SUnit &SU : SUnits) {
482 int NodeNum = SU.NodeNum;
483 unsigned Degree = SU.Succs.size();
485 Node2Index[NodeNum] = Degree;
489 assert(SU.Succs.empty() &&
"SUnit should have no successors");
491 WorkList.push_back(&SU);
496 while (!WorkList.empty()) {
497 SUnit *SU = WorkList.back();
506 WorkList.push_back(SU);
515 for (
SUnit &SU : SUnits) {
516 for (
const SDep &PD : SU.Preds) {
517 assert(Node2Index[SU.NodeNum] > Node2Index[PD.getSUnit()->NodeNum] &&
518 "Wrong topological sorting");
524void ScheduleDAGTopologicalSort::FixOrder() {
532 for (
auto &U : Updates)
541 Dirty = Dirty || Updates.size() > 10;
546 Updates.emplace_back(
Y,
X);
550 int UpperBound, LowerBound;
551 LowerBound = Node2Index[
Y->NodeNum];
552 UpperBound = Node2Index[
X->NodeNum];
553 bool HasLoop =
false;
555 if (LowerBound < UpperBound) {
558 DFS(
Y, UpperBound, HasLoop);
559 assert(!HasLoop &&
"Inserted edge creates a loop!");
561 Shift(Visited, LowerBound, UpperBound);
571void ScheduleDAGTopologicalSort::DFS(
const SUnit *SU,
int UpperBound,
573 std::vector<const SUnit*> WorkList;
574 WorkList.
reserve(SUnits.size());
576 WorkList.push_back(SU);
578 SU = WorkList.back();
584 if (s >= Node2Index.size())
586 if (Node2Index[s] == UpperBound) {
591 if (!Visited.
test(s) && Node2Index[s] < UpperBound) {
592 WorkList.push_back(SuccDep.
getSUnit());
595 }
while (!WorkList.empty());
599 const SUnit &TargetSU,
601 std::vector<const SUnit*> WorkList;
602 int LowerBound = Node2Index[StartSU.
NodeNum];
603 int UpperBound = Node2Index[TargetSU.
NodeNum];
606 std::vector<int> Nodes;
608 if (LowerBound > UpperBound) {
613 WorkList.reserve(SUnits.size());
620 const SUnit *SU = WorkList.back();
623 const SUnit *Succ = SD.getSUnit();
628 if (Node2Index[s] == UpperBound) {
633 if (!Visited.
test(s) && Node2Index[s] < UpperBound) {
638 }
while (!WorkList.empty());
646 VisitedBack.
resize(SUnits.size());
652 WorkList.push_back(&TargetSU);
654 const SUnit *SU = WorkList.back();
657 const SUnit *Pred = SD.getSUnit();
662 if (Node2Index[s] == LowerBound) {
666 if (!VisitedBack.
test(s) && Visited.
test(s)) {
672 }
while (!WorkList.empty());
674 assert(Found &&
"Error in SUnit Graph!");
679void ScheduleDAGTopologicalSort::Shift(
BitVector& Visited,
int LowerBound,
685 for (i = LowerBound; i <= UpperBound; ++i) {
687 int w = Index2Node[i];
688 if (Visited.
test(w)) {
694 Allocate(w, i - shift);
698 for (
unsigned LI : L) {
699 Allocate(LI, i - shift);
709 for (
const SDep &PredDep : TargetSU->
Preds)
717 assert(SU->
NodeNum == Index2Node.size() &&
"Node cannot be added at the end");
718 assert(SU->
NumPreds == 0 &&
"Can only add SU's with no predecessors");
719 Node2Index.push_back(Index2Node.size());
720 Index2Node.push_back(SU->
NodeNum);
721 Visited.
resize(Node2Index.size());
725 const SUnit *TargetSU) {
726 assert(TargetSU !=
nullptr &&
"Invalid target SUnit");
727 assert(SU !=
nullptr &&
"Invalid SUnit");
731 int UpperBound, LowerBound;
732 LowerBound = Node2Index[TargetSU->
NodeNum];
733 UpperBound = Node2Index[SU->
NodeNum];
734 bool HasLoop =
false;
736 if (LowerBound < UpperBound) {
739 DFS(TargetSU, UpperBound, HasLoop);
744void ScheduleDAGTopologicalSort::Allocate(
int n,
int index) {
745 Node2Index[n] = index;
746 Index2Node[index] = n;
751 : SUnits(sunits), ExitSU(exitsu) {}
unsigned const MachineRegisterInfo * MRI
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
const HexagonInstrInfo * TII
static const unsigned MaxDepth
unsigned const TargetRegisterInfo * TRI
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
const char LLVMTargetMachineRef TM
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static cl::opt< bool > StressSchedOpt("stress-sched", cl::Hidden, cl::init(false), cl::desc("Stress test instruction scheduling"))
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
bool test(unsigned Idx) const
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
Describe properties that are true of each instruction in the target description file.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Represents one node in the SelectionDAG.
Kind getKind() const
Returns an enum value representing the kind of the dependence.
@ Output
A register output-dependence (aka WAW).
@ Order
Any other ordering dependency.
@ Anti
A register anti-dependence (aka WAR).
@ Data
Regular data dependence (aka true-dependence).
void setLatency(unsigned Lat)
Sets the latency for this edge.
@ Cluster
Weak DAG edge linking a chain of clustered instrs.
@ Barrier
An unknown scheduling barrier.
@ Artificial
Arbitrary strong DAG edge (no real dependence).
@ MayAliasMem
Nonvolatile load/Store instructions that may alias.
@ Weak
Arbitrary weak DAG edge.
@ MustAliasMem
Nonvolatile load/Store instructions that must alias.
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
bool isAssignedRegDep() const
Tests if this is a Data dependence that is associated with a register.
unsigned getReg() const
Returns the register associated with this edge.
void dump(const TargetRegisterInfo *TRI=nullptr) const
Scheduling unit. This is a node in the scheduling DAG.
void setHeightToAtLeast(unsigned NewHeight)
If NewHeight is greater than this node's height value, set it to be the new height value.
unsigned NodeNum
Entry # of node in the node vector.
void biasCriticalPath()
Orders this node's predecessor edges such that the critical path edge occurs first.
unsigned getHeight() const
Returns the height of this node, which is the length of the maximum path down to any node which has n...
void setHeightDirty()
Sets a flag in this node to indicate that its stored Height value will require recomputation the next...
void removePred(const SDep &D)
Removes the specified edge as a pred of the current node if it exists.
unsigned short Latency
Node latency.
SmallVectorImpl< SDep >::iterator pred_iterator
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
unsigned short NumRegDefsLeft
unsigned getDepth() const
Returns the depth of this node, which is the length of the maximum path up to any node which has no p...
bool isScheduled
True once scheduled.
void dumpAttributes() const
SmallVector< SDep, 4 > Succs
All sunit successors.
void setDepthDirty()
Sets a flag in this node to indicate that its stored Depth value will require recomputation the next ...
SmallVector< SDep, 4 > Preds
All sunit predecessors.
void setDepthToAtLeast(unsigned NewDepth)
If NewDepth is greater than this node's depth value, sets it to be the new depth value.
bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
void RemovePred(SUnit *M, SUnit *N)
Updates the topological ordering to accommodate an edge to be removed from the specified node N from ...
bool WillCreateCycle(SUnit *TargetSU, SUnit *SU)
Returns true if addPred(TargetSU, SU) creates a cycle.
void AddSUnitWithoutPredecessors(const SUnit *SU)
Add a SUnit without predecessors to the end of the topological order.
ScheduleDAGTopologicalSort(std::vector< SUnit > &SUnits, SUnit *ExitSU)
std::vector< int > GetSubGraph(const SUnit &StartSU, const SUnit &TargetSU, bool &Success)
Returns an array of SUs that are both in the successor subtree of StartSU and in the predecessor subt...
void InitDAGTopologicalSorting()
Creates the initial topological ordering from the DAG to be scheduled.
void AddPred(SUnit *Y, SUnit *X)
Updates the topological ordering to accommodate an edge to be added from SUnit X to SUnit Y.
bool IsReachable(const SUnit *SU, const SUnit *TargetSU)
Checks if SU is reachable from TargetSU.
void AddPredQueued(SUnit *Y, SUnit *X)
Queues an update to the topological ordering to accommodate an edge to be added from SUnit X to SUnit...
void clearDAG()
Clears the DAG state (between regions).
const TargetInstrInfo * TII
Target instruction information.
std::vector< SUnit > SUnits
The scheduling units.
const TargetRegisterInfo * TRI
Target processor register info.
SUnit EntrySU
Special node for the region entry.
ScheduleDAG(const ScheduleDAG &)=delete
void dumpNodeAll(const SUnit &SU) const
unsigned VerifyScheduledDAG(bool isBottomUp)
Verifies that all SUnits were scheduled and that their state is consistent.
virtual void dumpNode(const SUnit &SU) const =0
void dumpNodeName(const SUnit &SU) const
SUnit ExitSU
Special node for the region exit.
virtual ~ScheduleHazardRecognizer()
void reserve(size_type N)
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.