73#include "llvm/Config/llvm-config.h"
101#define DEBUG_TYPE "pipeliner"
103STATISTIC(NumTrytoPipeline,
"Number of loops that we attempt to pipeline");
104STATISTIC(NumPipelined,
"Number of loops software pipelined");
105STATISTIC(NumNodeOrderIssues,
"Number of node order issues found");
106STATISTIC(NumFailBranch,
"Pipeliner abort due to unknown branch");
107STATISTIC(NumFailLoop,
"Pipeliner abort due to unsupported loop");
108STATISTIC(NumFailPreheader,
"Pipeliner abort due to missing preheader");
109STATISTIC(NumFailLargeMaxMII,
"Pipeliner abort due to MaxMII too large");
110STATISTIC(NumFailZeroMII,
"Pipeliner abort due to zero MII");
111STATISTIC(NumFailNoSchedule,
"Pipeliner abort due to no schedule found");
112STATISTIC(NumFailZeroStage,
"Pipeliner abort due to zero stage");
113STATISTIC(NumFailLargeMaxStage,
"Pipeliner abort due to too many stages");
117 cl::desc(
"Enable Software Pipelining"));
126 cl::desc(
"Size limit for the MII."),
132 cl::desc(
"Force pipeliner to use specified II."),
138 cl::desc(
"Maximum stages allowed in the generated scheduled."),
145 cl::desc(
"Prune dependences between unrelated Phi nodes."),
152 cl::desc(
"Prune loop carried order dependences."),
170 cl::desc(
"Instead of emitting the pipelined code, annotate instructions "
171 "with the generated schedule for feeding into the "
172 "-modulo-schedule-test pass"));
177 "Use the experimental peeling code generator for software pipelining"));
185 cl::desc(
"Limit register pressure of scheduled loop"));
190 cl::desc(
"Margin representing the unused percentage of "
191 "the register pressure limit"));
195 cl::desc(
"Use the MVE code generator for software pipelining"));
202 cl::desc(
"Enable CopyToPhi DAG Mutation"));
207 "pipeliner-force-issue-width",
214 cl::desc(
"Set how to use window scheduling algorithm."),
216 "Turn off window algorithm."),
218 "Use window algorithm after SMS algorithm fails."),
220 "Use window algorithm instead of SMS algorithm.")));
224unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;
232 "Modulo Software Pipelining",
false,
false)
242 if (skipFunction(mf.getFunction()))
248 if (mf.getFunction().getAttributes().hasFnAttr(Attribute::OptimizeForSize) &&
252 if (!mf.getSubtarget().enableMachinePipeliner())
257 if (mf.getSubtarget().useDFAforSMS() &&
258 (!mf.getSubtarget().getInstrItineraryData() ||
259 mf.getSubtarget().getInstrItineraryData()->isEmpty()))
263 MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
264 MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
265 ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
266 TII = MF->getSubtarget().getInstrInfo();
267 RegClassInfo.runOnMachineFunction(*MF);
269 for (
const auto &L : *MLI)
279bool MachinePipeliner::scheduleLoop(
MachineLoop &L) {
280 bool Changed =
false;
281 for (
const auto &InnerLoop : L)
282 Changed |= scheduleLoop(*InnerLoop);
294 setPragmaPipelineOptions(L);
295 if (!canPipelineLoop(L)) {
299 L.getStartLoc(), L.getHeader())
300 <<
"Failed to pipeline loop";
308 if (useSwingModuloScheduler())
309 Changed = swingModuloScheduler(L);
311 if (useWindowScheduler(Changed))
312 Changed = runWindowScheduler(L);
318void MachinePipeliner::setPragmaPipelineOptions(
MachineLoop &L) {
337 if (LoopID ==
nullptr)
344 MDNode *MD = dyn_cast<MDNode>(MDO);
354 if (S->
getString() ==
"llvm.loop.pipeline.initiationinterval") {
356 "Pipeline initiation interval hint metadata should have two operands.");
358 mdconst::extract<ConstantInt>(MD->
getOperand(1))->getZExtValue();
360 }
else if (S->
getString() ==
"llvm.loop.pipeline.disable") {
369bool MachinePipeliner::canPipelineLoop(
MachineLoop &L) {
370 if (
L.getNumBlocks() != 1) {
373 L.getStartLoc(),
L.getHeader())
374 <<
"Not a single basic block: "
375 <<
ore::NV(
"NumBlocks",
L.getNumBlocks());
383 L.getStartLoc(),
L.getHeader())
384 <<
"Disabled by Pragma.";
395 LLVM_DEBUG(
dbgs() <<
"Unable to analyzeBranch, can NOT pipeline Loop\n");
399 L.getStartLoc(),
L.getHeader())
400 <<
"The branch can't be understood";
409 LLVM_DEBUG(
dbgs() <<
"Unable to analyzeLoop, can NOT pipeline Loop\n");
413 L.getStartLoc(),
L.getHeader())
414 <<
"The loop structure is not supported";
419 if (!
L.getLoopPreheader()) {
420 LLVM_DEBUG(
dbgs() <<
"Preheader not found, can NOT pipeline Loop\n");
424 L.getStartLoc(),
L.getHeader())
425 <<
"No loop preheader found";
431 preprocessPhiNodes(*
L.getHeader());
438 *getAnalysis<LiveIntervalsWrapperPass>().getLIS().getSlotIndexes();
443 auto *RC =
MRI.getRegClass(DefOp.
getReg());
445 for (
unsigned i = 1, n = PI.getNumOperands(); i != n; i += 2) {
470bool MachinePipeliner::swingModuloScheduler(
MachineLoop &L) {
471 assert(
L.getBlocks().size() == 1 &&
"SMS works on single blocks only.");
474 *
this, L, getAnalysis<LiveIntervalsWrapperPass>().getLIS(),
RegClassInfo,
495 return SMS.hasNewSchedule();
509bool MachinePipeliner::runWindowScheduler(
MachineLoop &L) {
514 Context.
PassConfig = &getAnalysis<TargetPassConfig>();
515 Context.
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
516 Context.
LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
522bool MachinePipeliner::useSwingModuloScheduler() {
527bool MachinePipeliner::useWindowScheduler(
bool Changed) {
534 "llvm.loop.pipeline.initiationinterval is set.\n");
542void SwingSchedulerDAG::setMII(
unsigned ResMII,
unsigned RecMII) {
545 else if (II_setByPragma > 0)
546 MII = II_setByPragma;
548 MII = std::max(ResMII, RecMII);
551void SwingSchedulerDAG::setMAX_II() {
554 else if (II_setByPragma > 0)
555 MAX_II = II_setByPragma;
565 addLoopCarriedDependences(AA);
566 updatePhiDependences();
574 findCircuits(NodeSets);
578 unsigned ResMII = calculateResMII();
579 unsigned RecMII = calculateRecMII(NodeSets);
587 setMII(ResMII, RecMII);
591 <<
" (rec=" << RecMII <<
", res=" << ResMII <<
")\n");
597 Pass.ORE->emit([&]() {
600 <<
"Invalid Minimal Initiation Interval: 0";
608 <<
", we don't pipeline large loops\n");
609 NumFailLargeMaxMII++;
610 Pass.ORE->emit([&]() {
613 <<
"Minimal Initiation Interval too large: "
614 <<
ore::NV(
"MII", (
int)MII) <<
" > "
616 <<
"Refer to -pipeliner-max-mii.";
621 computeNodeFunctions(NodeSets);
623 registerPressureFilter(NodeSets);
625 colocateNodeSets(NodeSets);
627 checkNodeSets(NodeSets);
630 for (
auto &
I : NodeSets) {
631 dbgs() <<
" Rec NodeSet ";
638 groupRemainingNodes(NodeSets);
640 removeDuplicateNodes(NodeSets);
643 for (
auto &
I : NodeSets) {
644 dbgs() <<
" NodeSet ";
649 computeNodeOrder(NodeSets);
652 checkValidNodeOrder(Circuits);
655 Scheduled = schedulePipeline(Schedule);
660 Pass.ORE->emit([&]() {
663 <<
"Unable to find schedule";
670 if (numStages == 0) {
673 Pass.ORE->emit([&]() {
676 <<
"No need to pipeline - no overlapped iterations in schedule.";
683 <<
" : too many stages, abort\n");
684 NumFailLargeMaxStage++;
685 Pass.ORE->emit([&]() {
688 <<
"Too many stages in schedule: "
689 <<
ore::NV(
"numStages", (
int)numStages) <<
" > "
691 <<
". Refer to -pipeliner-max-stages.";
696 Pass.ORE->emit([&]() {
699 <<
"Pipelined succesfully!";
704 std::vector<MachineInstr *> OrderedInsts;
708 OrderedInsts.push_back(SU->getInstr());
709 Cycles[SU->getInstr()] =
Cycle;
714 for (
auto &KV : NewMIs) {
715 Cycles[KV.first] = Cycles[KV.second];
716 Stages[KV.first] = Stages[KV.second];
717 NewInstrChanges[KV.first] = InstrChanges[
getSUnit(KV.first)];
724 "Cannot serialize a schedule with InstrChanges!");
748 for (
auto &KV : NewMIs)
759 unsigned &InitVal,
unsigned &LoopVal) {
760 assert(Phi.isPHI() &&
"Expecting a Phi.");
764 for (
unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
765 if (Phi.getOperand(i + 1).getMBB() !=
Loop)
766 InitVal = Phi.getOperand(i).getReg();
768 LoopVal = Phi.getOperand(i).getReg();
770 assert(InitVal != 0 && LoopVal != 0 &&
"Unexpected Phi structure.");
776 for (
unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
777 if (Phi.getOperand(i + 1).getMBB() == LoopBB)
778 return Phi.getOperand(i).getReg();
787 while (!Worklist.
empty()) {
789 for (
const auto &SI : SU->
Succs) {
790 SUnit *SuccSU = SI.getSUnit();
792 if (Visited.
count(SuccSU))
807 return MI.isCall() ||
MI.mayRaiseFPException() ||
808 MI.hasUnmodeledSideEffects() ||
809 (
MI.hasOrderedMemoryRef() &&
810 (!
MI.mayLoad() || !
MI.isDereferenceableInvariantLoad()));
818 if (!
MI->hasOneMemOperand())
824 for (
const Value *V : Objs) {
836void SwingSchedulerDAG::addLoopCarriedDependences(
AliasAnalysis *AA) {
843 PendingLoads.
clear();
844 else if (
MI.mayLoad()) {
849 for (
const auto *V : Objs) {
853 }
else if (
MI.mayStore()) {
858 for (
const auto *V : Objs) {
860 PendingLoads.
find(V);
861 if (
I == PendingLoads.
end())
863 for (
auto *Load :
I->second) {
871 int64_t Offset1, Offset2;
872 bool Offset1IsScalable, Offset2IsScalable;
874 Offset1IsScalable,
TRI) &&
876 Offset2IsScalable,
TRI)) {
878 Offset1IsScalable == Offset2IsScalable &&
879 (
int)Offset1 < (
int)Offset2) {
881 "What happened to the chain edge?");
932void SwingSchedulerDAG::updatePhiDependences() {
940 unsigned HasPhiUse = 0;
941 unsigned HasPhiDef = 0;
965 if (SU->
NodeNum <
I.NodeNum && !
I.isPred(SU))
970 }
else if (MO.isUse()) {
973 if (
DefMI ==
nullptr)
980 ST.adjustSchedDependency(SU, 0, &
I, MO.getOperandNo(), Dep,
987 if (SU->
NodeNum <
I.NodeNum && !
I.isPred(SU))
996 for (
auto &PI :
I.Preds) {
999 if (
I.getInstr()->isPHI()) {
1008 for (
const SDep &
D : RemoveDeps)
1015void SwingSchedulerDAG::changeDependences() {
1020 unsigned BasePos = 0, OffsetPos = 0, NewBase = 0;
1021 int64_t NewOffset = 0;
1022 if (!canUseLastOffsetValue(
I.getInstr(), BasePos, OffsetPos, NewBase,
1027 Register OrigBase =
I.getInstr()->getOperand(BasePos).getReg();
1047 for (
const SDep &
P :
I.Preds)
1048 if (
P.getSUnit() == DefSU)
1050 for (
const SDep &
D : Deps) {
1056 for (
auto &
P : LastSU->
Preds)
1059 for (
const SDep &
D : Deps) {
1072 InstrChanges[&
I] = std::make_pair(NewBase, NewOffset);
1083 std::vector<MachineInstr *> &OrderedInsts,
1091 Stage <= LastStage; ++Stage) {
1094 Instrs[
Cycle].push_front(SU);
1101 std::deque<SUnit *> &CycleInstrs = Instrs[
Cycle];
1103 for (
SUnit *SU : CycleInstrs) {
1105 OrderedInsts.push_back(
MI);
1115struct FuncUnitSorter {
1121 : InstrItins(TSI.getInstrItineraryData()), STI(&TSI) {}
1129 unsigned min = UINT_MAX;
1130 if (InstrItins && !InstrItins->
isEmpty()) {
1133 InstrItins->
endStage(SchedClass))) {
1136 if (numAlternatives < min) {
1137 min = numAlternatives;
1154 if (!PRE.ReleaseAtCycle)
1158 unsigned NumUnits = ProcResource->
NumUnits;
1159 if (NumUnits < min) {
1161 F = PRE.ProcResourceIdx;
1166 llvm_unreachable(
"Should have non-empty InstrItins or hasInstrSchedModel!");
1175 unsigned SchedClass =
MI.getDesc().getSchedClass();
1176 if (InstrItins && !InstrItins->
isEmpty()) {
1179 InstrItins->
endStage(SchedClass))) {
1182 Resources[FuncUnits]++;
1197 if (!PRE.ReleaseAtCycle)
1199 Resources[PRE.ProcResourceIdx]++;
1203 llvm_unreachable(
"Should have non-empty InstrItins or hasInstrSchedModel!");
1209 unsigned MFUs1 = minFuncUnits(IS1, F1);
1210 unsigned MFUs2 = minFuncUnits(IS2, F2);
1213 return MFUs1 > MFUs2;
1218class HighRegisterPressureDetector {
1223 const unsigned PSetNum;
1229 std::vector<unsigned> InitSetPressure;
1233 std::vector<unsigned> PressureSetLimit;
1240 using OrderedInstsTy = std::vector<MachineInstr *>;
1244 static void dumpRegisterPressures(
const std::vector<unsigned> &Pressures) {
1245 if (Pressures.size() == 0) {
1249 for (
unsigned P : Pressures) {
1259 for (
auto PSetIter =
MRI.getPressureSets(
Reg); PSetIter.isValid();
1261 dbgs() << *PSetIter <<
' ';
1266 void increaseRegisterPressure(std::vector<unsigned> &Pressure,
1268 auto PSetIter =
MRI.getPressureSets(
Reg);
1269 unsigned Weight = PSetIter.getWeight();
1270 for (; PSetIter.isValid(); ++PSetIter)
1271 Pressure[*PSetIter] += Weight;
1274 void decreaseRegisterPressure(std::vector<unsigned> &Pressure,
1276 auto PSetIter =
MRI.getPressureSets(
Reg);
1277 unsigned Weight = PSetIter.getWeight();
1278 for (; PSetIter.isValid(); ++PSetIter) {
1279 auto &
P = Pressure[*PSetIter];
1281 "register pressure must be greater than or equal weight");
1288 return Reg.isPhysical() &&
MRI.isReserved(
Reg.asMCReg());
1292 return Reg.isVirtual() &&
MRI.getVRegDef(
Reg)->getParent() == OrigMBB;
1303 void computeLiveIn() {
1305 for (
auto &
MI : *OrigMBB) {
1306 if (
MI.isDebugInstr())
1314 if (isReservedRegister(
Reg))
1316 if (isDefinedInThisLoop(
Reg))
1322 for (
auto LiveIn : Used)
1323 increaseRegisterPressure(InitSetPressure, LiveIn);
1328 for (
unsigned PSet = 0; PSet < PSetNum; PSet++)
1343 Instr2LastUsesTy computeLastUses(
const OrderedInstsTy &OrderedInsts,
1344 Instr2StageTy &Stages)
const {
1350 const auto UpdateTargetRegs = [
this, &TargetRegs](
Register Reg) {
1351 if (isDefinedInThisLoop(
Reg))
1357 UpdateTargetRegs(
Reg);
1359 for (
auto &
Use : ROMap.
find(
MI)->getSecond().Uses)
1360 UpdateTargetRegs(
Use.RegUnit);
1365 return Stages[
MI] +
MI->isPHI();
1370 for (
auto &
Use : ROMap.
find(
MI)->getSecond().Uses) {
1378 if (InstrScore(Orig) < InstrScore(New))
1384 Instr2LastUsesTy LastUses;
1385 for (
auto &Entry : LastUseMI)
1402 std::vector<unsigned>
1403 computeMaxSetPressure(
const OrderedInstsTy &OrderedInsts,
1404 Instr2StageTy &Stages,
1405 const unsigned StageCount)
const {
1412 auto CurSetPressure = InitSetPressure;
1413 auto MaxSetPressure = InitSetPressure;
1414 auto LastUses = computeLastUses(OrderedInsts, Stages);
1417 dbgs() <<
"Ordered instructions:\n";
1419 dbgs() <<
"Stage " << Stages[
MI] <<
": ";
1424 const auto InsertReg = [
this, &CurSetPressure](RegSetTy &RegSet,
1426 if (!
Reg.isValid() || isReservedRegister(
Reg))
1434 increaseRegisterPressure(CurSetPressure,
Reg);
1438 const auto EraseReg = [
this, &CurSetPressure](RegSetTy &RegSet,
1440 if (!
Reg.isValid() || isReservedRegister(
Reg))
1444 if (!RegSet.contains(
Reg))
1449 decreaseRegisterPressure(CurSetPressure,
Reg);
1453 for (
unsigned I = 0;
I < StageCount;
I++) {
1455 const auto Stage = Stages[
MI];
1459 const unsigned Iter =
I - Stage;
1461 for (
auto &Def : ROMap.
find(
MI)->getSecond().Defs)
1462 InsertReg(LiveRegSets[Iter],
Def.RegUnit);
1464 for (
auto LastUse : LastUses[
MI]) {
1467 EraseReg(LiveRegSets[Iter - 1], LastUse);
1469 EraseReg(LiveRegSets[Iter], LastUse);
1473 for (
unsigned PSet = 0; PSet < PSetNum; PSet++)
1474 MaxSetPressure[PSet] =
1475 std::max(MaxSetPressure[PSet], CurSetPressure[PSet]);
1478 dbgs() <<
"CurSetPressure=";
1479 dumpRegisterPressures(CurSetPressure);
1480 dbgs() <<
" iter=" << Iter <<
" stage=" << Stage <<
":";
1486 return MaxSetPressure;
1492 : OrigMBB(OrigMBB),
MRI(MF.getRegInfo()),
1493 TRI(MF.getSubtarget().getRegisterInfo()),
1494 PSetNum(
TRI->getNumRegPressureSets()), InitSetPressure(PSetNum, 0),
1495 PressureSetLimit(PSetNum, 0) {}
1501 if (
MI.isDebugInstr())
1503 ROMap[&
MI].collect(
MI, *
TRI,
MRI,
false,
true);
1507 computePressureSetLimit(RCI);
1513 const unsigned MaxStage)
const {
1515 "the percentage of the margin must be between 0 to 100");
1517 OrderedInstsTy OrderedInsts;
1518 Instr2StageTy Stages;
1520 const auto MaxSetPressure =
1521 computeMaxSetPressure(OrderedInsts, Stages, MaxStage + 1);
1524 dbgs() <<
"Dump MaxSetPressure:\n";
1525 for (
unsigned I = 0;
I < MaxSetPressure.size();
I++) {
1526 dbgs() <<
format(
"MaxSetPressure[%d]=%d\n",
I, MaxSetPressure[
I]);
1531 for (
unsigned PSet = 0; PSet < PSetNum; PSet++) {
1532 unsigned Limit = PressureSetLimit[PSet];
1535 <<
" Margin=" << Margin <<
"\n");
1536 if (Limit < MaxSetPressure[PSet] + Margin) {
1539 <<
"Rejected the schedule because of too high register pressure\n");
1555unsigned SwingSchedulerDAG::calculateResMII() {
1558 return RM.calculateResMII();
1567unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {
1568 unsigned RecMII = 0;
1570 for (
NodeSet &Nodes : NodeSets) {
1574 unsigned Delay = Nodes.getLatency();
1575 unsigned Distance = 1;
1578 unsigned CurMII = (Delay + Distance - 1) / Distance;
1579 Nodes.setRecMII(CurMII);
1580 if (CurMII > RecMII)
1588void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
1592 for (
int i = 0, e = SUnits.size(); i != e; ++i) {
1595 for (
auto &OE : DAG->DDG->getOutEdges(&SUnits[i])) {
1598 if (OE.isOutputDep()) {
1599 int N = OE.getDst()->NodeNum;
1601 auto Dep = OutputDeps.
find(BackEdge);
1602 if (Dep != OutputDeps.
end()) {
1603 BackEdge = Dep->second;
1604 OutputDeps.
erase(Dep);
1606 OutputDeps[
N] = BackEdge;
1609 if (OE.getDst()->isBoundaryNode() || OE.isArtificial())
1621 int N = OE.getDst()->NodeNum;
1623 AdjK[i].push_back(
N);
1629 for (
auto &IE : DAG->DDG->getInEdges(&SUnits[i])) {
1634 if (
IE.isOrderDep() && Src->getInstr()->mayLoad()) {
1635 int N = Src->NodeNum;
1637 AdjK[i].push_back(
N);
1644 for (
auto &OD : OutputDeps)
1645 if (!
Added.test(OD.second)) {
1646 AdjK[OD.first].push_back(OD.second);
1647 Added.set(OD.second);
1653bool SwingSchedulerDAG::Circuits::circuit(
int V,
int S, NodeSetType &NodeSets,
1661 for (
auto W : AdjK[V]) {
1662 if (NumPaths > MaxPaths)
1673 if (!Blocked.test(W)) {
1674 if (circuit(W, S, NodeSets, DAG,
1675 Node2Idx->at(W) < Node2Idx->at(V) ?
true : HasBackedge))
1683 for (
auto W : AdjK[V]) {
1694void SwingSchedulerDAG::Circuits::unblock(
int U) {
1697 while (!BU.
empty()) {
1699 assert(SI != BU.
end() &&
"Invalid B set.");
1702 if (Blocked.test(
W->NodeNum))
1703 unblock(
W->NodeNum);
1709void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
1710 Circuits Cir(
SUnits, Topo);
1712 Cir.createAdjacencyStructure(
this);
1713 for (
int I = 0, E =
SUnits.size();
I != E; ++
I) {
1715 Cir.circuit(
I,
I, NodeSets,
this);
1748 for (
auto &Dep : SU.
Preds) {
1749 SUnit *TmpSU = Dep.getSUnit();
1761 if (PHISUs.
size() == 0 || SrcSUs.
size() == 0)
1769 for (
auto &Dep : PHISUs[Index]->Succs) {
1773 SUnit *TmpSU = Dep.getSUnit();
1783 if (UseSUs.
size() == 0)
1788 for (
auto *
I : UseSUs) {
1789 for (
auto *Src : SrcSUs) {
1805void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
1806 ScheduleInfo.resize(
SUnits.size());
1809 for (
int I : Topo) {
1817 for (
int I : Topo) {
1819 int zeroLatencyDepth = 0;
1821 for (
const auto &IE : DDG->getInEdges(SU)) {
1823 if (
IE.getLatency() == 0)
1826 if (
IE.ignoreDependence(
true))
1828 asap = std::max(asap, (
int)(
getASAP(Pred) +
IE.getLatency() -
1829 IE.getDistance() * MII));
1831 maxASAP = std::max(maxASAP, asap);
1832 ScheduleInfo[
I].ASAP = asap;
1833 ScheduleInfo[
I].ZeroLatencyDepth = zeroLatencyDepth;
1839 int zeroLatencyHeight = 0;
1841 for (
const auto &OE : DDG->getOutEdges(SU)) {
1842 SUnit *Succ = OE.getDst();
1845 if (OE.getLatency() == 0)
1848 if (OE.ignoreDependence(
true))
1850 alap = std::min(alap, (
int)(
getALAP(Succ) - OE.getLatency() +
1851 OE.getDistance() * MII));
1854 ScheduleInfo[
I].ALAP = alap;
1855 ScheduleInfo[
I].ZeroLatencyHeight = zeroLatencyHeight;
1860 I.computeNodeSetInfo(
this);
1863 for (
unsigned i = 0; i <
SUnits.size(); i++) {
1864 dbgs() <<
"\tNode " << i <<
":\n";
1886 SUnit *PredSU = IE.getSrc();
1887 if (S && S->count(PredSU) == 0)
1889 if (IE.ignoreDependence(
true))
1900 SUnit *SuccSU = OE.getDst();
1901 if (!OE.isAntiDep())
1903 if (S && S->count(SuccSU) == 0)
1909 return !Preds.
empty();
1922 SUnit *SuccSU = OE.getDst();
1923 if (S && S->count(SuccSU) == 0)
1925 if (OE.ignoreDependence(
false))
1936 SUnit *PredSU = IE.getSrc();
1937 if (!IE.isAntiDep())
1939 if (S && S->count(PredSU) == 0)
1945 return !Succs.
empty();
1961 if (!Visited.
insert(Cur).second)
1962 return Path.contains(Cur);
1963 bool FoundPath =
false;
1965 if (!OE.ignoreDependence(
false))
1967 computePath(OE.getDst(), Path, DestNodes, Exclude, Visited, DDG);
1969 if (IE.isAntiDep() && IE.getDistance() == 0)
1971 computePath(IE.getSrc(), Path, DestNodes, Exclude, Visited, DDG);
1986 for (
SUnit *SU : NS) {
1992 if (Reg.isVirtual())
1994 else if (
MRI.isAllocatable(Reg))
1999 for (
SUnit *SU : NS)
2003 if (Reg.isVirtual()) {
2004 if (!
Uses.count(Reg))
2006 }
else if (
MRI.isAllocatable(Reg)) {
2008 if (!
Uses.count(Unit))
2017void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {
2018 for (
auto &NS : NodeSets) {
2024 RecRPTracker.init(&
MF, &RegClassInfo, &LIS,
BB,
BB->
end(),
false,
true);
2026 RecRPTracker.closeBottom();
2028 std::vector<SUnit *>
SUnits(NS.begin(), NS.end());
2030 return A->NodeNum >
B->NodeNum;
2033 for (
auto &SU :
SUnits) {
2039 RecRPTracker.setPos(std::next(CurInstI));
2043 RecRPTracker.getMaxUpwardPressureDelta(SU->
getInstr(),
nullptr, RPDelta,
2048 dbgs() <<
"Excess register pressure: SU(" << SU->
NodeNum <<
") "
2051 NS.setExceedPressure(SU);
2054 RecRPTracker.recede();
2061void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) {
2062 unsigned Colocate = 0;
2063 for (
int i = 0, e = NodeSets.size(); i < e; ++i) {
2068 for (
int j = i + 1;
j <
e; ++
j) {
2089void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) {
2094 for (
auto &NS : NodeSets) {
2095 if (NS.getRecMII() > 2)
2097 if (NS.getMaxDepth() > MII)
2106void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
2118 computePath(NI, Path, NodesAdded,
I, Visited, DDG.get());
2125 if (
succ_L(NodesAdded,
N, DDG.get())) {
2129 computePath(NI, Path,
I, NodesAdded, Visited, DDG.get());
2134 NodesAdded.
insert(
I.begin(),
I.end());
2141 if (
succ_L(NodesAdded,
N, DDG.get()))
2143 addConnectedNodes(
I, NewSet, NodesAdded);
2144 if (!NewSet.
empty())
2145 NodeSets.push_back(NewSet);
2150 if (
pred_L(NodesAdded,
N, DDG.get()))
2152 addConnectedNodes(
I, NewSet, NodesAdded);
2153 if (!NewSet.
empty())
2154 NodeSets.push_back(NewSet);
2159 if (NodesAdded.
count(&SU) == 0) {
2161 addConnectedNodes(&SU, NewSet, NodesAdded);
2162 if (!NewSet.
empty())
2163 NodeSets.push_back(NewSet);
2169void SwingSchedulerDAG::addConnectedNodes(
SUnit *SU,
NodeSet &NewSet,
2173 for (
auto &OE : DDG->getOutEdges(SU)) {
2175 if (!OE.isArtificial() && !
Successor->isBoundaryNode() &&
2177 addConnectedNodes(
Successor, NewSet, NodesAdded);
2179 for (
auto &IE : DDG->getInEdges(SU)) {
2180 SUnit *Predecessor =
IE.getSrc();
2181 if (!
IE.isArtificial() && NodesAdded.
count(Predecessor) == 0)
2182 addConnectedNodes(Predecessor, NewSet, NodesAdded);
2191 for (
SUnit *SU : Set1) {
2192 if (Set2.
count(SU) != 0)
2195 return !Result.empty();
2199void SwingSchedulerDAG::fuseRecs(NodeSetType &NodeSets) {
2208 for (
SUnit *SU : *J)
2220void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) {
2224 J->remove_if([&](
SUnit *SUJ) {
return I->count(SUJ); });
2239void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
2243 for (
auto &Nodes : NodeSets) {
2248 R.insert(
N.begin(),
N.end());
2251 }
else if (
succ_L(NodeOrder,
N, DDG.get()) &&
2253 R.insert(
N.begin(),
N.end());
2261 }
else if (NodeSets.size() == 1) {
2262 for (
const auto &
N : Nodes)
2263 if (
N->Succs.size() == 0)
2269 SUnit *maxASAP =
nullptr;
2270 for (
SUnit *SU : Nodes) {
2280 while (!
R.empty()) {
2281 if (Order == TopDown) {
2285 while (!
R.empty()) {
2286 SUnit *maxHeight =
nullptr;
2299 NodeOrder.insert(maxHeight);
2301 R.remove(maxHeight);
2302 for (
const auto &OE : DDG->getOutEdges(maxHeight)) {
2303 SUnit *SU = OE.getDst();
2304 if (Nodes.count(SU) == 0)
2306 if (NodeOrder.contains(SU))
2308 if (OE.ignoreDependence(
false))
2317 for (
const auto &IE : DDG->getInEdges(maxHeight)) {
2319 if (!
IE.isAntiDep())
2321 if (Nodes.count(SU) == 0)
2323 if (NodeOrder.contains(SU))
2331 if (
pred_L(NodeOrder,
N, DDG.get(), &Nodes))
2332 R.insert(
N.begin(),
N.end());
2337 while (!
R.empty()) {
2338 SUnit *maxDepth =
nullptr;
2350 NodeOrder.insert(maxDepth);
2353 if (Nodes.isExceedSU(maxDepth)) {
2356 R.insert(Nodes.getNode(0));
2359 for (
const auto &IE : DDG->getInEdges(maxDepth)) {
2361 if (Nodes.count(SU) == 0)
2363 if (NodeOrder.contains(SU))
2372 for (
const auto &OE : DDG->getOutEdges(maxDepth)) {
2373 SUnit *SU = OE.getDst();
2374 if (!OE.isAntiDep())
2376 if (Nodes.count(SU) == 0)
2378 if (NodeOrder.contains(SU))
2386 if (
succ_L(NodeOrder,
N, DDG.get(), &Nodes))
2387 R.insert(
N.begin(),
N.end());
2394 dbgs() <<
"Node order: ";
2395 for (
SUnit *
I : NodeOrder)
2396 dbgs() <<
" " <<
I->NodeNum <<
" ";
2403bool SwingSchedulerDAG::schedulePipeline(
SMSchedule &Schedule) {
2405 if (NodeOrder.empty()){
2410 bool scheduleFound =
false;
2411 std::unique_ptr<HighRegisterPressureDetector> HRPDetector;
2415 HRPDetector->init(RegClassInfo);
2418 for (
unsigned II = MII;
II <= MAX_II && !scheduleFound; ++
II) {
2430 int EarlyStart = INT_MIN;
2431 int LateStart = INT_MAX;
2440 dbgs() <<
format(
"\tes: %8x ls: %8x\n", EarlyStart, LateStart));
2442 if (EarlyStart > LateStart)
2443 scheduleFound =
false;
2444 else if (EarlyStart != INT_MIN && LateStart == INT_MAX)
2446 Schedule.
insert(SU, EarlyStart, EarlyStart + (
int)
II - 1,
II);
2447 else if (EarlyStart == INT_MIN && LateStart != INT_MAX)
2449 Schedule.
insert(SU, LateStart, LateStart - (
int)
II + 1,
II);
2450 else if (EarlyStart != INT_MIN && LateStart != INT_MAX) {
2451 LateStart = std::min(LateStart, EarlyStart + (
int)
II - 1);
2460 scheduleFound = Schedule.
insert(SU, LateStart, EarlyStart,
II);
2462 scheduleFound = Schedule.
insert(SU, EarlyStart, LateStart,
II);
2465 scheduleFound = Schedule.
insert(SU, FirstCycle +
getASAP(SU),
2474 scheduleFound =
false;
2478 dbgs() <<
"\tCan't schedule\n";
2480 }
while (++NI != NE && scheduleFound);
2502 if (scheduleFound) {
2508 if (scheduleFound) {
2510 Pass.ORE->emit([&]() {
2513 <<
"Schedule found with Initiation Interval: "
2515 <<
", MaxStageCount: "
2526bool SwingSchedulerDAG::computeDelta(
MachineInstr &
MI,
unsigned &Delta)
const {
2530 bool OffsetIsScalable;
2535 if (OffsetIsScalable)
2538 if (!BaseOp->
isReg())
2546 if (BaseDef && BaseDef->
isPHI()) {
2570 unsigned &OffsetPos,
2576 unsigned BasePosLd, OffsetPosLd;
2579 Register BaseReg =
MI->getOperand(BasePosLd).getReg();
2584 if (!Phi || !
Phi->isPHI())
2593 if (!PrevDef || PrevDef ==
MI)
2599 unsigned BasePos1 = 0, OffsetPos1 = 0;
2605 int64_t LoadOffset =
MI->getOperand(OffsetPosLd).getImm();
2615 BasePos = BasePosLd;
2616 OffsetPos = OffsetPosLd;
2628 InstrChanges.find(SU);
2629 if (It != InstrChanges.end()) {
2630 std::pair<unsigned, int64_t> RegAndOffset = It->second;
2631 unsigned BasePos, OffsetPos;
2634 Register BaseReg =
MI->getOperand(BasePos).getReg();
2640 if (BaseStageNum < DefStageNum) {
2642 int OffsetDiff = DefStageNum - BaseStageNum;
2643 if (DefCycleNum < BaseCycleNum) {
2649 MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff;
2664 while (Def->isPHI()) {
2665 if (!Visited.
insert(Def).second)
2667 for (
unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
2668 if (Def->getOperand(i + 1).getMBB() ==
BB) {
2693 assert(SI !=
nullptr && DI !=
nullptr &&
"Expecting SUnit with an MI.");
2707 unsigned DeltaS, DeltaD;
2708 if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD))
2712 int64_t OffsetS, OffsetD;
2713 bool OffsetSIsScalable, OffsetDIsScalable;
2721 assert(!OffsetSIsScalable && !OffsetDIsScalable &&
2722 "Expected offsets to be byte offsets");
2726 if (!DefS || !DefD || !DefS->
isPHI() || !DefD->
isPHI())
2729 unsigned InitValS = 0;
2730 unsigned LoopValS = 0;
2731 unsigned InitValD = 0;
2732 unsigned LoopValD = 0;
2756 if (DeltaS != DeltaD || DeltaS < AccessSizeS.
getValue() ||
2760 return (OffsetS + (int64_t)AccessSizeS.
getValue() <
2761 OffsetD + (int64_t)AccessSizeD.
getValue());
2764void SwingSchedulerDAG::postProcessDAG() {
2765 for (
auto &M : Mutations)
2775 bool forward =
true;
2777 dbgs() <<
"Trying to insert node between " << StartCycle <<
" and "
2778 << EndCycle <<
" II: " <<
II <<
"\n";
2780 if (StartCycle > EndCycle)
2784 int termCycle = forward ? EndCycle + 1 : EndCycle - 1;
2785 for (
int curCycle = StartCycle; curCycle != termCycle;
2786 forward ? ++curCycle : --curCycle) {
2791 dbgs() <<
"\tinsert at cycle " << curCycle <<
" ";
2796 ProcItinResources.reserveResources(*SU, curCycle);
2797 ScheduledInstrs[curCycle].push_back(SU);
2798 InstrToCycle.insert(std::make_pair(SU, curCycle));
2799 if (curCycle > LastCycle)
2800 LastCycle = curCycle;
2801 if (curCycle < FirstCycle)
2802 FirstCycle = curCycle;
2806 dbgs() <<
"\tfailed to insert at cycle " << curCycle <<
" ";
2819 int EarlyCycle = INT_MAX;
2820 while (!Worklist.
empty()) {
2823 if (Visited.
count(PrevSU))
2825 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(PrevSU);
2826 if (it == InstrToCycle.end())
2828 EarlyCycle = std::min(EarlyCycle, it->second);
2829 for (
const auto &IE : DDG->
getInEdges(PrevSU))
2830 if (IE.isOrderDep() || IE.isOutputDep())
2843 int LateCycle = INT_MIN;
2844 while (!Worklist.
empty()) {
2849 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU);
2850 if (it == InstrToCycle.end())
2852 LateCycle = std::max(LateCycle, it->second);
2854 if (OE.isOrderDep() || OE.isOutputDep())
2865 for (
auto &
P : SU->
Preds)
2866 if (
P.getKind() ==
SDep::Anti &&
P.getSUnit()->getInstr()->isPHI())
2867 for (
auto &S :
P.getSUnit()->Succs)
2868 if (S.getKind() ==
SDep::Data && S.getSUnit()->getInstr()->isPHI())
2869 return P.getSUnit();
2882 for (
int cycle =
getFirstCycle(); cycle <= LastCycle; ++cycle) {
2885 if (IE.getSrc() ==
I) {
2890 *MinLateStart = std::min(*MinLateStart,
End);
2892 int EarlyStart = cycle + IE.getLatency() - IE.getDistance() *
II;
2893 *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
2898 if (OE.getDst() ==
I) {
2903 *MaxEarlyStart = std::max(*MaxEarlyStart, Start);
2905 int LateStart = cycle - OE.getLatency() + OE.getDistance() *
II;
2906 *MinLateStart = std::min(*MinLateStart, LateStart);
2911 for (
const auto &Dep : SU->
Preds) {
2914 if (BE && Dep.getSUnit() == BE && !SU->
getInstr()->
isPHI() &&
2916 *MinLateStart = std::min(*MinLateStart, cycle);
2926 std::deque<SUnit *> &Insts)
const {
2928 bool OrderBeforeUse =
false;
2929 bool OrderAfterDef =
false;
2930 bool OrderBeforeDef =
false;
2931 unsigned MoveDef = 0;
2932 unsigned MoveUse = 0;
2937 for (std::deque<SUnit *>::iterator
I = Insts.begin(), E = Insts.end();
I != E;
2940 if (!MO.isReg() || !MO.getReg().isVirtual())
2944 unsigned BasePos, OffsetPos;
2945 if (ST.getInstrInfo()->getBaseAndOffsetPosition(*
MI, BasePos, OffsetPos))
2946 if (
MI->getOperand(BasePos).getReg() == Reg)
2950 std::tie(Reads,
Writes) =
2951 (*I)->getInstr()->readsWritesVirtualRegister(Reg);
2953 OrderBeforeUse =
true;
2958 OrderAfterDef =
true;
2962 OrderBeforeUse =
true;
2966 OrderAfterDef =
true;
2970 OrderBeforeUse =
true;
2974 OrderAfterDef =
true;
2979 OrderBeforeUse =
true;
2985 OrderBeforeDef =
true;
2993 if (OE.getDst() != *
I)
2996 OrderBeforeUse =
true;
3003 else if ((OE.isAntiDep() || OE.isOutputDep()) &&
3005 OrderBeforeUse =
true;
3006 if ((MoveUse == 0) || (Pos < MoveUse))
3011 if (IE.getSrc() != *
I)
3013 if ((IE.isAntiDep() || IE.isOutputDep() || IE.isOrderDep()) &&
3015 OrderAfterDef =
true;
3022 if (OrderAfterDef && OrderBeforeUse && MoveUse == MoveDef)
3023 OrderBeforeUse =
false;
3028 OrderBeforeUse = !OrderAfterDef || (MoveUse > MoveDef);
3032 if (OrderBeforeUse && OrderAfterDef) {
3033 SUnit *UseSU = Insts.at(MoveUse);
3034 SUnit *DefSU = Insts.at(MoveDef);
3035 if (MoveUse > MoveDef) {
3036 Insts.erase(Insts.begin() + MoveUse);
3037 Insts.erase(Insts.begin() + MoveDef);
3039 Insts.erase(Insts.begin() + MoveDef);
3040 Insts.erase(Insts.begin() + MoveUse);
3050 Insts.push_front(SU);
3052 Insts.push_back(SU);
3060 assert(Phi.isPHI() &&
"Expecting a Phi.");
3065 unsigned InitVal = 0;
3066 unsigned LoopVal = 0;
3067 getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
3075 return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
3093 if (!Phi || !Phi->isPHI() || Phi->getParent() != Def->getParent())
3099 if (DMO.getReg() == LoopReg)
3110 if (InstrToCycle.count(IE.getSrc()))
3121 for (
auto &SU : SSD->
SUnits)
3126 while (!Worklist.
empty()) {
3128 if (DoNotPipeline.
count(SU))
3131 DoNotPipeline.
insert(SU);
3138 if (OE.getDistance() == 1)
3141 return DoNotPipeline;
3150 int NewLastCycle = INT_MIN;
3155 NewLastCycle = std::max(NewLastCycle, InstrToCycle[&SU]);
3162 if (IE.getDistance() == 0)
3163 NewCycle = std::max(InstrToCycle[IE.getSrc()], NewCycle);
3168 if (OE.getDistance() == 1)
3169 NewCycle = std::max(InstrToCycle[OE.getDst()], NewCycle);
3171 int OldCycle = InstrToCycle[&SU];
3172 if (OldCycle != NewCycle) {
3173 InstrToCycle[&SU] = NewCycle;
3178 <<
") is not pipelined; moving from cycle " << OldCycle
3179 <<
" to " << NewCycle <<
" Instr:" << *SU.
getInstr());
3181 NewLastCycle = std::max(NewLastCycle, NewCycle);
3183 LastCycle = NewLastCycle;
3200 int CycleDef = InstrToCycle[&SU];
3201 assert(StageDef != -1 &&
"Instruction should have been scheduled.");
3203 SUnit *Dst = OE.getDst();
3204 if (OE.isAssignedRegDep() && !Dst->isBoundaryNode())
3205 if (OE.getReg().isPhysical()) {
3208 if (InstrToCycle[Dst] <= CycleDef)
3226void SwingSchedulerDAG::checkValidNodeOrder(
const NodeSetType &Circuits)
const {
3229 typedef std::pair<SUnit *, unsigned> UnitIndex;
3230 std::vector<UnitIndex> Indices(NodeOrder.size(), std::make_pair(
nullptr, 0));
3232 for (
unsigned i = 0, s = NodeOrder.size(); i < s; ++i)
3233 Indices.push_back(std::make_pair(NodeOrder[i], i));
3235 auto CompareKey = [](UnitIndex i1, UnitIndex i2) {
3236 return std::get<0>(i1) < std::get<0>(i2);
3249 for (
unsigned i = 0, s = NodeOrder.size(); i < s; ++i) {
3250 SUnit *SU = NodeOrder[i];
3253 bool PredBefore =
false;
3254 bool SuccBefore =
false;
3261 for (
const auto &IE : DDG->getInEdges(SU)) {
3262 SUnit *PredSU = IE.getSrc();
3263 unsigned PredIndex = std::get<1>(
3272 for (
const auto &OE : DDG->getOutEdges(SU)) {
3273 SUnit *SuccSU = OE.getDst();
3279 unsigned SuccIndex = std::get<1>(
3292 Circuits, [SU](
const NodeSet &Circuit) {
return Circuit.
count(SU); });
3297 NumNodeOrderIssues++;
3301 <<
" are scheduled before node " << SU->
NodeNum
3308 dbgs() <<
"Invalid node order found!\n";
3319 unsigned OverlapReg = 0;
3320 unsigned NewBaseReg = 0;
3321 for (
SUnit *SU : Instrs) {
3323 for (
unsigned i = 0, e =
MI->getNumOperands(); i < e; ++i) {
3331 InstrChanges.find(SU);
3332 if (It != InstrChanges.end()) {
3333 unsigned BasePos, OffsetPos;
3339 MI->getOperand(OffsetPos).getImm() - It->second.second;
3352 unsigned TiedUseIdx = 0;
3353 if (
MI->isRegTiedToUseOperand(i, &TiedUseIdx)) {
3355 OverlapReg =
MI->getOperand(TiedUseIdx).getReg();
3357 NewBaseReg =
MI->getOperand(i).getReg();
3366 const std::deque<SUnit *> &Instrs)
const {
3367 std::deque<SUnit *> NewOrderPhi;
3368 for (
SUnit *SU : Instrs) {
3370 NewOrderPhi.push_back(SU);
3372 std::deque<SUnit *> NewOrderI;
3373 for (
SUnit *SU : Instrs) {
3389 std::deque<SUnit *> &cycleInstrs =
3390 ScheduledInstrs[cycle + (stage * InitiationInterval)];
3392 ScheduledInstrs[cycle].push_front(SU);
3398 for (
int cycle =
getFinalCycle() + 1; cycle <= LastCycle; ++cycle)
3399 ScheduledInstrs.erase(cycle);
3409 std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[
Cycle];
3418 os <<
"Num nodes " <<
size() <<
" rec " << RecMII <<
" mov " << MaxMOV
3419 <<
" depth " << MaxDepth <<
" col " << Colocate <<
"\n";
3420 for (
const auto &
I : Nodes)
3421 os <<
" SU(" <<
I->NodeNum <<
") " << *(
I->getInstr());
3425#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3432 for (
SUnit *CI : cycleInstrs->second) {
3434 os <<
"(" << CI->
NodeNum <<
") ";
3445void ResourceManager::dumpMRT()
const {
3449 std::stringstream SS;
3451 SS << std::setw(4) <<
"Slot";
3453 SS << std::setw(3) <<
I;
3454 SS << std::setw(7) <<
"#Mops"
3456 for (
int Slot = 0; Slot < InitiationInterval; ++Slot) {
3457 SS << std::setw(4) << Slot;
3459 SS << std::setw(3) << MRT[Slot][
I];
3460 SS << std::setw(7) << NumScheduledMops[Slot] <<
"\n";
3469 unsigned ProcResourceID = 0;
3474 "Too many kinds of resources, unsupported");
3480 if (
Desc.SubUnitsIdxBegin)
3482 Masks[
I] = 1ULL << ProcResourceID;
3488 if (!
Desc.SubUnitsIdxBegin)
3490 Masks[
I] = 1ULL << ProcResourceID;
3491 for (
unsigned U = 0; U <
Desc.NumUnits; ++U)
3492 Masks[
I] |= Masks[
Desc.SubUnitsIdxBegin[U]];
3497 dbgs() <<
"ProcResourceDesc:\n";
3500 dbgs() <<
format(
" %16s(%2d): Mask: 0x%08x, NumUnits:%2d\n",
3501 ProcResource->
Name,
I, Masks[
I],
3504 dbgs() <<
" -----------------\n";
3512 dbgs() <<
"canReserveResources:\n";
3515 return DFAResources[positiveModulo(
Cycle, InitiationInterval)]
3521 dbgs() <<
"No valid Schedule Class Desc for schedClass!\n";
3527 reserveResources(SCDesc,
Cycle);
3528 bool Result = !isOverbooked();
3529 unreserveResources(SCDesc,
Cycle);
3538 dbgs() <<
"reserveResources:\n";
3541 return DFAResources[positiveModulo(
Cycle, InitiationInterval)]
3547 dbgs() <<
"No valid Schedule Class Desc for schedClass!\n";
3553 reserveResources(SCDesc,
Cycle);
3558 dbgs() <<
"reserveResources: done!\n\n";
3569 ++MRT[positiveModulo(
C, InitiationInterval)][PRE.ProcResourceIdx];
3572 ++NumScheduledMops[positiveModulo(
C, InitiationInterval)];
3581 --MRT[positiveModulo(
C, InitiationInterval)][PRE.ProcResourceIdx];
3584 --NumScheduledMops[positiveModulo(
C, InitiationInterval)];
3587bool ResourceManager::isOverbooked()
const {
3589 for (
int Slot = 0;
Slot < InitiationInterval; ++
Slot) {
3592 if (MRT[Slot][
I] >
Desc->NumUnits)
3595 if (NumScheduledMops[Slot] > IssueWidth)
3601int ResourceManager::calculateResMIIDFA()
const {
3606 FuncUnitSorter FUS = FuncUnitSorter(*ST);
3608 FUS.calcCriticalResources(*SU.
getInstr());
3619 while (!FuncUnitOrder.empty()) {
3621 FuncUnitOrder.pop();
3628 unsigned ReservedCycles = 0;
3629 auto *RI = Resources.
begin();
3630 auto *RE = Resources.
end();
3632 dbgs() <<
"Trying to reserve resource for " << NumCycles
3633 <<
" cycles for \n";
3636 for (
unsigned C = 0;
C < NumCycles; ++
C)
3638 if ((*RI)->canReserveResources(*
MI)) {
3639 (*RI)->reserveResources(*
MI);
3646 <<
", NumCycles:" << NumCycles <<
"\n");
3648 for (
unsigned C = ReservedCycles;
C < NumCycles; ++
C) {
3650 <<
"NewResource created to reserve resources"
3653 assert(NewResource->canReserveResources(*
MI) &&
"Reserve error.");
3654 NewResource->reserveResources(*
MI);
3655 Resources.
push_back(std::unique_ptr<DFAPacketizer>(NewResource));
3659 int Resmii = Resources.
size();
3666 return calculateResMIIDFA();
3685 <<
" WriteProcRes: ";
3696 dbgs() <<
Desc->Name <<
": " << PRE.ReleaseAtCycle <<
", ";
3699 ResourceCount[PRE.ProcResourceIdx] += PRE.ReleaseAtCycle;
3704 int Result = (NumMops + IssueWidth - 1) / IssueWidth;
3707 dbgs() <<
"#Mops: " << NumMops <<
", "
3708 <<
"IssueWidth: " << IssueWidth <<
", "
3709 <<
"Cycles: " << Result <<
"\n";
3714 std::stringstream SS;
3715 SS << std::setw(2) <<
"ID" << std::setw(16) <<
"Name" << std::setw(10)
3716 <<
"Units" << std::setw(10) <<
"Consumed" << std::setw(10) <<
"Cycles"
3723 int Cycles = (ResourceCount[
I] +
Desc->NumUnits - 1) /
Desc->NumUnits;
3726 std::stringstream SS;
3727 SS << std::setw(2) <<
I << std::setw(16) <<
Desc->Name << std::setw(10)
3728 <<
Desc->NumUnits << std::setw(10) << ResourceCount[
I]
3729 << std::setw(10) << Cycles <<
"\n";
3733 if (Cycles > Result)
3740 InitiationInterval =
II;
3741 DFAResources.clear();
3742 DFAResources.resize(
II);
3743 for (
auto &
I : DFAResources)
3744 I.reset(ST->getInstrInfo()->CreateTargetScheduleState(*ST));
3747 NumScheduledMops.
clear();
3752 if (Pred.isArtificial() || Dst->isBoundaryNode())
3757 return IgnoreAnti && (Pred.getKind() ==
SDep::Kind::Anti || Distance != 0);
3760SwingSchedulerDDG::SwingSchedulerDDGEdges &
3761SwingSchedulerDDG::getEdges(
const SUnit *SU) {
3763 return EntrySUEdges;
3769const SwingSchedulerDDG::SwingSchedulerDDGEdges &
3770SwingSchedulerDDG::getEdges(
const SUnit *SU)
const {
3772 return EntrySUEdges;
3778void SwingSchedulerDDG::addEdge(
const SUnit *SU,
3780 auto &Edges = getEdges(SU);
3782 Edges.Succs.push_back(Edge);
3784 Edges.Preds.push_back(Edge);
3787void SwingSchedulerDDG::initEdges(
SUnit *SU) {
3788 for (
const auto &PI : SU->
Preds) {
3793 for (
const auto &SI : SU->
Succs) {
3801 : EntrySU(EntrySU), ExitSU(ExitSU) {
3802 EdgesVec.resize(SUnits.size());
3806 for (
auto &SU : SUnits)
3812 return getEdges(SU).Preds;
3817 return getEdges(SU).Succs;
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
This file implements the BitVector class.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
This file defines the DenseMap class.
SmallVector< uint32_t, 0 > Writes
const HexagonInstrInfo * TII
A common definition of LaneBitmask for use in TableGen and CodeGen.
static cl::opt< int > SwpForceII("pipeliner-force-ii", cl::desc("Force pipeliner to use specified II."), cl::Hidden, cl::init(-1))
A command line argument to force pipeliner to use specified initial interval.
static cl::opt< bool > ExperimentalCodeGen("pipeliner-experimental-cg", cl::Hidden, cl::init(false), cl::desc("Use the experimental peeling code generator for software pipelining"))
static cl::opt< bool > MVECodeGen("pipeliner-mve-cg", cl::Hidden, cl::init(false), cl::desc("Use the MVE code generator for software pipelining"))
static cl::opt< int > RegPressureMargin("pipeliner-register-pressure-margin", cl::Hidden, cl::init(5), cl::desc("Margin representing the unused percentage of " "the register pressure limit"))
static cl::opt< bool > SwpDebugResource("pipeliner-dbg-res", cl::Hidden, cl::init(false))
static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker, NodeSet &NS)
Compute the live-out registers for the instructions in a node-set.
static void computeScheduledInsts(const SwingSchedulerDAG *SSD, SMSchedule &Schedule, std::vector< MachineInstr * > &OrderedInsts, DenseMap< MachineInstr *, unsigned > &Stages)
Create an instruction stream that represents a single iteration and stage of each instruction.
static cl::opt< bool > EmitTestAnnotations("pipeliner-annotate-for-testing", cl::Hidden, cl::init(false), cl::desc("Instead of emitting the pipelined code, annotate instructions " "with the generated schedule for feeding into the " "-modulo-schedule-test pass"))
static bool isIntersect(SmallSetVector< SUnit *, 8 > &Set1, const NodeSet &Set2, SmallSetVector< SUnit *, 8 > &Result)
Return true if Set1 contains elements in Set2.
static cl::opt< bool > SwpIgnoreRecMII("pipeliner-ignore-recmii", cl::ReallyHidden, cl::desc("Ignore RecMII"))
static cl::opt< int > SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1))
Modulo Software Pipelining
static cl::opt< bool > SwpPruneLoopCarried("pipeliner-prune-loop-carried", cl::desc("Prune loop carried order dependences."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of loop carried order dependences.
static bool isDependenceBarrier(MachineInstr &MI)
Return true if the instruction causes a chain between memory references before and after it.
static cl::opt< int > SwpMaxMii("pipeliner-max-mii", cl::desc("Size limit for the MII."), cl::Hidden, cl::init(27))
A command line argument to limit minimum initial interval for pipelining.
static unsigned getLoopPhiReg(const MachineInstr &Phi, const MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
static bool isSuccOrder(SUnit *SUa, SUnit *SUb)
Return true if SUb can be reached from SUa following the chain edges.
static cl::opt< int > SwpMaxStages("pipeliner-max-stages", cl::desc("Maximum stages allowed in the generated scheduled."), cl::Hidden, cl::init(3))
A command line argument to limit the number of stages in the pipeline.
static cl::opt< bool > EnableSWPOptSize("enable-pipeliner-opt-size", cl::desc("Enable SWP at Os."), cl::Hidden, cl::init(false))
A command line option to enable SWP at -Os.
static bool pred_L(SetVector< SUnit * > &NodeOrder, SmallSetVector< SUnit *, 8 > &Preds, SwingSchedulerDDG *DDG, const NodeSet *S=nullptr)
Compute the Pred_L(O) set, as defined in the paper.
static cl::opt< bool > SwpShowResMask("pipeliner-show-mask", cl::Hidden, cl::init(false))
static cl::opt< int > SwpIISearchRange("pipeliner-ii-search-range", cl::desc("Range to search for II"), cl::Hidden, cl::init(10))
static bool computePath(SUnit *Cur, SetVector< SUnit * > &Path, SetVector< SUnit * > &DestNodes, SetVector< SUnit * > &Exclude, SmallPtrSet< SUnit *, 8 > &Visited, SwingSchedulerDDG *DDG)
Return true if there is a path from the specified node to any of the nodes in DestNodes.
static bool succ_L(SetVector< SUnit * > &NodeOrder, SmallSetVector< SUnit *, 8 > &Succs, SwingSchedulerDDG *DDG, const NodeSet *S=nullptr)
Compute the Succ_L(O) set, as defined in the paper.
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, unsigned &InitVal, unsigned &LoopVal)
Return the register values for the operands of a Phi instruction.
static cl::opt< bool > LimitRegPressure("pipeliner-register-pressure", cl::Hidden, cl::init(false), cl::desc("Limit register pressure of scheduled loop"))
static cl::opt< bool > EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true), cl::desc("Enable Software Pipelining"))
A command line option to turn software pipelining on or off.
static cl::opt< bool > SwpPruneDeps("pipeliner-prune-deps", cl::desc("Prune dependences between unrelated Phi nodes."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of chain dependences due to an unrelated Phi.
static SUnit * multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG)
If an instruction has a use that spans multiple iterations, then return true.
unsigned const TargetRegisterInfo * TRI
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the PriorityQueue class.
Remove Loads Into Fake Uses
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Target-Independent Code Generator Pass Configuration Options pass.
static unsigned getSize(unsigned Kind)
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A trivial helper function to check to see if the specified pointers are no-alias.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
LLVM Basic Block Representation.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
bool erase(const KeyT &Val)
Implements a dense probed hash-table based set.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
A possibly irreducible generalization of a Loop.
Itinerary data supplied by a subtarget to be used by a target.
const InstrStage * beginStage(unsigned ItinClassIndx) const
Return the first stage of the itinerary.
const InstrStage * endStage(unsigned ItinClassIndx) const
Return the last+1 stage of the itinerary.
bool isEmpty() const
Returns true if there are no itineraries.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
TypeSize getValue() const
BlockT * getHeader() const
Represents a single loop in the control flow graph.
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
unsigned getSchedClass() const
Return the scheduling class for this instruction.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Generic base class for all target subtargets.
const MCWriteProcResEntry * getWriteProcResEnd(const MCSchedClassDesc *SC) const
const MCWriteProcResEntry * getWriteProcResBegin(const MCSchedClassDesc *SC) const
Return an iterator at the first process resource consumed by the given scheduling class.
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
unsigned getNumOperands() const
Return number of MDNode operands.
Tracking metadata reference owned by Metadata.
StringRef getString() const
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
instr_iterator instr_end()
Analysis pass which computes a MachineDominatorTree.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void deleteMachineInstr(MachineInstr *MI)
DeleteMachineInstr - Delete the given MachineInstr.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
bool mayRaiseFPException() const
Return true if this instruction could possibly raise a floating-point exception.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
bool isRegSequence() const
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
bool isIdenticalTo(const MachineInstr &Other, MICheckType Check=CheckDefs) const
Return true if this instruction is identical to Other.
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
bool isPseudo(QueryType Type=IgnoreBundle) const
Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.
const MachineOperand & getOperand(unsigned i) const
iterator_range< filtered_mop_iterator > all_defs()
Returns an iterator range over all operands that are (explicit or implicit) register defs.
A description of a memory reference used in the backend.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
const Value * getValue() const
Return the base address of the memory access.
int64_t getOffset() const
For normal values, this is a byte offset added to the base address.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setReg(Register Reg)
Change the register this operand corresponds to.
Register getReg() const
getReg - Returns the register number.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
The main class in the implementation of the target independent software pipeliner pass.
const TargetInstrInfo * TII
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const MachineDominatorTree * MDT
const MachineLoopInfo * MLI
MachineOptimizationRemarkEmitter * ORE
RegisterClassInfo RegClassInfo
defusechain_iterator - This class provides iterator support for machine operands in the function that...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
use_instr_iterator use_instr_begin(Register RegNo) const
static use_instr_iterator use_instr_end()
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
This class implements a map that also provides access to all stored values in a deterministic order.
iterator find(const KeyT &Key)
static MemoryLocation getAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location after Ptr, while remaining within the underlying objec...
Expand the kernel using modulo variable expansion algorithm (MVE).
static bool canApply(MachineLoop &L)
Check if ModuloScheduleExpanderMVE can be applied to L.
The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...
void cleanup()
Performs final cleanup after expansion.
void expand()
Performs the actual expansion.
Expander that simply annotates each scheduled instruction with a post-instr symbol that can be consum...
void annotate()
Performs the annotation.
Represents a schedule for a single-block loop.
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
SUnit * getNode(unsigned i) const
void print(raw_ostream &os) const
void setRecMII(unsigned mii)
unsigned count(SUnit *SU) const
void setColocate(unsigned c)
int compareRecMII(NodeSet &RHS)
LLVM_DUMP_METHOD void dump() const
Pass interface - Implemented by all 'passes'.
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
A reimplementation of ModuloScheduleExpander.
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
Track the current register pressure at some position in the instruction stream, and remember the high...
void addLiveRegs(ArrayRef< VRegMaskOrUnit > Regs)
Force liveness of virtual registers or physical register units.
unsigned getRegPressureSetLimit(unsigned Idx) const
Get the register unit limit for the given pressure set index.
void runOnMachineFunction(const MachineFunction &MF)
runOnFunction - Prepare to answer questions about MF.
Wrapper class representing virtual and physical registers.
int calculateResMII() const
void initProcResourceVectors(const MCSchedModel &SM, SmallVectorImpl< uint64_t > &Masks)
void init(int II)
Initialize resources with the initiation interval II.
bool canReserveResources(SUnit &SU, int Cycle)
Check if the resources occupied by a machine instruction are available in the current state.
Kind
These are the different kinds of scheduling dependencies.
@ Order
Any other ordering dependency.
@ Anti
A register anti-dependence (aka WAR).
@ Data
Regular data dependence (aka true-dependence).
@ Barrier
An unknown scheduling barrier.
@ Artificial
Arbitrary strong DAG edge (no real dependence).
This class represents the scheduled code.
std::deque< SUnit * > reorderInstructions(const SwingSchedulerDAG *SSD, const std::deque< SUnit * > &Instrs) const
void setInitiationInterval(int ii)
Set the initiation interval for this schedule.
SmallSet< SUnit *, 8 > computeUnpipelineableNodes(SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI)
Determine transitive dependences of unpipelineable instructions.
void dump() const
Utility function used for debugging to print the schedule.
bool insert(SUnit *SU, int StartCycle, int EndCycle, int II)
Try to schedule the node at the specified StartCycle and continue until the node is schedule or the E...
int earliestCycleInChain(const SwingSchedulerDDGEdge &Dep, const SwingSchedulerDDG *DDG)
Return the cycle of the earliest scheduled instruction in the dependence chain.
unsigned getMaxStageCount()
Return the maximum stage count needed for this schedule.
void print(raw_ostream &os) const
Print the schedule information to the given output.
bool onlyHasLoopCarriedOutputOrOrderPreds(SUnit *SU, const SwingSchedulerDDG *DDG) const
Return true if all scheduled predecessors are loop-carried output/order dependencies.
int stageScheduled(SUnit *SU) const
Return the stage for a scheduled instruction.
void orderDependence(const SwingSchedulerDAG *SSD, SUnit *SU, std::deque< SUnit * > &Insts) const
Order the instructions within a cycle so that the definitions occur before the uses.
bool isValidSchedule(SwingSchedulerDAG *SSD)
int getInitiationInterval() const
Return the initiation interval for this schedule.
std::deque< SUnit * > & getInstructions(int cycle)
Return the instructions that are scheduled at the specified cycle.
int getFirstCycle() const
Return the first cycle in the completed schedule.
bool isLoopCarriedDefOfUse(const SwingSchedulerDAG *SSD, MachineInstr *Def, MachineOperand &MO) const
Return true if the instruction is a definition that is loop carried and defines the use on the next i...
unsigned cycleScheduled(SUnit *SU) const
Return the cycle for a scheduled instruction.
void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int II, SwingSchedulerDAG *DAG)
Compute the scheduling start slot for the instruction.
bool normalizeNonPipelinedInstructions(SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI)
bool isLoopCarried(const SwingSchedulerDAG *SSD, MachineInstr &Phi) const
Return true if the scheduled Phi has a loop carried operand.
int latestCycleInChain(const SwingSchedulerDDGEdge &Dep, const SwingSchedulerDDG *DDG)
Return the cycle of the latest scheduled instruction in the dependence chain.
int getFinalCycle() const
Return the last cycle in the finalized schedule.
void finalizeSchedule(SwingSchedulerDAG *SSD)
After the schedule has been formed, call this function to combine the instructions from the different...
Scheduling unit. This is a node in the scheduling DAG.
bool isInstr() const
Returns true if this SUnit refers to a machine instruction as opposed to an SDNode.
unsigned NodeNum
Entry # of node in the node vector.
void setInstr(MachineInstr *MI)
Assigns the instruction for the SUnit.
void removePred(const SDep &D)
Removes the specified edge as a pred of the current node if it exists.
bool isPred(const SUnit *N) const
Tests if node N is a predecessor of this node.
unsigned short Latency
Node latency.
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
bool hasPhysRegDefs
Has physreg defs that are being used.
SmallVector< SDep, 4 > Succs
All sunit successors.
SmallVector< SDep, 4 > Preds
All sunit predecessors.
bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
A ScheduleDAG for scheduling lists of MachineInstr.
DenseMap< MachineInstr *, SUnit * > MISUnitMap
After calling BuildSchedGraph, each machine instruction in the current scheduling region is mapped to...
virtual void finishBlock()
Cleans up after scheduling in the given block.
MachineBasicBlock * BB
The block in which to insert instructions.
const MCSchedClassDesc * getSchedClass(SUnit *SU) const
Resolves and cache a resolved scheduling class for an SUnit.
void dumpNode(const SUnit &SU) const override
UndefValue * UnknownValue
For an unanalyzable memory access, this Value is used in maps.
void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
TargetSchedModel SchedModel
TargetSchedModel provides an interface to the machine model.
void dump() const override
void RemovePred(SUnit *M, SUnit *N)
Updates the topological ordering to accommodate an edge to be removed from the specified node N from ...
void InitDAGTopologicalSorting()
Creates the initial topological ordering from the DAG to be scheduled.
void AddPred(SUnit *Y, SUnit *X)
Updates the topological ordering to accommodate an edge to be added from SUnit X to SUnit Y.
bool IsReachable(const SUnit *SU, const SUnit *TargetSU)
Checks if SU is reachable from TargetSU.
MachineRegisterInfo & MRI
Virtual/real register map.
const TargetInstrInfo * TII
Target instruction information.
std::vector< SUnit > SUnits
The scheduling units.
const TargetRegisterInfo * TRI
Target processor register info.
SUnit EntrySU
Special node for the region entry.
MachineFunction & MF
Machine function.
SUnit ExitSU
Special node for the region exit.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
typename vector_type::const_iterator iterator
void clear()
Completely clear the SetVector.
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
bool empty() const
Determine if the SetVector is empty or not.
bool insert(const value_type &X)
Insert a new element into the SetVector.
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Implements a dense probed hash-table based set with some number of buckets stored inline.
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
bool contains(const T &V) const
Check if the SmallSet contains the given element.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class builds the dependence graph for the instructions in a loop, and attempts to schedule the i...
unsigned getInstrBaseReg(SUnit *SU) const
Return the new base register that was stored away for the changed instruction.
unsigned getDepth(SUnit *Node)
The depth, in the dependence graph, for a node.
int getASAP(SUnit *Node)
Return the earliest time an instruction may be scheduled.
void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule)
Apply changes to the instruction if needed.
const SwingSchedulerDDG * getDDG() const
void finishBlock() override
Clean up after the software pipeliner runs.
void fixupRegisterOverlaps(std::deque< SUnit * > &Instrs)
Attempt to fix the degenerate cases when the instruction serialization causes the register lifetimes ...
int getZeroLatencyDepth(SUnit *Node)
The maximum unweighted length of a path from an arbitrary node to the given node in which each edge h...
bool isLoopCarriedDep(const SwingSchedulerDDGEdge &Edge) const
Return true for an order or output dependence that is loop carried potentially.
void schedule() override
We override the schedule function in ScheduleDAGInstrs to implement the scheduling part of the Swing ...
int getMOV(SUnit *Node)
The mobility function, which the number of slots in which an instruction may be scheduled.
int getZeroLatencyHeight(SUnit *Node)
The maximum unweighted length of a path from the given node to an arbitrary node in which each edge h...
unsigned getHeight(SUnit *Node)
The height, in the dependence graph, for a node.
int getALAP(SUnit *Node)
Return the latest time an instruction my be scheduled.
Represents a dependence between two instruction.
SUnit * getDst() const
Returns the SUnit to which the edge points (destination node).
bool isArtificial() const
Returns true if the edge represents an artificial dependence.
bool ignoreDependence(bool IgnoreAnti) const
Returns true for DDG nodes that we ignore when computing the cost functions.
bool isOrderDep() const
Returns true if the edge represents a dependence that is not data, anti or output dependence.
SUnit * getSrc() const
Returns the SUnit from which the edge comes (source node).
bool isOutputDep() const
Returns true if the edge represents output dependence.
Represents dependencies between instructions.
SwingSchedulerDDG(std::vector< SUnit > &SUnits, SUnit *EntrySU, SUnit *ExitSU)
const EdgesType & getInEdges(const SUnit *SU) const
const EdgesType & getOutEdges(const SUnit *SU) const
Object returned by analyzeLoopForPipelining.
virtual bool isMVEExpanderSupported()
Return true if the target can expand pipelined schedule with modulo variable expansion.
virtual bool shouldIgnoreForPipelining(const MachineInstr *MI) const =0
Return true if the given instruction should not be pipelined and should be ignored.
virtual bool shouldUseSchedule(SwingSchedulerDAG &SSD, SMSchedule &SMS)
Return true if the proposed schedule should used.
virtual std::unique_ptr< PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
bool isZeroCost(unsigned Opcode) const
Return true for pseudo instructions that don't consume any machine resources in their current form.
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
virtual DFAPacketizer * CreateTargetScheduleState(const TargetSubtargetInfo &) const
Create machine specific model for scheduling.
virtual bool isPostIncrement(const MachineInstr &MI) const
Return true for post-incremented instructions.
virtual bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const
Return true if the instruction contains a base register and offset.
virtual bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const
Sometimes, it is possible for the target to tell, even without aliasing information,...
virtual bool getIncrementValue(const MachineInstr &MI, int &Value) const
If the instruction is an increment of a constant value, return the amount.
bool getMemOperandWithOffset(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const
Get the base operand and byte offset of an instruction that reads/writes memory.
Target-Independent Code Generator Pass Configuration Options.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const char * getRegPressureSetName(unsigned Idx) const =0
Get the name of this register unit pressure set.
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
static Type * getVoidTy(LLVMContext &C)
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
The main class in the implementation of the target independent window scheduler.
unsigned getPosition() const
std::pair< iterator, bool > insert(const ValueT &V)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
This class implements an extremely fast bulk output stream that can only output to a stream.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Reg
All possible values of the reg field in the ModR/M byte.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< PhiNode * > Phi
NodeAddr< DefNode * > Def
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void stable_sort(R &&Range)
int popcount(T Value) noexcept
Count the number of set bits in a value.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
@ WS_Force
Use window algorithm after SMS algorithm fails.
@ WS_On
Turn off window algorithm.
void sort(IteratorTy Start, IteratorTy End)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
unsigned getRegState(const MachineOperand &RegOp)
Get all register state flags from machine operand RegOp.
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
char & MachinePipelinerID
This pass performs software pipelining on machine instructions.
cl::opt< bool > SwpEnableCopyToPhi
void getUnderlyingObjects(const Value *V, SmallVectorImpl< const Value * > &Objects, const LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to getUnderlyingObject except that it can look through phi and select instruct...
cl::opt< WindowSchedulingFlag > WindowSchedulingOption("window-sched", cl::Hidden, cl::init(WindowSchedulingFlag::WS_On), cl::desc("Set how to use window scheduling algorithm."), cl::values(clEnumValN(WindowSchedulingFlag::WS_Off, "off", "Turn off window algorithm."), clEnumValN(WindowSchedulingFlag::WS_On, "on", "Use window algorithm after SMS algorithm fails."), clEnumValN(WindowSchedulingFlag::WS_Force, "force", "Use window algorithm instead of SMS algorithm.")))
A command line argument to set the window scheduling option.
bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
cl::opt< int > SwpForceIssueWidth
A command line argument to force pipeliner to use specified issue width.
Description of the encoding of one expression Op.
These values represent a non-pipelined step in the execution of an instruction.
RegisterPressure computed within a region of instructions delimited by TopIdx and BottomIdx.
static constexpr LaneBitmask getNone()
Define a kind of processor resource that will be modeled by the scheduler.
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Machine model for scheduling, bundling, and heuristics.
const MCSchedClassDesc * getSchedClassDesc(unsigned SchedClassIdx) const
unsigned getNumProcResourceKinds() const
bool hasInstrSchedModel() const
Does this machine model include instruction-level scheduling.
const MCProcResourceDesc * getProcResource(unsigned ProcResourceIdx) const
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
MachineInstr * LoopInductionVar
SmallVector< MachineOperand, 4 > BrCond
MachineInstr * LoopCompare
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > LoopPipelinerInfo
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
const TargetPassConfig * PassConfig
const MachineDominatorTree * MDT
RegisterClassInfo * RegClassInfo
const MachineLoopInfo * MLI
Store the effects of a change in pressure on things that MI scheduler cares about.
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.